[Mesa-dev] [PATCH 1/9] radeonsi: add struct si_compiler containing LLVMTargetMachineRef

Tue Apr 17 00:52:12 UTC 2018

From: Marek Olšák <marek.olsak at amd.com>

It will contain more variables.
---
 src/gallium/drivers/radeonsi/si_compute.c     |  8 +--
 src/gallium/drivers/radeonsi/si_pipe.c        | 52 ++++++++-------
 src/gallium/drivers/radeonsi/si_pipe.h        |  6 +-
 src/gallium/drivers/radeonsi/si_shader.c      | 66 +++++++++----------
 src/gallium/drivers/radeonsi/si_shader.h      | 13 ++--
 .../drivers/radeonsi/si_shader_internal.h     |  6 +-
 .../drivers/radeonsi/si_shader_tgsi_setup.c   | 13 ++--
 .../drivers/radeonsi/si_state_shaders.c       | 28 ++++----
 8 files changed, 101 insertions(+), 91 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 69c3dce0124..e95e79c7b46 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -79,27 +79,27 @@ static void code_object_to_config(const amd_kernel_code_t *code_object,
 	out_config->scratch_bytes_per_wave =
 		align(code_object->workitem_private_segment_byte_size * 64, 1024);
 }
 
 /* Asynchronous compute shader compilation. */
 static void si_create_compute_state_async(void *job, int thread_index)
 {
 	struct si_compute *program = (struct si_compute *)job;
 	struct si_shader *shader = &program->shader;
 	struct si_shader_selector sel;
-	LLVMTargetMachineRef tm;
+	struct si_compiler *compiler;
 	struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;
 
 	assert(!debug->debug_message || debug->async);
 	assert(thread_index >= 0);
-	assert(thread_index < ARRAY_SIZE(program->screen->tm));
-	tm = program->screen->tm[thread_index];
+	assert(thread_index < ARRAY_SIZE(program->screen->compiler));
+	compiler = &program->screen->compiler[thread_index];
 
 	memset(&sel, 0, sizeof(sel));
 
 	sel.screen = program->screen;
 
 	if (program->ir_type == PIPE_SHADER_IR_TGSI) {
 		tgsi_scan_shader(program->ir.tgsi, &sel.info);
 		sel.tokens = program->ir.tgsi;
 	} else {
 		assert(program->ir_type == PIPE_SHADER_IR_NIR);
@@ -116,21 +116,21 @@ static void si_create_compute_state_async(void *job, int thread_index)
 				 &program->active_const_and_shader_buffers,
 				 &program->active_samplers_and_images);
 
 	program->shader.selector = &sel;
 	program->shader.is_monolithic = true;
 	program->uses_grid_size = sel.info.uses_grid_size;
 	program->uses_block_size = sel.info.uses_block_size;
 	program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
 	program->uses_bindless_images = sel.info.uses_bindless_images;
 
-	if (si_shader_create(program->screen, tm, &program->shader, debug)) {
+	if (si_shader_create(program->screen, compiler, &program->shader, debug)) {
 		program->shader.compilation_failed = true;
 	} else {
 		bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
 		unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
 				      (sel.info.uses_grid_size ? 3 : 0) +
 				      (sel.info.uses_block_size ? 3 : 0);
 
 		shader->config.rsrc1 =
 			S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
 			S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) |
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 3de843af11c..fcf3556bcc1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -95,20 +95,38 @@ static const struct debug_named_value debug_options[] = {
 
 	/* Tests: */
 	{ "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
 	{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
 	{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
 	{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
 
 	DEBUG_NAMED_VALUE_END /* must be last */
 };
 
+static void si_init_compiler(struct si_screen *sscreen,
+			     struct si_compiler *compiler)
+{
+	enum ac_target_machine_options tm_options =
+		(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
+		(sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) |
+		(sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) |
+		(!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0);
+
+	compiler->tm = ac_create_target_machine(sscreen->info.family, tm_options);
+}
+
+static void si_destroy_compiler(struct si_compiler *compiler)
+{
+	if (compiler->tm)
+		LLVMDisposeTargetMachine(compiler->tm);
+}
+
 /*
  * pipe_context
  */
 static void si_destroy_context(struct pipe_context *context)
 {
 	struct si_context *sctx = (struct si_context *)context;
 	int i;
 
 	/* Unreference the framebuffer normally to disable related logic
 	 * properly.
@@ -193,21 +211,21 @@ static void si_destroy_context(struct pipe_context *context)
 	slab_destroy_child(&sctx->pool_transfers);
 	slab_destroy_child(&sctx->pool_transfers_unsync);
 
 	if (sctx->allocator_zeroed_memory)
 		u_suballocator_destroy(sctx->allocator_zeroed_memory);
 
 	sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL);
 	sctx->ws->fence_reference(&sctx->last_sdma_fence, NULL);
 	r600_resource_reference(&sctx->eop_bug_scratch, NULL);
 
-	LLVMDisposeTargetMachine(sctx->tm);
+	si_destroy_compiler(&sctx->compiler);
 
 	si_saved_cs_reference(&sctx->current_saved_cs, NULL);
 
 	_mesa_hash_table_destroy(sctx->tex_handles, NULL);
 	_mesa_hash_table_destroy(sctx->img_handles, NULL);
 
 	util_dynarray_fini(&sctx->resident_tex_handles);
 	util_dynarray_fini(&sctx->resident_img_handles);
 	util_dynarray_fini(&sctx->resident_tex_needs_color_decompress);
 	util_dynarray_fini(&sctx->resident_img_needs_color_decompress);
@@ -278,32 +296,20 @@ static void si_emit_string_marker(struct pipe_context *ctx,
 				  const char *string, int len)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 
 	dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number);
 
 	if (sctx->log)
 		u_log_printf(sctx->log, "\nString marker: %*s\n", len, string);
 }
 
-static LLVMTargetMachineRef
-si_create_llvm_target_machine(struct si_screen *sscreen)
-{
-	enum ac_target_machine_options tm_options =
-		(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
-		(sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) |
-		(sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) |
-		(!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0);
-
-	return ac_create_target_machine(sscreen->info.family, tm_options);
-}
-
 static void si_set_debug_callback(struct pipe_context *ctx,
 				  const struct pipe_debug_callback *cb)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_screen *screen = sctx->screen;
 
 	util_queue_finish(&screen->shader_compiler_queue);
 	util_queue_finish(&screen->shader_compiler_queue_low_priority);
 
 	if (cb)
@@ -543,21 +549,21 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	 * 1 threadgroup, so that the hw doesn't hang from being unable
 	 * to start any.
 	 *
 	 * The recommended value is 4 per CU at most. Higher numbers don't
 	 * bring much benefit, but they still occupy chip resources (think
 	 * async compute). I've seen ~2% performance difference between 4 and 32.
 	 */
 	sctx->scratch_waves = MAX2(32 * sscreen->info.num_good_compute_units,
 				   max_threads_per_block / 64);
 
-	sctx->tm = si_create_llvm_target_machine(sscreen);
+	si_init_compiler(sscreen, &sctx->compiler);
 
 	/* Bindless handles. */
 	sctx->tex_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
 						    _mesa_key_pointer_equal);
 	sctx->img_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
 						    _mesa_key_pointer_equal);
 
 	util_dynarray_init(&sctx->resident_tex_handles, NULL);
 	util_dynarray_init(&sctx->resident_img_handles, NULL);
 	util_dynarray_init(&sctx->resident_tex_needs_color_decompress, NULL);
@@ -616,27 +622,25 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
 		sscreen->ps_epilogs
 	};
 	unsigned i;
 
 	if (!sscreen->ws->unref(sscreen->ws))
 		return;
 
 	util_queue_destroy(&sscreen->shader_compiler_queue);
 	util_queue_destroy(&sscreen->shader_compiler_queue_low_priority);
 
-	for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++)
-		if (sscreen->tm[i])
-			LLVMDisposeTargetMachine(sscreen->tm[i]);
+	for (i = 0; i < ARRAY_SIZE(sscreen->compiler); i++)
+		si_destroy_compiler(&sscreen->compiler[i]);
 
-	for (i = 0; i < ARRAY_SIZE(sscreen->tm_low_priority); i++)
-		if (sscreen->tm_low_priority[i])
-			LLVMDisposeTargetMachine(sscreen->tm_low_priority[i]);
+	for (i = 0; i < ARRAY_SIZE(sscreen->compiler_lowp); i++)
+		si_destroy_compiler(&sscreen->compiler_lowp[i]);
 
 	/* Free shader parts. */
 	for (i = 0; i < ARRAY_SIZE(parts); i++) {
 		while (parts[i]) {
 			struct si_shader_part *part = parts[i];
 
 			parts[i] = part->next;
 			ac_shader_binary_clean(&part->binary);
 			FREE(part);
 		}
@@ -857,23 +861,23 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 		return NULL;
 	}
 
 	si_disk_cache_create(sscreen);
 
 	/* Only enable as many threads as we have target machines, but at most
 	 * the number of CPUs - 1 if there is more than one.
 	 */
 	num_threads = sysconf(_SC_NPROCESSORS_ONLN);
 	num_threads = MAX2(1, num_threads - 1);
-	num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->tm));
+	num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->compiler));
 	num_compiler_threads_lowprio =
-		MIN2(num_threads, ARRAY_SIZE(sscreen->tm_low_priority));
+		MIN2(num_threads, ARRAY_SIZE(sscreen->compiler_lowp));
 
 	if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
 			     32, num_compiler_threads,
 			     UTIL_QUEUE_INIT_RESIZE_IF_FULL)) {
 		si_destroy_shader_cache(sscreen);
 		FREE(sscreen);
 		return NULL;
 	}
 
 	if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
@@ -1023,23 +1027,23 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 					    SI_CONTEXT_INV_VMEM_L1;
 	if (sscreen->info.chip_class <= VI) {
 		sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
 		sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
 	}
 
 	if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
 		sscreen->debug_flags |= DBG_ALL_SHADERS;
 
 	for (i = 0; i < num_compiler_threads; i++)
-		sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
+		si_init_compiler(sscreen, &sscreen->compiler[i]);
 	for (i = 0; i < num_compiler_threads_lowprio; i++)
-		sscreen->tm_low_priority[i] = si_create_llvm_target_machine(sscreen);
+		si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);
 
 	/* Create the auxiliary context. This must be done last. */
 	sscreen->aux_context = si_create_context(&sscreen->b, 0);
 
 	if (sscreen->debug_flags & DBG(TEST_DMA))
 		si_test_dma(sscreen);
 
 	if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) |
 				      DBG(TEST_VMFAULT_SDMA) |
 				      DBG(TEST_VMFAULT_SHADER)))
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 823509524d4..54c9b725fcb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -523,26 +523,26 @@ struct si_screen {
 	 *   those as well.
 	 */
 	mtx_t			shader_cache_mutex;
 	struct hash_table		*shader_cache;
 
 	/* Shader compiler queue for multithreaded compilation. */
 	struct util_queue		shader_compiler_queue;
 	/* Use at most 3 normal compiler threads on quadcore and better.
 	 * Hyperthreaded CPUs report the number of threads, but we want
 	 * the number of cores. */
-	LLVMTargetMachineRef		tm[3]; /* used by the queue only */
+	struct si_compiler		compiler[3]; /* used by the queue only */
 
 	struct util_queue		shader_compiler_queue_low_priority;
 	/* Use at most 2 low priority threads on quadcore and better.
 	 * We want to minimize the impact on multithreaded Mesa. */
-	LLVMTargetMachineRef		tm_low_priority[2]; /* at most 2 threads */
+	struct si_compiler		compiler_lowp[2]; /* at most 2 threads */
 };
 
 struct si_blend_color {
 	struct pipe_blend_color		state;
 	bool				any_nonzeros;
 };
 
 struct si_sampler_view {
 	struct pipe_sampler_view	base;
         /* [0..7] = image descriptor
@@ -769,21 +769,21 @@ struct si_context {
 	void				*custom_blend_fmask_decompress;
 	void				*custom_blend_eliminate_fastclear;
 	void				*custom_blend_dcc_decompress;
 	void				*vs_blit_pos;
 	void				*vs_blit_pos_layered;
 	void				*vs_blit_color;
 	void				*vs_blit_color_layered;
 	void				*vs_blit_texcoord;
 	struct si_screen		*screen;
 	struct pipe_debug_callback	debug;
-	LLVMTargetMachineRef		tm; /* only non-threaded compilation */
+	struct si_compiler		compiler; /* only non-threaded compilation */
 	struct si_shader_ctx_state	fixed_func_tcs_shader;
 	struct r600_resource		*wait_mem_scratch;
 	unsigned			wait_mem_number;
 	uint16_t			prefetch_L2_mask;
 
 	bool				gfx_flush_in_progress:1;
 	bool				gfx_last_ib_is_busy:1;
 	bool				compute_is_busy:1;
 
 	unsigned			num_gfx_cs_flushes;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 765daa52bcb..ce654bb7610 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -70,21 +70,21 @@ struct si_function_info {
 	unsigned num_params;
 };
 
 enum si_arg_regfile {
 	ARG_SGPR,
 	ARG_VGPR
 };
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
 			       struct si_screen *sscreen,
-			       LLVMTargetMachineRef tm);
+			       struct si_compiler *compiler);
 
 static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
 				 struct lp_build_tgsi_context *bld_base,
 				 struct lp_build_emit_data *emit_data);
 
 static void si_dump_shader_key(unsigned processor, const struct si_shader *shader,
 			       FILE *f);
 
 static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 					union si_shader_part_key *key);
@@ -5641,21 +5641,21 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
 		fprintf(file, "\n");
 	}
 
 	si_shader_dump_stats(sscreen, shader, processor, file,
 			     check_debug_option);
 }
 
 static int si_compile_llvm(struct si_screen *sscreen,
 			   struct ac_shader_binary *binary,
 			   struct si_shader_config *conf,
-			   LLVMTargetMachineRef tm,
+			   struct si_compiler *compiler,
 			   LLVMModuleRef mod,
 			   struct pipe_debug_callback *debug,
 			   unsigned processor,
 			   const char *name)
 {
 	int r = 0;
 	unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
 
 	if (si_can_dump_shader(sscreen, processor)) {
 		fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
@@ -5667,21 +5667,21 @@ static int si_compile_llvm(struct si_screen *sscreen,
 		}
 	}
 
 	if (sscreen->record_llvm_ir) {
 		char *ir = LLVMPrintModuleToString(mod);
 		binary->llvm_ir_string = strdup(ir);
 		LLVMDisposeMessage(ir);
 	}
 
 	if (!si_replace_shader(count, binary)) {
-		r = si_llvm_compile(mod, binary, tm, debug);
+		r = si_llvm_compile(mod, binary, compiler, debug);
 		if (r)
 			return r;
 	}
 
 	si_shader_binary_read_config(binary, conf, 0);
 
 	/* Enable 64-bit and 16-bit denormals, because there is no performance
 	 * cost.
 	 *
 	 * If denormals are enabled, all floating-point output modifiers are
@@ -5719,21 +5719,21 @@ static void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
 {
 	if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
 		LLVMBuildRetVoid(ctx->ac.builder);
 	else
 		LLVMBuildRet(ctx->ac.builder, ret);
 }
 
 /* Generate code for the hardware VS shader stage to go with a geometry shader */
 struct si_shader *
 si_generate_gs_copy_shader(struct si_screen *sscreen,
-			   LLVMTargetMachineRef tm,
+			   struct si_compiler *compiler,
 			   struct si_shader_selector *gs_selector,
 			   struct pipe_debug_callback *debug)
 {
 	struct si_shader_context ctx;
 	struct si_shader *shader;
 	LLVMBuilderRef builder;
 	struct lp_build_tgsi_context *bld_base = &ctx.bld_base;
 	struct lp_build_context *uint = &bld_base->uint_bld;
 	struct si_shader_output_values *outputs;
 	struct tgsi_shader_info *gsinfo = &gs_selector->info;
@@ -5750,21 +5750,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 		return NULL;
 	}
 
 	/* We can leave the fence as permanently signaled because the GS copy
 	 * shader only becomes visible globally after it has been compiled. */
 	util_queue_fence_init(&shader->ready);
 
 	shader->selector = gs_selector;
 	shader->is_gs_copy_shader = true;
 
-	si_init_shader_ctx(&ctx, sscreen, tm);
+	si_init_shader_ctx(&ctx, sscreen, compiler);
 	ctx.shader = shader;
 	ctx.type = PIPE_SHADER_VERTEX;
 
 	builder = ctx.ac.builder;
 
 	create_function(&ctx);
 	preload_ring_buffers(&ctx);
 
 	LLVMValueRef voffset =
 		lp_build_mul_imm(uint, ctx.abi.vertex_id, 4);
@@ -5845,21 +5845,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 	}
 
 	LLVMPositionBuilderAtEnd(builder, end_bb);
 
 	LLVMBuildRetVoid(ctx.ac.builder);
 
 	ctx.type = PIPE_SHADER_GEOMETRY; /* override for shader dumping */
 	si_llvm_optimize_module(&ctx);
 
 	r = si_compile_llvm(sscreen, &ctx.shader->binary,
-			    &ctx.shader->config, ctx.tm,
+			    &ctx.shader->config, ctx.compiler,
 			    ctx.gallivm.module,
 			    debug, PIPE_SHADER_GEOMETRY,
 			    "GS Copy Shader");
 	if (!r) {
 		if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY))
 			fprintf(stderr, "GS Copy Shader:\n");
 		si_shader_dump(sscreen, ctx.shader, debug,
 			       PIPE_SHADER_GEOMETRY, stderr, true);
 		r = si_shader_binary_upload(sscreen, ctx.shader);
 	}
@@ -5967,25 +5967,25 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade
 	     processor == PIPE_SHADER_TESS_EVAL ||
 	     processor == PIPE_SHADER_VERTEX) &&
 	    !key->as_es && !key->as_ls) {
 		fprintf(f, "  opt.kill_outputs = 0x%"PRIx64"\n", key->opt.kill_outputs);
 		fprintf(f, "  opt.clip_disable = %u\n", key->opt.clip_disable);
 	}
 }
 
 static void si_init_shader_ctx(struct si_shader_context *ctx,
 			       struct si_screen *sscreen,
-			       LLVMTargetMachineRef tm)
+			       struct si_compiler *compiler)
 {
 	struct lp_build_tgsi_context *bld_base;
 
-	si_llvm_context_init(ctx, sscreen, tm);
+	si_llvm_context_init(ctx, sscreen, compiler);
 
 	bld_base = &ctx->bld_base;
 	bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
 
 	bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
 	bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action;
 	bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action;
 
 	bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
 
@@ -6749,41 +6749,41 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
 					num_out_sgpr = num_out;
 				}
 			}
 		}
 	}
 
 	LLVMBuildRetVoid(builder);
 }
 
 int si_compile_tgsi_shader(struct si_screen *sscreen,
-			   LLVMTargetMachineRef tm,
+			   struct si_compiler *compiler,
 			   struct si_shader *shader,
 			   bool is_monolithic,
 			   struct pipe_debug_callback *debug)
 {
 	struct si_shader_selector *sel = shader->selector;
 	struct si_shader_context ctx;
 	int r = -1;
 
 	/* Dump TGSI code before doing TGSI->LLVM conversion in case the
 	 * conversion fails. */
 	if (si_can_dump_shader(sscreen, sel->info.processor) &&
 	    !(sscreen->debug_flags & DBG(NO_TGSI))) {
 		if (sel->tokens)
 			tgsi_dump(sel->tokens, 0);
 		else
 			nir_print_shader(sel->nir, stderr);
 		si_dump_streamout(&sel->so);
 	}
 
-	si_init_shader_ctx(&ctx, sscreen, tm);
+	si_init_shader_ctx(&ctx, sscreen, compiler);
 	si_llvm_context_set_tgsi(&ctx, shader);
 	ctx.separate_prolog = !is_monolithic;
 
 	memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
 	       sizeof(shader->info.vs_output_param_offset));
 
 	shader->info.uses_instanceid = sel->info.uses_instanceid;
 
 	if (!si_compile_tgsi_main(&ctx, is_monolithic)) {
 		si_llvm_dispose(&ctx);
@@ -6979,21 +6979,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 	    si_can_dump_shader(sscreen, ctx.type)) {
 		ctx.shader->config.private_mem_vgprs =
 			ac_count_scratch_private_memory(ctx.main_fn);
 	}
 
 	/* Make sure the input is a pointer and not integer followed by inttoptr. */
 	assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) ==
 	       LLVMPointerTypeKind);
 
 	/* Compile to bytecode. */
-	r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
+	r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler,
 			    ctx.gallivm.module, debug, ctx.type, "TGSI shader");
 	si_llvm_dispose(&ctx);
 	if (r) {
 		fprintf(stderr, "LLVM failed to compile shader\n");
 		return r;
 	}
 
 	/* Validate SGPR and VGPR usage for compute to detect compiler bugs.
 	 * LLVM 3.9svn has this bug.
 	 */
@@ -7090,21 +7090,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
  * \param debug		debug callback
  * \param build		the callback responsible for building the main function
  * \return		non-NULL on success
  */
 static struct si_shader_part *
 si_get_shader_part(struct si_screen *sscreen,
 		   struct si_shader_part **list,
 		   enum pipe_shader_type type,
 		   bool prolog,
 		   union si_shader_part_key *key,
-		   LLVMTargetMachineRef tm,
+		   struct si_compiler *compiler,
 		   struct pipe_debug_callback *debug,
 		   void (*build)(struct si_shader_context *,
 				 union si_shader_part_key *),
 		   const char *name)
 {
 	struct si_shader_part *result;
 
 	mtx_lock(&sscreen->shader_parts_mutex);
 
 	/* Find existing. */
@@ -7115,21 +7115,21 @@ si_get_shader_part(struct si_screen *sscreen,
 		}
 	}
 
 	/* Compile a new one. */
 	result = CALLOC_STRUCT(si_shader_part);
 	result->key = *key;
 
 	struct si_shader shader = {};
 	struct si_shader_context ctx;
 
-	si_init_shader_ctx(&ctx, sscreen, tm);
+	si_init_shader_ctx(&ctx, sscreen, compiler);
 	ctx.shader = &shader;
 	ctx.type = type;
 
 	switch (type) {
 	case PIPE_SHADER_VERTEX:
 		shader.key.as_ls = key->vs_prolog.as_ls;
 		shader.key.as_es = key->vs_prolog.as_es;
 		break;
 	case PIPE_SHADER_TESS_CTRL:
 		assert(!prolog);
@@ -7146,21 +7146,21 @@ si_get_shader_part(struct si_screen *sscreen,
 		break;
 	default:
 		unreachable("bad shader part");
 	}
 
 	build(&ctx, key);
 
 	/* Compile. */
 	si_llvm_optimize_module(&ctx);
 
-	if (si_compile_llvm(sscreen, &result->binary, &result->config, tm,
+	if (si_compile_llvm(sscreen, &result->binary, &result->config, compiler,
 			    ctx.ac.module, debug, ctx.type, name)) {
 		FREE(result);
 		result = NULL;
 		goto out;
 	}
 
 	result->next = *list;
 	*list = result;
 
 out:
@@ -7336,53 +7336,53 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
 
 		index = ac_to_float(&ctx->ac, index);
 		ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index,
 					   fninfo.num_params + i, "");
 	}
 
 	si_llvm_build_ret(ctx, ret);
 }
 
 static bool si_get_vs_prolog(struct si_screen *sscreen,
-			     LLVMTargetMachineRef tm,
+			     struct si_compiler *compiler,
 			     struct si_shader *shader,
 			     struct pipe_debug_callback *debug,
 			     struct si_shader *main_part,
 			     const struct si_vs_prolog_bits *key)
 {
 	struct si_shader_selector *vs = main_part->selector;
 
 	if (!si_vs_needs_prolog(vs, key))
 		return true;
 
 	/* Get the prolog. */
 	union si_shader_part_key prolog_key;
 	si_get_vs_prolog_key(&vs->info, main_part->info.num_input_sgprs,
 			     key, shader, &prolog_key);
 
 	shader->prolog =
 		si_get_shader_part(sscreen, &sscreen->vs_prologs,
-				   PIPE_SHADER_VERTEX, true, &prolog_key, tm,
+				   PIPE_SHADER_VERTEX, true, &prolog_key, compiler,
 				   debug, si_build_vs_prolog_function,
 				   "Vertex Shader Prolog");
 	return shader->prolog != NULL;
 }
 
 /**
  * Select and compile (or reuse) vertex shader parts (prolog & epilog).
  */
 static bool si_shader_select_vs_parts(struct si_screen *sscreen,
-				      LLVMTargetMachineRef tm,
+				      struct si_compiler *compiler,
 				      struct si_shader *shader,
 				      struct pipe_debug_callback *debug)
 {
-	return si_get_vs_prolog(sscreen, tm, shader, debug, shader,
+	return si_get_vs_prolog(sscreen, compiler, shader, debug, shader,
 				&shader->key.part.vs.prolog);
 }
 
 /**
  * Compile the TCS epilog function. This writes tesselation factors to memory
  * based on the output primitive type of the tesselator (determined by TES).
  */
 static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
 					 union si_shader_part_key *key)
 {
@@ -7453,78 +7453,78 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
 			      LLVMGetParam(func, tess_factors_idx + 2),
 			      invoc0_tess_factors, invoc0_tess_factors + 4);
 
 	LLVMBuildRetVoid(ctx->ac.builder);
 }
 
 /**
  * Select and compile (or reuse) TCS parts (epilog).
  */
 static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
-				       LLVMTargetMachineRef tm,
+				       struct si_compiler *compiler,
 				       struct si_shader *shader,
 				       struct pipe_debug_callback *debug)
 {
 	if (sscreen->info.chip_class >= GFX9) {
 		struct si_shader *ls_main_part =
 			shader->key.part.tcs.ls->main_shader_part_ls;
 
-		if (!si_get_vs_prolog(sscreen, tm, shader, debug, ls_main_part,
+		if (!si_get_vs_prolog(sscreen, compiler, shader, debug, ls_main_part,
 				      &shader->key.part.tcs.ls_prolog))
 			return false;
 
 		shader->previous_stage = ls_main_part;
 	}
 
 	/* Get the epilog. */
 	union si_shader_part_key epilog_key;
 	memset(&epilog_key, 0, sizeof(epilog_key));
 	epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
 
 	shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs,
 					    PIPE_SHADER_TESS_CTRL, false,
-					    &epilog_key, tm, debug,
+					    &epilog_key, compiler, debug,
 					    si_build_tcs_epilog_function,
 					    "Tessellation Control Shader Epilog");
 	return shader->epilog != NULL;
 }
 
 /**
  * Select and compile (or reuse) GS parts (prolog).
  */
 static bool si_shader_select_gs_parts(struct si_screen *sscreen,
-				      LLVMTargetMachineRef tm,
+				      struct si_compiler *compiler,
 				      struct si_shader *shader,
 				      struct pipe_debug_callback *debug)
 {
 	if (sscreen->info.chip_class >= GFX9) {
 		struct si_shader *es_main_part =
 			shader->key.part.gs.es->main_shader_part_es;
 
 		if (shader->key.part.gs.es->type == PIPE_SHADER_VERTEX &&
-		    !si_get_vs_prolog(sscreen, tm, shader, debug, es_main_part,
+		    !si_get_vs_prolog(sscreen, compiler, shader, debug, es_main_part,
 				      &shader->key.part.gs.vs_prolog))
 			return false;
 
 		shader->previous_stage = es_main_part;
 	}
 
 	if (!shader->key.part.gs.prolog.tri_strip_adj_fix)
 		return true;
 
 	union si_shader_part_key prolog_key;
 	memset(&prolog_key, 0, sizeof(prolog_key));
 	prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
 
 	shader->prolog2 = si_get_shader_part(sscreen, &sscreen->gs_prologs,
 					    PIPE_SHADER_GEOMETRY, true,
-					    &prolog_key, tm, debug,
+					    &prolog_key, compiler, debug,
 					    si_build_gs_prolog_function,
 					    "Geometry Shader Prolog");
 	return shader->prolog2 != NULL;
 }
 
 /**
  * Build the pixel shader prolog function. This handles:
  * - two-side color selection and interpolation
  * - overriding interpolation parameters for the API PS
  * - polygon stippling
@@ -7898,49 +7898,49 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
 		si_emit_ps_exports(ctx, &exp);
 
 	/* Compile. */
 	LLVMBuildRetVoid(ctx->ac.builder);
 }
 
 /**
  * Select and compile (or reuse) pixel shader parts (prolog & epilog).
  */
 static bool si_shader_select_ps_parts(struct si_screen *sscreen,
-				      LLVMTargetMachineRef tm,
+				      struct si_compiler *compiler,
 				      struct si_shader *shader,
 				      struct pipe_debug_callback *debug)
 {
 	union si_shader_part_key prolog_key;
 	union si_shader_part_key epilog_key;
 
 	/* Get the prolog. */
 	si_get_ps_prolog_key(shader, &prolog_key, true);
 
 	/* The prolog is a no-op if these aren't set. */
 	if (si_need_ps_prolog(&prolog_key)) {
 		shader->prolog =
 			si_get_shader_part(sscreen, &sscreen->ps_prologs,
 					   PIPE_SHADER_FRAGMENT, true,
-					   &prolog_key, tm, debug,
+					   &prolog_key, compiler, debug,
 					   si_build_ps_prolog_function,
 					   "Fragment Shader Prolog");
 		if (!shader->prolog)
 			return false;
 	}
 
 	/* Get the epilog. */
 	si_get_ps_epilog_key(shader, &epilog_key);
 
 	shader->epilog =
 		si_get_shader_part(sscreen, &sscreen->ps_epilogs,
 				   PIPE_SHADER_FRAGMENT, false,
-				   &epilog_key, tm, debug,
+				   &epilog_key, compiler, debug,
 				   si_build_ps_epilog_function,
 				   "Fragment Shader Epilog");
 	if (!shader->epilog)
 		return false;
 
 	/* Enable POS_FIXED_PT if polygon stippling is enabled. */
 	if (shader->key.part.ps.prolog.poly_stipple) {
 		shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
 		assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr));
 	}
@@ -8029,39 +8029,39 @@ static void si_fix_resource_usage(struct si_screen *sscreen,
 
 	shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
 
 	if (shader->selector->type == PIPE_SHADER_COMPUTE &&
 	    si_get_max_workgroup_size(shader) > 64) {
 		si_multiwave_lds_size_workaround(sscreen,
 						 &shader->config.lds_size);
 	}
 }
 
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
 		     struct si_shader *shader,
 		     struct pipe_debug_callback *debug)
 {
 	struct si_shader_selector *sel = shader->selector;
 	struct si_shader *mainp = *si_get_main_shader_part(sel, &shader->key);
 	int r;
 
 	/* LS, ES, VS are compiled on demand if the main part hasn't been
 	 * compiled for that stage.
 	 *
 	 * Vertex shaders are compiled on demand when a vertex fetch
 	 * workaround must be applied.
 	 */
 	if (shader->is_monolithic) {
 		/* Monolithic shader (compiled as a whole, has many variants,
 		 * may take a long time to compile).
 		 */
-		r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug);
+		r = si_compile_tgsi_shader(sscreen, compiler, shader, true, debug);
 		if (r)
 			return r;
 	} else {
 		/* The shader consists of several parts:
 		 *
 		 * - the middle part is the user shader, it has 1 variant only
 		 *   and it was compiled during the creation of the shader
 		 *   selector
 		 * - the prolog part is inserted at the beginning
 		 * - the epilog part is inserted at the end
@@ -8087,35 +8087,35 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 		memcpy(shader->info.vs_output_param_offset,
 		       mainp->info.vs_output_param_offset,
 		       sizeof(mainp->info.vs_output_param_offset));
 		shader->info.uses_instanceid = mainp->info.uses_instanceid;
 		shader->info.nr_pos_exports = mainp->info.nr_pos_exports;
 		shader->info.nr_param_exports = mainp->info.nr_param_exports;
 
 		/* Select prologs and/or epilogs. */
 		switch (sel->type) {
 		case PIPE_SHADER_VERTEX:
-			if (!si_shader_select_vs_parts(sscreen, tm, shader, debug))
+			if (!si_shader_select_vs_parts(sscreen, compiler, shader, debug))
 				return -1;
 			break;
 		case PIPE_SHADER_TESS_CTRL:
-			if (!si_shader_select_tcs_parts(sscreen, tm, shader, debug))
+			if (!si_shader_select_tcs_parts(sscreen, compiler, shader, debug))
 				return -1;
 			break;
 		case PIPE_SHADER_TESS_EVAL:
 			break;
 		case PIPE_SHADER_GEOMETRY:
-			if (!si_shader_select_gs_parts(sscreen, tm, shader, debug))
+			if (!si_shader_select_gs_parts(sscreen, compiler, shader, debug))
 				return -1;
 			break;
 		case PIPE_SHADER_FRAGMENT:
-			if (!si_shader_select_ps_parts(sscreen, tm, shader, debug))
+			if (!si_shader_select_ps_parts(sscreen, compiler, shader, debug))
 				return -1;
 
 			/* Make sure we have at least as many VGPRs as there
 			 * are allocated inputs.
 			 */
 			shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
 							shader->info.num_input_vgprs);
 			break;
 		}
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index c26ccafdd69..8c479d638a9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -304,25 +304,30 @@ enum {
 	SI_FIX_FETCH_RGB_64_FLOAT,
 	SI_FIX_FETCH_RGBA_64_FLOAT,
 	SI_FIX_FETCH_RGB_8,	/* A = 1.0 */
 	SI_FIX_FETCH_RGB_8_INT,	/* A = 1 */
 	SI_FIX_FETCH_RGB_16,
 	SI_FIX_FETCH_RGB_16_INT,
 };
 
 struct si_shader;
 
+/* Per-thread persistent LLVM objects. */
+struct si_compiler {
+	LLVMTargetMachineRef		tm;
+};
+
 /* State of the context creating the shader object. */
 struct si_compiler_ctx_state {
 	/* Should only be used by si_init_shader_selector_async and
 	 * si_build_shader_variant if thread_index == -1 (non-threaded). */
-	LLVMTargetMachineRef		tm;
+	struct si_compiler		*compiler;
 
 	/* Used if thread_index == -1 or if debug.async is true. */
 	struct pipe_debug_callback	debug;
 
 	/* Used for creating the log string for gallium/ddebug. */
 	bool				is_debug_context;
 };
 
 /* A shader selector is a gallium CSO and contains shader variants and
  * binaries for one TGSI program. This can be shared by multiple contexts.
@@ -639,29 +644,29 @@ struct si_shader {
 struct si_shader_part {
 	struct si_shader_part *next;
 	union si_shader_part_key key;
 	struct ac_shader_binary binary;
 	struct si_shader_config config;
 };
 
 /* si_shader.c */
 struct si_shader *
 si_generate_gs_copy_shader(struct si_screen *sscreen,
-			   LLVMTargetMachineRef tm,
+			   struct si_compiler *compiler,
 			   struct si_shader_selector *gs_selector,
 			   struct pipe_debug_callback *debug);
 int si_compile_tgsi_shader(struct si_screen *sscreen,
-			   LLVMTargetMachineRef tm,
+			   struct si_compiler *compiler,
 			   struct si_shader *shader,
 			   bool is_monolithic,
 			   struct pipe_debug_callback *debug);
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
 		     struct si_shader *shader,
 		     struct pipe_debug_callback *debug);
 void si_shader_destroy(struct si_shader *shader);
 unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
 unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
 void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
 		    struct pipe_debug_callback *debug, unsigned processor,
 		    FILE *f, bool check_debug_option);
 void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index afcc14e38cc..0a347172d62 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -172,21 +172,21 @@ struct si_shader_context {
 	/* API GS */
 	int param_gs2vs_offset;
 	int param_gs_wave_id; /* GFX6 */
 	LLVMValueRef gs_vtx_offset[6]; /* in dwords (GFX6) */
 	int param_gs_vtx01_offset; /* in dwords (GFX9) */
 	int param_gs_vtx23_offset; /* in dwords (GFX9) */
 	int param_gs_vtx45_offset; /* in dwords (GFX9) */
 	/* CS */
 	int param_block_size;
 
-	LLVMTargetMachineRef tm;
+	struct si_compiler *compiler;
 
 	/* Preloaded descriptors. */
 	LLVMValueRef esgs_ring;
 	LLVMValueRef gsvs_ring[4];
 	LLVMValueRef tess_offchip_ring;
 
 	LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
 	LLVMValueRef gs_next_vertex[4];
 	LLVMValueRef postponed_kill;
 	LLVMValueRef return_value;
@@ -214,36 +214,36 @@ si_shader_context(struct lp_build_tgsi_context *bld_base)
 }
 
 static inline struct si_shader_context *
 si_shader_context_from_abi(struct ac_shader_abi *abi)
 {
 	struct si_shader_context *ctx = NULL;
 	return container_of(abi, ctx, abi);
 }
 
 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
-			 LLVMTargetMachineRef tm,
+			 struct si_compiler *compiler,
 			 struct pipe_debug_callback *debug);
 
 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
 			  enum tgsi_opcode_type type);
 
 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
 		     enum tgsi_opcode_type type, LLVMValueRef value);
 
 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
 				 LLVMValueRef index,
 				 unsigned num);
 
 void si_llvm_context_init(struct si_shader_context *ctx,
 			  struct si_screen *sscreen,
-			  LLVMTargetMachineRef tm);
+			  struct si_compiler *compiler);
 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
 			      struct si_shader *shader);
 
 void si_llvm_create_func(struct si_shader_context *ctx,
 			 const char *name,
 			 LLVMTypeRef *return_types, unsigned num_return_elems,
 			 LLVMTypeRef *ParamTypes, unsigned ParamCount);
 
 void si_llvm_dispose(struct si_shader_context *ctx);
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index acd7e0b2b2f..d0332a6078c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -92,42 +92,43 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
 
 	LLVMDisposeMessage(description);
 }
 
 /**
  * Compile an LLVM module to machine code.
  *
  * @returns 0 for success, 1 for failure
  */
 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
-			 LLVMTargetMachineRef tm,
+			 struct si_compiler *compiler,
 			 struct pipe_debug_callback *debug)
 {
 	struct si_llvm_diagnostics diag;
 	char *err;
 	LLVMContextRef llvm_ctx;
 	LLVMMemoryBufferRef out_buffer;
 	unsigned buffer_size;
 	const char *buffer_data;
 	LLVMBool mem_err;
 
 	diag.debug = debug;
 	diag.retval = 0;
 
 	/* Setup Diagnostic Handler*/
 	llvm_ctx = LLVMGetModuleContext(M);
 
 	LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
 
 	/* Compile IR*/
-	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
-								 &out_buffer);
+	mem_err = LLVMTargetMachineEmitToMemoryBuffer(compiler->tm, M,
+						      LLVMObjectFile, &err,
+						      &out_buffer);
 
 	/* Process Errors/Warnings */
 	if (mem_err) {
 		fprintf(stderr, "%s: %s", __FUNCTION__, err);
 		pipe_debug_message(debug, SHADER_INFO,
 				   "LLVM emit error: %s", err);
 		FREE(err);
 		diag.retval = 1;
 		goto out;
 	}
@@ -985,39 +986,39 @@ static void emit_immediate(struct lp_build_tgsi_context *bld_base,
 	for (i = 0; i < 4; ++i) {
 		ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
 				LLVMConstInt(ctx->i32, imm->u[i].Uint, false   );
 	}
 
 	ctx->imms_num++;
 }
 
 void si_llvm_context_init(struct si_shader_context *ctx,
 			  struct si_screen *sscreen,
-			  LLVMTargetMachineRef tm)
+			  struct si_compiler *compiler)
 {
 	struct lp_type type;
 
 	/* Initialize the gallivm object:
 	 * We are only using the module, context, and builder fields of this struct.
 	 * This should be enough for us to be able to pass our gallivm struct to the
 	 * helper functions in the gallivm module.
 	 */
 	memset(ctx, 0, sizeof(*ctx));
 	ctx->screen = sscreen;
-	ctx->tm = tm;
+	ctx->compiler = compiler;
 
 	ctx->gallivm.context = LLVMContextCreate();
 	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
 						ctx->gallivm.context);
 	LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
 
-	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
+	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(compiler->tm);
 	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
 	LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
 	LLVMDisposeTargetData(data_layout);
 	LLVMDisposeMessage(data_layout_str);
 
 	bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
 	enum ac_float_mode float_mode =
 		unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
 				AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
 
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index f23ce098208..2e215b91ce8 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1481,40 +1481,40 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
 	if (unlikely(sctx->screen->debug_flags & DBG(NO_OPT_VARIANT)))
 		memset(&key->opt, 0, sizeof(key->opt));
 }
 
 static void si_build_shader_variant(struct si_shader *shader,
 				    int thread_index,
 				    bool low_priority)
 {
 	struct si_shader_selector *sel = shader->selector;
 	struct si_screen *sscreen = sel->screen;
-	LLVMTargetMachineRef tm;
+	struct si_compiler *compiler;
 	struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
 	int r;
 
 	if (thread_index >= 0) {
 		if (low_priority) {
-			assert(thread_index < ARRAY_SIZE(sscreen->tm_low_priority));
-			tm = sscreen->tm_low_priority[thread_index];
+			assert(thread_index < ARRAY_SIZE(sscreen->compiler_lowp));
+			compiler = &sscreen->compiler_lowp[thread_index];
 		} else {
-			assert(thread_index < ARRAY_SIZE(sscreen->tm));
-			tm = sscreen->tm[thread_index];
+			assert(thread_index < ARRAY_SIZE(sscreen->compiler));
+			compiler = &sscreen->compiler[thread_index];
 		}
 		if (!debug->async)
 			debug = NULL;
 	} else {
 		assert(!low_priority);
-		tm = shader->compiler_ctx_state.tm;
+		compiler = shader->compiler_ctx_state.compiler;
 	}
 
-	r = si_shader_create(sscreen, tm, shader, debug);
+	r = si_shader_create(sscreen, compiler, shader, debug);
 	if (unlikely(r)) {
 		PRINT_ERR("Failed to build shader variant (type=%u) %d\n",
 			 sel->type, r);
 		shader->compilation_failed = true;
 		return;
 	}
 
 	if (shader->compiler_ctx_state.is_debug_context) {
 		FILE *f = open_memstream(&shader->shader_log,
 					 &shader->shader_log_size);
@@ -1553,21 +1553,21 @@ static bool si_check_missing_main_part(struct si_screen *sscreen,
 
 		/* We can leave the fence as permanently signaled because the
 		 * main part becomes visible globally only after it has been
 		 * compiled. */
 		util_queue_fence_init(&main_part->ready);
 
 		main_part->selector = sel;
 		main_part->key.as_es = key->as_es;
 		main_part->key.as_ls = key->as_ls;
 
-		if (si_compile_tgsi_shader(sscreen, compiler_state->tm,
+		if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
 					   main_part, false,
 					   &compiler_state->debug) != 0) {
 			FREE(main_part);
 			return false;
 		}
 		*mainp = main_part;
 	}
 	return true;
 }
 
@@ -1828,27 +1828,27 @@ static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
 
 /**
  * Compile the main shader part or the monolithic shader as part of
  * si_shader_selector initialization. Since it can be done asynchronously,
  * there is no way to report compile failures to applications.
  */
 static void si_init_shader_selector_async(void *job, int thread_index)
 {
 	struct si_shader_selector *sel = (struct si_shader_selector *)job;
 	struct si_screen *sscreen = sel->screen;
-	LLVMTargetMachineRef tm;
+	struct si_compiler *compiler;
 	struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
 
 	assert(!debug->debug_message || debug->async);
 	assert(thread_index >= 0);
-	assert(thread_index < ARRAY_SIZE(sscreen->tm));
-	tm = sscreen->tm[thread_index];
+	assert(thread_index < ARRAY_SIZE(sscreen->compiler));
+	compiler = &sscreen->compiler[thread_index];
 
 	/* Compile the main shader part for use with a prolog and/or epilog.
 	 * If this fails, the driver will try to compile a monolithic shader
 	 * on demand.
 	 */
 	if (!sscreen->use_monolithic_shaders) {
 		struct si_shader *shader = CALLOC_STRUCT(si_shader);
 		void *ir_binary = NULL;
 
 		if (!shader) {
@@ -1872,21 +1872,21 @@ static void si_init_shader_selector_async(void *job, int thread_index)
 		mtx_lock(&sscreen->shader_cache_mutex);
 
 		if (ir_binary &&
 		    si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
 			mtx_unlock(&sscreen->shader_cache_mutex);
 			si_shader_dump_stats_for_shader_db(shader, debug);
 		} else {
 			mtx_unlock(&sscreen->shader_cache_mutex);
 
 			/* Compile the shader if it hasn't been loaded from the cache. */
-			if (si_compile_tgsi_shader(sscreen, tm, shader, false,
+			if (si_compile_tgsi_shader(sscreen, compiler, shader, false,
 						   debug) != 0) {
 				FREE(shader);
 				FREE(ir_binary);
 				fprintf(stderr, "radeonsi: can't compile a main shader part\n");
 				return;
 			}
 
 			if (ir_binary) {
 				mtx_lock(&sscreen->shader_cache_mutex);
 				if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
@@ -1935,21 +1935,21 @@ static void si_init_shader_selector_async(void *job, int thread_index)
 				case TGSI_SEMANTIC_CLIPVERTEX:
 				case TGSI_SEMANTIC_EDGEFLAG:
 					break;
 				}
 			}
 		}
 	}
 
 	/* The GS copy shader is always pre-compiled. */
 	if (sel->type == PIPE_SHADER_GEOMETRY) {
-		sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, tm, sel, debug);
+		sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug);
 		if (!sel->gs_copy_shader) {
 			fprintf(stderr, "radeonsi: can't create GS copy shader\n");
 			return;
 		}
 
 		si_shader_vs(sscreen, sel->gs_copy_shader, sel);
 	}
 }
 
 /* Return descriptor slot usage masks from the given shader info. */
@@ -3127,21 +3127,21 @@ bool si_update_shaders(struct si_context *sctx)
 	struct pipe_context *ctx = (struct pipe_context*)sctx;
 	struct si_compiler_ctx_state compiler_state;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	struct si_shader *old_vs = si_get_vs_state(sctx);
 	bool old_clip_disable = old_vs ? old_vs->key.opt.clip_disable : false;
 	struct si_shader *old_ps = sctx->ps_shader.current;
 	unsigned old_spi_shader_col_format =
 		old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
 	int r;
 
-	compiler_state.tm = sctx->tm;
+	compiler_state.compiler = &sctx->compiler;
 	compiler_state.debug = sctx->debug;
 	compiler_state.is_debug_context = sctx->is_debug;
 
 	/* Update stages before GS. */
 	if (sctx->tes_shader.cso) {
 		if (!sctx->tess_rings) {
 			si_init_tess_factor_ring(sctx);
 			if (!sctx->tess_rings)
 				return false;
 		}
-- 
2.17.0