[Mesa-dev] [PATCH 1/9] radeonsi: add struct si_compiler containing LLVMTargetMachineRef
Marek Olšák
maraeo at gmail.com
Tue Apr 17 00:52:12 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
It will contain more variables.
---
src/gallium/drivers/radeonsi/si_compute.c | 8 +--
src/gallium/drivers/radeonsi/si_pipe.c | 52 ++++++++-------
src/gallium/drivers/radeonsi/si_pipe.h | 6 +-
src/gallium/drivers/radeonsi/si_shader.c | 66 +++++++++----------
src/gallium/drivers/radeonsi/si_shader.h | 13 ++--
.../drivers/radeonsi/si_shader_internal.h | 6 +-
.../drivers/radeonsi/si_shader_tgsi_setup.c | 13 ++--
.../drivers/radeonsi/si_state_shaders.c | 28 ++++----
8 files changed, 101 insertions(+), 91 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c
index 69c3dce0124..e95e79c7b46 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -79,27 +79,27 @@ static void code_object_to_config(const amd_kernel_code_t *code_object,
out_config->scratch_bytes_per_wave =
align(code_object->workitem_private_segment_byte_size * 64, 1024);
}
/* Asynchronous compute shader compilation. */
static void si_create_compute_state_async(void *job, int thread_index)
{
struct si_compute *program = (struct si_compute *)job;
struct si_shader *shader = &program->shader;
struct si_shader_selector sel;
- LLVMTargetMachineRef tm;
+ struct si_compiler *compiler;
struct pipe_debug_callback *debug = &program->compiler_ctx_state.debug;
assert(!debug->debug_message || debug->async);
assert(thread_index >= 0);
- assert(thread_index < ARRAY_SIZE(program->screen->tm));
- tm = program->screen->tm[thread_index];
+ assert(thread_index < ARRAY_SIZE(program->screen->compiler));
+ compiler = &program->screen->compiler[thread_index];
memset(&sel, 0, sizeof(sel));
sel.screen = program->screen;
if (program->ir_type == PIPE_SHADER_IR_TGSI) {
tgsi_scan_shader(program->ir.tgsi, &sel.info);
sel.tokens = program->ir.tgsi;
} else {
assert(program->ir_type == PIPE_SHADER_IR_NIR);
@@ -116,21 +116,21 @@ static void si_create_compute_state_async(void *job, int thread_index)
&program->active_const_and_shader_buffers,
&program->active_samplers_and_images);
program->shader.selector = &sel;
program->shader.is_monolithic = true;
program->uses_grid_size = sel.info.uses_grid_size;
program->uses_block_size = sel.info.uses_block_size;
program->uses_bindless_samplers = sel.info.uses_bindless_samplers;
program->uses_bindless_images = sel.info.uses_bindless_images;
- if (si_shader_create(program->screen, tm, &program->shader, debug)) {
+ if (si_shader_create(program->screen, compiler, &program->shader, debug)) {
program->shader.compilation_failed = true;
} else {
bool scratch_enabled = shader->config.scratch_bytes_per_wave > 0;
unsigned user_sgprs = SI_NUM_RESOURCE_SGPRS +
(sel.info.uses_grid_size ? 3 : 0) +
(sel.info.uses_block_size ? 3 : 0);
shader->config.rsrc1 =
S_00B848_VGPRS((shader->config.num_vgprs - 1) / 4) |
S_00B848_SGPRS((shader->config.num_sgprs - 1) / 8) |
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 3de843af11c..fcf3556bcc1 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -95,20 +95,38 @@ static const struct debug_named_value debug_options[] = {
/* Tests: */
{ "testdma", DBG(TEST_DMA), "Invoke SDMA tests and exit." },
{ "testvmfaultcp", DBG(TEST_VMFAULT_CP), "Invoke a CP VM fault test and exit." },
{ "testvmfaultsdma", DBG(TEST_VMFAULT_SDMA), "Invoke a SDMA VM fault test and exit." },
{ "testvmfaultshader", DBG(TEST_VMFAULT_SHADER), "Invoke a shader VM fault test and exit." },
DEBUG_NAMED_VALUE_END /* must be last */
};
+static void si_init_compiler(struct si_screen *sscreen,
+ struct si_compiler *compiler)
+{
+ enum ac_target_machine_options tm_options =
+ (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
+ (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) |
+ (sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) |
+ (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0);
+
+ compiler->tm = ac_create_target_machine(sscreen->info.family, tm_options);
+}
+
+static void si_destroy_compiler(struct si_compiler *compiler)
+{
+ if (compiler->tm)
+ LLVMDisposeTargetMachine(compiler->tm);
+}
+
/*
* pipe_context
*/
static void si_destroy_context(struct pipe_context *context)
{
struct si_context *sctx = (struct si_context *)context;
int i;
/* Unreference the framebuffer normally to disable related logic
* properly.
@@ -193,21 +211,21 @@ static void si_destroy_context(struct pipe_context *context)
slab_destroy_child(&sctx->pool_transfers);
slab_destroy_child(&sctx->pool_transfers_unsync);
if (sctx->allocator_zeroed_memory)
u_suballocator_destroy(sctx->allocator_zeroed_memory);
sctx->ws->fence_reference(&sctx->last_gfx_fence, NULL);
sctx->ws->fence_reference(&sctx->last_sdma_fence, NULL);
r600_resource_reference(&sctx->eop_bug_scratch, NULL);
- LLVMDisposeTargetMachine(sctx->tm);
+ si_destroy_compiler(&sctx->compiler);
si_saved_cs_reference(&sctx->current_saved_cs, NULL);
_mesa_hash_table_destroy(sctx->tex_handles, NULL);
_mesa_hash_table_destroy(sctx->img_handles, NULL);
util_dynarray_fini(&sctx->resident_tex_handles);
util_dynarray_fini(&sctx->resident_img_handles);
util_dynarray_fini(&sctx->resident_tex_needs_color_decompress);
util_dynarray_fini(&sctx->resident_img_needs_color_decompress);
@@ -278,32 +296,20 @@ static void si_emit_string_marker(struct pipe_context *ctx,
const char *string, int len)
{
struct si_context *sctx = (struct si_context *)ctx;
dd_parse_apitrace_marker(string, len, &sctx->apitrace_call_number);
if (sctx->log)
u_log_printf(sctx->log, "\nString marker: %*s\n", len, string);
}
-static LLVMTargetMachineRef
-si_create_llvm_target_machine(struct si_screen *sscreen)
-{
- enum ac_target_machine_options tm_options =
- (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
- (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) |
- (sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) |
- (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0);
-
- return ac_create_target_machine(sscreen->info.family, tm_options);
-}
-
static void si_set_debug_callback(struct pipe_context *ctx,
const struct pipe_debug_callback *cb)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_screen *screen = sctx->screen;
util_queue_finish(&screen->shader_compiler_queue);
util_queue_finish(&screen->shader_compiler_queue_low_priority);
if (cb)
@@ -543,21 +549,21 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
* 1 threadgroup, so that the hw doesn't hang from being unable
* to start any.
*
* The recommended value is 4 per CU at most. Higher numbers don't
* bring much benefit, but they still occupy chip resources (think
* async compute). I've seen ~2% performance difference between 4 and 32.
*/
sctx->scratch_waves = MAX2(32 * sscreen->info.num_good_compute_units,
max_threads_per_block / 64);
- sctx->tm = si_create_llvm_target_machine(sscreen);
+ si_init_compiler(sscreen, &sctx->compiler);
/* Bindless handles. */
sctx->tex_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
sctx->img_handles = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
_mesa_key_pointer_equal);
util_dynarray_init(&sctx->resident_tex_handles, NULL);
util_dynarray_init(&sctx->resident_img_handles, NULL);
util_dynarray_init(&sctx->resident_tex_needs_color_decompress, NULL);
@@ -616,27 +622,25 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
sscreen->ps_epilogs
};
unsigned i;
if (!sscreen->ws->unref(sscreen->ws))
return;
util_queue_destroy(&sscreen->shader_compiler_queue);
util_queue_destroy(&sscreen->shader_compiler_queue_low_priority);
- for (i = 0; i < ARRAY_SIZE(sscreen->tm); i++)
- if (sscreen->tm[i])
- LLVMDisposeTargetMachine(sscreen->tm[i]);
+ for (i = 0; i < ARRAY_SIZE(sscreen->compiler); i++)
+ si_destroy_compiler(&sscreen->compiler[i]);
- for (i = 0; i < ARRAY_SIZE(sscreen->tm_low_priority); i++)
- if (sscreen->tm_low_priority[i])
- LLVMDisposeTargetMachine(sscreen->tm_low_priority[i]);
+ for (i = 0; i < ARRAY_SIZE(sscreen->compiler_lowp); i++)
+ si_destroy_compiler(&sscreen->compiler_lowp[i]);
/* Free shader parts. */
for (i = 0; i < ARRAY_SIZE(parts); i++) {
while (parts[i]) {
struct si_shader_part *part = parts[i];
parts[i] = part->next;
ac_shader_binary_clean(&part->binary);
FREE(part);
}
@@ -857,23 +861,23 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
return NULL;
}
si_disk_cache_create(sscreen);
/* Only enable as many threads as we have target machines, but at most
* the number of CPUs - 1 if there is more than one.
*/
num_threads = sysconf(_SC_NPROCESSORS_ONLN);
num_threads = MAX2(1, num_threads - 1);
- num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->tm));
+ num_compiler_threads = MIN2(num_threads, ARRAY_SIZE(sscreen->compiler));
num_compiler_threads_lowprio =
- MIN2(num_threads, ARRAY_SIZE(sscreen->tm_low_priority));
+ MIN2(num_threads, ARRAY_SIZE(sscreen->compiler_lowp));
if (!util_queue_init(&sscreen->shader_compiler_queue, "si_shader",
32, num_compiler_threads,
UTIL_QUEUE_INIT_RESIZE_IF_FULL)) {
si_destroy_shader_cache(sscreen);
FREE(sscreen);
return NULL;
}
if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
@@ -1023,23 +1027,23 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
SI_CONTEXT_INV_VMEM_L1;
if (sscreen->info.chip_class <= VI) {
sscreen->barrier_flags.cp_to_L2 |= SI_CONTEXT_INV_GLOBAL_L2;
sscreen->barrier_flags.L2_to_cp |= SI_CONTEXT_WRITEBACK_GLOBAL_L2;
}
if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
sscreen->debug_flags |= DBG_ALL_SHADERS;
for (i = 0; i < num_compiler_threads; i++)
- sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
+ si_init_compiler(sscreen, &sscreen->compiler[i]);
for (i = 0; i < num_compiler_threads_lowprio; i++)
- sscreen->tm_low_priority[i] = si_create_llvm_target_machine(sscreen);
+ si_init_compiler(sscreen, &sscreen->compiler_lowp[i]);
/* Create the auxiliary context. This must be done last. */
sscreen->aux_context = si_create_context(&sscreen->b, 0);
if (sscreen->debug_flags & DBG(TEST_DMA))
si_test_dma(sscreen);
if (sscreen->debug_flags & (DBG(TEST_VMFAULT_CP) |
DBG(TEST_VMFAULT_SDMA) |
DBG(TEST_VMFAULT_SHADER)))
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index 823509524d4..54c9b725fcb 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -523,26 +523,26 @@ struct si_screen {
* those as well.
*/
mtx_t shader_cache_mutex;
struct hash_table *shader_cache;
/* Shader compiler queue for multithreaded compilation. */
struct util_queue shader_compiler_queue;
/* Use at most 3 normal compiler threads on quadcore and better.
* Hyperthreaded CPUs report the number of threads, but we want
* the number of cores. */
- LLVMTargetMachineRef tm[3]; /* used by the queue only */
+ struct si_compiler compiler[3]; /* used by the queue only */
struct util_queue shader_compiler_queue_low_priority;
/* Use at most 2 low priority threads on quadcore and better.
* We want to minimize the impact on multithreaded Mesa. */
- LLVMTargetMachineRef tm_low_priority[2]; /* at most 2 threads */
+ struct si_compiler compiler_lowp[2]; /* at most 2 threads */
};
struct si_blend_color {
struct pipe_blend_color state;
bool any_nonzeros;
};
struct si_sampler_view {
struct pipe_sampler_view base;
/* [0..7] = image descriptor
@@ -769,21 +769,21 @@ struct si_context {
void *custom_blend_fmask_decompress;
void *custom_blend_eliminate_fastclear;
void *custom_blend_dcc_decompress;
void *vs_blit_pos;
void *vs_blit_pos_layered;
void *vs_blit_color;
void *vs_blit_color_layered;
void *vs_blit_texcoord;
struct si_screen *screen;
struct pipe_debug_callback debug;
- LLVMTargetMachineRef tm; /* only non-threaded compilation */
+ struct si_compiler compiler; /* only non-threaded compilation */
struct si_shader_ctx_state fixed_func_tcs_shader;
struct r600_resource *wait_mem_scratch;
unsigned wait_mem_number;
uint16_t prefetch_L2_mask;
bool gfx_flush_in_progress:1;
bool gfx_last_ib_is_busy:1;
bool compute_is_busy:1;
unsigned num_gfx_cs_flushes;
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 765daa52bcb..ce654bb7610 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -70,21 +70,21 @@ struct si_function_info {
unsigned num_params;
};
enum si_arg_regfile {
ARG_SGPR,
ARG_VGPR
};
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
- LLVMTargetMachineRef tm);
+ struct si_compiler *compiler);
static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data);
static void si_dump_shader_key(unsigned processor, const struct si_shader *shader,
FILE *f);
static void si_build_vs_prolog_function(struct si_shader_context *ctx,
union si_shader_part_key *key);
@@ -5641,21 +5641,21 @@ void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
fprintf(file, "\n");
}
si_shader_dump_stats(sscreen, shader, processor, file,
check_debug_option);
}
static int si_compile_llvm(struct si_screen *sscreen,
struct ac_shader_binary *binary,
struct si_shader_config *conf,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
LLVMModuleRef mod,
struct pipe_debug_callback *debug,
unsigned processor,
const char *name)
{
int r = 0;
unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
if (si_can_dump_shader(sscreen, processor)) {
fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
@@ -5667,21 +5667,21 @@ static int si_compile_llvm(struct si_screen *sscreen,
}
}
if (sscreen->record_llvm_ir) {
char *ir = LLVMPrintModuleToString(mod);
binary->llvm_ir_string = strdup(ir);
LLVMDisposeMessage(ir);
}
if (!si_replace_shader(count, binary)) {
- r = si_llvm_compile(mod, binary, tm, debug);
+ r = si_llvm_compile(mod, binary, compiler, debug);
if (r)
return r;
}
si_shader_binary_read_config(binary, conf, 0);
/* Enable 64-bit and 16-bit denormals, because there is no performance
* cost.
*
* If denormals are enabled, all floating-point output modifiers are
@@ -5719,21 +5719,21 @@ static void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
{
if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
LLVMBuildRetVoid(ctx->ac.builder);
else
LLVMBuildRet(ctx->ac.builder, ret);
}
/* Generate code for the hardware VS shader stage to go with a geometry shader */
struct si_shader *
si_generate_gs_copy_shader(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct si_shader_selector *gs_selector,
struct pipe_debug_callback *debug)
{
struct si_shader_context ctx;
struct si_shader *shader;
LLVMBuilderRef builder;
struct lp_build_tgsi_context *bld_base = &ctx.bld_base;
struct lp_build_context *uint = &bld_base->uint_bld;
struct si_shader_output_values *outputs;
struct tgsi_shader_info *gsinfo = &gs_selector->info;
@@ -5750,21 +5750,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
return NULL;
}
/* We can leave the fence as permanently signaled because the GS copy
* shader only becomes visible globally after it has been compiled. */
util_queue_fence_init(&shader->ready);
shader->selector = gs_selector;
shader->is_gs_copy_shader = true;
- si_init_shader_ctx(&ctx, sscreen, tm);
+ si_init_shader_ctx(&ctx, sscreen, compiler);
ctx.shader = shader;
ctx.type = PIPE_SHADER_VERTEX;
builder = ctx.ac.builder;
create_function(&ctx);
preload_ring_buffers(&ctx);
LLVMValueRef voffset =
lp_build_mul_imm(uint, ctx.abi.vertex_id, 4);
@@ -5845,21 +5845,21 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
}
LLVMPositionBuilderAtEnd(builder, end_bb);
LLVMBuildRetVoid(ctx.ac.builder);
ctx.type = PIPE_SHADER_GEOMETRY; /* override for shader dumping */
si_llvm_optimize_module(&ctx);
r = si_compile_llvm(sscreen, &ctx.shader->binary,
- &ctx.shader->config, ctx.tm,
+ &ctx.shader->config, ctx.compiler,
ctx.gallivm.module,
debug, PIPE_SHADER_GEOMETRY,
"GS Copy Shader");
if (!r) {
if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY))
fprintf(stderr, "GS Copy Shader:\n");
si_shader_dump(sscreen, ctx.shader, debug,
PIPE_SHADER_GEOMETRY, stderr, true);
r = si_shader_binary_upload(sscreen, ctx.shader);
}
@@ -5967,25 +5967,25 @@ static void si_dump_shader_key(unsigned processor, const struct si_shader *shade
processor == PIPE_SHADER_TESS_EVAL ||
processor == PIPE_SHADER_VERTEX) &&
!key->as_es && !key->as_ls) {
fprintf(f, " opt.kill_outputs = 0x%"PRIx64"\n", key->opt.kill_outputs);
fprintf(f, " opt.clip_disable = %u\n", key->opt.clip_disable);
}
}
static void si_init_shader_ctx(struct si_shader_context *ctx,
struct si_screen *sscreen,
- LLVMTargetMachineRef tm)
+ struct si_compiler *compiler)
{
struct lp_build_tgsi_context *bld_base;
- si_llvm_context_init(ctx, sscreen, tm);
+ si_llvm_context_init(ctx, sscreen, compiler);
bld_base = &ctx->bld_base;
bld_base->emit_fetch_funcs[TGSI_FILE_CONSTANT] = fetch_constant;
bld_base->op_actions[TGSI_OPCODE_INTERP_CENTROID] = interp_action;
bld_base->op_actions[TGSI_OPCODE_INTERP_SAMPLE] = interp_action;
bld_base->op_actions[TGSI_OPCODE_INTERP_OFFSET] = interp_action;
bld_base->op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
@@ -6749,41 +6749,41 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
num_out_sgpr = num_out;
}
}
}
}
LLVMBuildRetVoid(builder);
}
int si_compile_tgsi_shader(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct si_shader *shader,
bool is_monolithic,
struct pipe_debug_callback *debug)
{
struct si_shader_selector *sel = shader->selector;
struct si_shader_context ctx;
int r = -1;
/* Dump TGSI code before doing TGSI->LLVM conversion in case the
* conversion fails. */
if (si_can_dump_shader(sscreen, sel->info.processor) &&
!(sscreen->debug_flags & DBG(NO_TGSI))) {
if (sel->tokens)
tgsi_dump(sel->tokens, 0);
else
nir_print_shader(sel->nir, stderr);
si_dump_streamout(&sel->so);
}
- si_init_shader_ctx(&ctx, sscreen, tm);
+ si_init_shader_ctx(&ctx, sscreen, compiler);
si_llvm_context_set_tgsi(&ctx, shader);
ctx.separate_prolog = !is_monolithic;
memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
sizeof(shader->info.vs_output_param_offset));
shader->info.uses_instanceid = sel->info.uses_instanceid;
if (!si_compile_tgsi_main(&ctx, is_monolithic)) {
si_llvm_dispose(&ctx);
@@ -6979,21 +6979,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
si_can_dump_shader(sscreen, ctx.type)) {
ctx.shader->config.private_mem_vgprs =
ac_count_scratch_private_memory(ctx.main_fn);
}
/* Make sure the input is a pointer and not integer followed by inttoptr. */
assert(LLVMGetTypeKind(LLVMTypeOf(LLVMGetParam(ctx.main_fn, 0))) ==
LLVMPointerTypeKind);
/* Compile to bytecode. */
- r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,
+ r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler,
ctx.gallivm.module, debug, ctx.type, "TGSI shader");
si_llvm_dispose(&ctx);
if (r) {
fprintf(stderr, "LLVM failed to compile shader\n");
return r;
}
/* Validate SGPR and VGPR usage for compute to detect compiler bugs.
* LLVM 3.9svn has this bug.
*/
@@ -7090,21 +7090,21 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
* \param debug debug callback
* \param build the callback responsible for building the main function
* \return non-NULL on success
*/
static struct si_shader_part *
si_get_shader_part(struct si_screen *sscreen,
struct si_shader_part **list,
enum pipe_shader_type type,
bool prolog,
union si_shader_part_key *key,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct pipe_debug_callback *debug,
void (*build)(struct si_shader_context *,
union si_shader_part_key *),
const char *name)
{
struct si_shader_part *result;
mtx_lock(&sscreen->shader_parts_mutex);
/* Find existing. */
@@ -7115,21 +7115,21 @@ si_get_shader_part(struct si_screen *sscreen,
}
}
/* Compile a new one. */
result = CALLOC_STRUCT(si_shader_part);
result->key = *key;
struct si_shader shader = {};
struct si_shader_context ctx;
- si_init_shader_ctx(&ctx, sscreen, tm);
+ si_init_shader_ctx(&ctx, sscreen, compiler);
ctx.shader = &shader;
ctx.type = type;
switch (type) {
case PIPE_SHADER_VERTEX:
shader.key.as_ls = key->vs_prolog.as_ls;
shader.key.as_es = key->vs_prolog.as_es;
break;
case PIPE_SHADER_TESS_CTRL:
assert(!prolog);
@@ -7146,21 +7146,21 @@ si_get_shader_part(struct si_screen *sscreen,
break;
default:
unreachable("bad shader part");
}
build(&ctx, key);
/* Compile. */
si_llvm_optimize_module(&ctx);
- if (si_compile_llvm(sscreen, &result->binary, &result->config, tm,
+ if (si_compile_llvm(sscreen, &result->binary, &result->config, compiler,
ctx.ac.module, debug, ctx.type, name)) {
FREE(result);
result = NULL;
goto out;
}
result->next = *list;
*list = result;
out:
@@ -7336,53 +7336,53 @@ static void si_build_vs_prolog_function(struct si_shader_context *ctx,
index = ac_to_float(&ctx->ac, index);
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, index,
fninfo.num_params + i, "");
}
si_llvm_build_ret(ctx, ret);
}
static bool si_get_vs_prolog(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug,
struct si_shader *main_part,
const struct si_vs_prolog_bits *key)
{
struct si_shader_selector *vs = main_part->selector;
if (!si_vs_needs_prolog(vs, key))
return true;
/* Get the prolog. */
union si_shader_part_key prolog_key;
si_get_vs_prolog_key(&vs->info, main_part->info.num_input_sgprs,
key, shader, &prolog_key);
shader->prolog =
si_get_shader_part(sscreen, &sscreen->vs_prologs,
- PIPE_SHADER_VERTEX, true, &prolog_key, tm,
+ PIPE_SHADER_VERTEX, true, &prolog_key, compiler,
debug, si_build_vs_prolog_function,
"Vertex Shader Prolog");
return shader->prolog != NULL;
}
/**
* Select and compile (or reuse) vertex shader parts (prolog & epilog).
*/
static bool si_shader_select_vs_parts(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
- return si_get_vs_prolog(sscreen, tm, shader, debug, shader,
+ return si_get_vs_prolog(sscreen, compiler, shader, debug, shader,
&shader->key.part.vs.prolog);
}
/**
* Compile the TCS epilog function. This writes tesselation factors to memory
* based on the output primitive type of the tesselator (determined by TES).
*/
static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
union si_shader_part_key *key)
{
@@ -7453,78 +7453,78 @@ static void si_build_tcs_epilog_function(struct si_shader_context *ctx,
LLVMGetParam(func, tess_factors_idx + 2),
invoc0_tess_factors, invoc0_tess_factors + 4);
LLVMBuildRetVoid(ctx->ac.builder);
}
/**
* Select and compile (or reuse) TCS parts (epilog).
*/
static bool si_shader_select_tcs_parts(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
if (sscreen->info.chip_class >= GFX9) {
struct si_shader *ls_main_part =
shader->key.part.tcs.ls->main_shader_part_ls;
- if (!si_get_vs_prolog(sscreen, tm, shader, debug, ls_main_part,
+ if (!si_get_vs_prolog(sscreen, compiler, shader, debug, ls_main_part,
&shader->key.part.tcs.ls_prolog))
return false;
shader->previous_stage = ls_main_part;
}
/* Get the epilog. */
union si_shader_part_key epilog_key;
memset(&epilog_key, 0, sizeof(epilog_key));
epilog_key.tcs_epilog.states = shader->key.part.tcs.epilog;
shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs,
PIPE_SHADER_TESS_CTRL, false,
- &epilog_key, tm, debug,
+ &epilog_key, compiler, debug,
si_build_tcs_epilog_function,
"Tessellation Control Shader Epilog");
return shader->epilog != NULL;
}
/**
* Select and compile (or reuse) GS parts (prolog).
*/
static bool si_shader_select_gs_parts(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
if (sscreen->info.chip_class >= GFX9) {
struct si_shader *es_main_part =
shader->key.part.gs.es->main_shader_part_es;
if (shader->key.part.gs.es->type == PIPE_SHADER_VERTEX &&
- !si_get_vs_prolog(sscreen, tm, shader, debug, es_main_part,
+ !si_get_vs_prolog(sscreen, compiler, shader, debug, es_main_part,
&shader->key.part.gs.vs_prolog))
return false;
shader->previous_stage = es_main_part;
}
if (!shader->key.part.gs.prolog.tri_strip_adj_fix)
return true;
union si_shader_part_key prolog_key;
memset(&prolog_key, 0, sizeof(prolog_key));
prolog_key.gs_prolog.states = shader->key.part.gs.prolog;
shader->prolog2 = si_get_shader_part(sscreen, &sscreen->gs_prologs,
PIPE_SHADER_GEOMETRY, true,
- &prolog_key, tm, debug,
+ &prolog_key, compiler, debug,
si_build_gs_prolog_function,
"Geometry Shader Prolog");
return shader->prolog2 != NULL;
}
/**
* Build the pixel shader prolog function. This handles:
* - two-side color selection and interpolation
* - overriding interpolation parameters for the API PS
* - polygon stippling
@@ -7898,49 +7898,49 @@ static void si_build_ps_epilog_function(struct si_shader_context *ctx,
si_emit_ps_exports(ctx, &exp);
/* Compile. */
LLVMBuildRetVoid(ctx->ac.builder);
}
/**
* Select and compile (or reuse) pixel shader parts (prolog & epilog).
*/
static bool si_shader_select_ps_parts(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
union si_shader_part_key prolog_key;
union si_shader_part_key epilog_key;
/* Get the prolog. */
si_get_ps_prolog_key(shader, &prolog_key, true);
/* The prolog is a no-op if these aren't set. */
if (si_need_ps_prolog(&prolog_key)) {
shader->prolog =
si_get_shader_part(sscreen, &sscreen->ps_prologs,
PIPE_SHADER_FRAGMENT, true,
- &prolog_key, tm, debug,
+ &prolog_key, compiler, debug,
si_build_ps_prolog_function,
"Fragment Shader Prolog");
if (!shader->prolog)
return false;
}
/* Get the epilog. */
si_get_ps_epilog_key(shader, &epilog_key);
shader->epilog =
si_get_shader_part(sscreen, &sscreen->ps_epilogs,
PIPE_SHADER_FRAGMENT, false,
- &epilog_key, tm, debug,
+ &epilog_key, compiler, debug,
si_build_ps_epilog_function,
"Fragment Shader Epilog");
if (!shader->epilog)
return false;
/* Enable POS_FIXED_PT if polygon stippling is enabled. */
if (shader->key.part.ps.prolog.poly_stipple) {
shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr));
}
@@ -8029,39 +8029,39 @@ static void si_fix_resource_usage(struct si_screen *sscreen,
shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
if (shader->selector->type == PIPE_SHADER_COMPUTE &&
si_get_max_workgroup_size(shader) > 64) {
si_multiwave_lds_size_workaround(sscreen,
&shader->config.lds_size);
}
}
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug)
{
struct si_shader_selector *sel = shader->selector;
struct si_shader *mainp = *si_get_main_shader_part(sel, &shader->key);
int r;
/* LS, ES, VS are compiled on demand if the main part hasn't been
* compiled for that stage.
*
* Vertex shaders are compiled on demand when a vertex fetch
* workaround must be applied.
*/
if (shader->is_monolithic) {
/* Monolithic shader (compiled as a whole, has many variants,
* may take a long time to compile).
*/
- r = si_compile_tgsi_shader(sscreen, tm, shader, true, debug);
+ r = si_compile_tgsi_shader(sscreen, compiler, shader, true, debug);
if (r)
return r;
} else {
/* The shader consists of several parts:
*
* - the middle part is the user shader, it has 1 variant only
* and it was compiled during the creation of the shader
* selector
* - the prolog part is inserted at the beginning
* - the epilog part is inserted at the end
@@ -8087,35 +8087,35 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
memcpy(shader->info.vs_output_param_offset,
mainp->info.vs_output_param_offset,
sizeof(mainp->info.vs_output_param_offset));
shader->info.uses_instanceid = mainp->info.uses_instanceid;
shader->info.nr_pos_exports = mainp->info.nr_pos_exports;
shader->info.nr_param_exports = mainp->info.nr_param_exports;
/* Select prologs and/or epilogs. */
switch (sel->type) {
case PIPE_SHADER_VERTEX:
- if (!si_shader_select_vs_parts(sscreen, tm, shader, debug))
+ if (!si_shader_select_vs_parts(sscreen, compiler, shader, debug))
return -1;
break;
case PIPE_SHADER_TESS_CTRL:
- if (!si_shader_select_tcs_parts(sscreen, tm, shader, debug))
+ if (!si_shader_select_tcs_parts(sscreen, compiler, shader, debug))
return -1;
break;
case PIPE_SHADER_TESS_EVAL:
break;
case PIPE_SHADER_GEOMETRY:
- if (!si_shader_select_gs_parts(sscreen, tm, shader, debug))
+ if (!si_shader_select_gs_parts(sscreen, compiler, shader, debug))
return -1;
break;
case PIPE_SHADER_FRAGMENT:
- if (!si_shader_select_ps_parts(sscreen, tm, shader, debug))
+ if (!si_shader_select_ps_parts(sscreen, compiler, shader, debug))
return -1;
/* Make sure we have at least as many VGPRs as there
* are allocated inputs.
*/
shader->config.num_vgprs = MAX2(shader->config.num_vgprs,
shader->info.num_input_vgprs);
break;
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index c26ccafdd69..8c479d638a9 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -304,25 +304,30 @@ enum {
SI_FIX_FETCH_RGB_64_FLOAT,
SI_FIX_FETCH_RGBA_64_FLOAT,
SI_FIX_FETCH_RGB_8, /* A = 1.0 */
SI_FIX_FETCH_RGB_8_INT, /* A = 1 */
SI_FIX_FETCH_RGB_16,
SI_FIX_FETCH_RGB_16_INT,
};
struct si_shader;
+/* Per-thread persistent LLVM objects. */
+struct si_compiler {
+ LLVMTargetMachineRef tm;
+};
+
/* State of the context creating the shader object. */
struct si_compiler_ctx_state {
/* Should only be used by si_init_shader_selector_async and
* si_build_shader_variant if thread_index == -1 (non-threaded). */
- LLVMTargetMachineRef tm;
+ struct si_compiler *compiler;
/* Used if thread_index == -1 or if debug.async is true. */
struct pipe_debug_callback debug;
/* Used for creating the log string for gallium/ddebug. */
bool is_debug_context;
};
/* A shader selector is a gallium CSO and contains shader variants and
* binaries for one TGSI program. This can be shared by multiple contexts.
@@ -639,29 +644,29 @@ struct si_shader {
struct si_shader_part {
struct si_shader_part *next;
union si_shader_part_key key;
struct ac_shader_binary binary;
struct si_shader_config config;
};
/* si_shader.c */
struct si_shader *
si_generate_gs_copy_shader(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct si_shader_selector *gs_selector,
struct pipe_debug_callback *debug);
int si_compile_tgsi_shader(struct si_screen *sscreen,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct si_shader *shader,
bool is_monolithic,
struct pipe_debug_callback *debug);
-int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
+int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug);
void si_shader_destroy(struct si_shader *shader);
unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
struct pipe_debug_callback *debug, unsigned processor,
FILE *f, bool check_debug_option);
void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h
index afcc14e38cc..0a347172d62 100644
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -172,21 +172,21 @@ struct si_shader_context {
/* API GS */
int param_gs2vs_offset;
int param_gs_wave_id; /* GFX6 */
LLVMValueRef gs_vtx_offset[6]; /* in dwords (GFX6) */
int param_gs_vtx01_offset; /* in dwords (GFX9) */
int param_gs_vtx23_offset; /* in dwords (GFX9) */
int param_gs_vtx45_offset; /* in dwords (GFX9) */
/* CS */
int param_block_size;
- LLVMTargetMachineRef tm;
+ struct si_compiler *compiler;
/* Preloaded descriptors. */
LLVMValueRef esgs_ring;
LLVMValueRef gsvs_ring[4];
LLVMValueRef tess_offchip_ring;
LLVMValueRef invoc0_tess_factors[6]; /* outer[4], inner[2] */
LLVMValueRef gs_next_vertex[4];
LLVMValueRef postponed_kill;
LLVMValueRef return_value;
@@ -214,36 +214,36 @@ si_shader_context(struct lp_build_tgsi_context *bld_base)
}
static inline struct si_shader_context *
si_shader_context_from_abi(struct ac_shader_abi *abi)
{
struct si_shader_context *ctx = NULL;
return container_of(abi, ctx, abi);
}
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct pipe_debug_callback *debug);
LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type);
LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type, LLVMValueRef value);
LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
LLVMValueRef index,
unsigned num);
void si_llvm_context_init(struct si_shader_context *ctx,
struct si_screen *sscreen,
- LLVMTargetMachineRef tm);
+ struct si_compiler *compiler);
void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
struct si_shader *shader);
void si_llvm_create_func(struct si_shader_context *ctx,
const char *name,
LLVMTypeRef *return_types, unsigned num_return_elems,
LLVMTypeRef *ParamTypes, unsigned ParamCount);
void si_llvm_dispose(struct si_shader_context *ctx);
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
index acd7e0b2b2f..d0332a6078c 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -92,42 +92,43 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
LLVMDisposeMessage(description);
}
/**
* Compile an LLVM module to machine code.
*
* @returns 0 for success, 1 for failure
*/
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
- LLVMTargetMachineRef tm,
+ struct si_compiler *compiler,
struct pipe_debug_callback *debug)
{
struct si_llvm_diagnostics diag;
char *err;
LLVMContextRef llvm_ctx;
LLVMMemoryBufferRef out_buffer;
unsigned buffer_size;
const char *buffer_data;
LLVMBool mem_err;
diag.debug = debug;
diag.retval = 0;
/* Setup Diagnostic Handler*/
llvm_ctx = LLVMGetModuleContext(M);
LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
/* Compile IR*/
- mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
- &out_buffer);
+ mem_err = LLVMTargetMachineEmitToMemoryBuffer(compiler->tm, M,
+ LLVMObjectFile, &err,
+ &out_buffer);
/* Process Errors/Warnings */
if (mem_err) {
fprintf(stderr, "%s: %s", __FUNCTION__, err);
pipe_debug_message(debug, SHADER_INFO,
"LLVM emit error: %s", err);
FREE(err);
diag.retval = 1;
goto out;
}
@@ -985,39 +986,39 @@ static void emit_immediate(struct lp_build_tgsi_context *bld_base,
for (i = 0; i < 4; ++i) {
ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
}
ctx->imms_num++;
}
void si_llvm_context_init(struct si_shader_context *ctx,
struct si_screen *sscreen,
- LLVMTargetMachineRef tm)
+ struct si_compiler *compiler)
{
struct lp_type type;
/* Initialize the gallivm object:
* We are only using the module, context, and builder fields of this struct.
* This should be enough for us to be able to pass our gallivm struct to the
* helper functions in the gallivm module.
*/
memset(ctx, 0, sizeof(*ctx));
ctx->screen = sscreen;
- ctx->tm = tm;
+ ctx->compiler = compiler;
ctx->gallivm.context = LLVMContextCreate();
ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
ctx->gallivm.context);
LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
- LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
+ LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(compiler->tm);
char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
LLVMDisposeTargetData(data_layout);
LLVMDisposeMessage(data_layout_str);
bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
enum ac_float_mode float_mode =
unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index f23ce098208..2e215b91ce8 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1481,40 +1481,40 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
if (unlikely(sctx->screen->debug_flags & DBG(NO_OPT_VARIANT)))
memset(&key->opt, 0, sizeof(key->opt));
}
static void si_build_shader_variant(struct si_shader *shader,
int thread_index,
bool low_priority)
{
struct si_shader_selector *sel = shader->selector;
struct si_screen *sscreen = sel->screen;
- LLVMTargetMachineRef tm;
+ struct si_compiler *compiler;
struct pipe_debug_callback *debug = &shader->compiler_ctx_state.debug;
int r;
if (thread_index >= 0) {
if (low_priority) {
- assert(thread_index < ARRAY_SIZE(sscreen->tm_low_priority));
- tm = sscreen->tm_low_priority[thread_index];
+ assert(thread_index < ARRAY_SIZE(sscreen->compiler_lowp));
+ compiler = &sscreen->compiler_lowp[thread_index];
} else {
- assert(thread_index < ARRAY_SIZE(sscreen->tm));
- tm = sscreen->tm[thread_index];
+ assert(thread_index < ARRAY_SIZE(sscreen->compiler));
+ compiler = &sscreen->compiler[thread_index];
}
if (!debug->async)
debug = NULL;
} else {
assert(!low_priority);
- tm = shader->compiler_ctx_state.tm;
+ compiler = shader->compiler_ctx_state.compiler;
}
- r = si_shader_create(sscreen, tm, shader, debug);
+ r = si_shader_create(sscreen, compiler, shader, debug);
if (unlikely(r)) {
PRINT_ERR("Failed to build shader variant (type=%u) %d\n",
sel->type, r);
shader->compilation_failed = true;
return;
}
if (shader->compiler_ctx_state.is_debug_context) {
FILE *f = open_memstream(&shader->shader_log,
&shader->shader_log_size);
@@ -1553,21 +1553,21 @@ static bool si_check_missing_main_part(struct si_screen *sscreen,
/* We can leave the fence as permanently signaled because the
* main part becomes visible globally only after it has been
* compiled. */
util_queue_fence_init(&main_part->ready);
main_part->selector = sel;
main_part->key.as_es = key->as_es;
main_part->key.as_ls = key->as_ls;
- if (si_compile_tgsi_shader(sscreen, compiler_state->tm,
+ if (si_compile_tgsi_shader(sscreen, compiler_state->compiler,
main_part, false,
&compiler_state->debug) != 0) {
FREE(main_part);
return false;
}
*mainp = main_part;
}
return true;
}
@@ -1828,27 +1828,27 @@ static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
/**
* Compile the main shader part or the monolithic shader as part of
* si_shader_selector initialization. Since it can be done asynchronously,
* there is no way to report compile failures to applications.
*/
static void si_init_shader_selector_async(void *job, int thread_index)
{
struct si_shader_selector *sel = (struct si_shader_selector *)job;
struct si_screen *sscreen = sel->screen;
- LLVMTargetMachineRef tm;
+ struct si_compiler *compiler;
struct pipe_debug_callback *debug = &sel->compiler_ctx_state.debug;
assert(!debug->debug_message || debug->async);
assert(thread_index >= 0);
- assert(thread_index < ARRAY_SIZE(sscreen->tm));
- tm = sscreen->tm[thread_index];
+ assert(thread_index < ARRAY_SIZE(sscreen->compiler));
+ compiler = &sscreen->compiler[thread_index];
/* Compile the main shader part for use with a prolog and/or epilog.
* If this fails, the driver will try to compile a monolithic shader
* on demand.
*/
if (!sscreen->use_monolithic_shaders) {
struct si_shader *shader = CALLOC_STRUCT(si_shader);
void *ir_binary = NULL;
if (!shader) {
@@ -1872,21 +1872,21 @@ static void si_init_shader_selector_async(void *job, int thread_index)
mtx_lock(&sscreen->shader_cache_mutex);
if (ir_binary &&
si_shader_cache_load_shader(sscreen, ir_binary, shader)) {
mtx_unlock(&sscreen->shader_cache_mutex);
si_shader_dump_stats_for_shader_db(shader, debug);
} else {
mtx_unlock(&sscreen->shader_cache_mutex);
/* Compile the shader if it hasn't been loaded from the cache. */
- if (si_compile_tgsi_shader(sscreen, tm, shader, false,
+ if (si_compile_tgsi_shader(sscreen, compiler, shader, false,
debug) != 0) {
FREE(shader);
FREE(ir_binary);
fprintf(stderr, "radeonsi: can't compile a main shader part\n");
return;
}
if (ir_binary) {
mtx_lock(&sscreen->shader_cache_mutex);
if (!si_shader_cache_insert_shader(sscreen, ir_binary, shader, true))
@@ -1935,21 +1935,21 @@ static void si_init_shader_selector_async(void *job, int thread_index)
case TGSI_SEMANTIC_CLIPVERTEX:
case TGSI_SEMANTIC_EDGEFLAG:
break;
}
}
}
}
/* The GS copy shader is always pre-compiled. */
if (sel->type == PIPE_SHADER_GEOMETRY) {
- sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, tm, sel, debug);
+ sel->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, debug);
if (!sel->gs_copy_shader) {
fprintf(stderr, "radeonsi: can't create GS copy shader\n");
return;
}
si_shader_vs(sscreen, sel->gs_copy_shader, sel);
}
}
/* Return descriptor slot usage masks from the given shader info. */
@@ -3127,21 +3127,21 @@ bool si_update_shaders(struct si_context *sctx)
struct pipe_context *ctx = (struct pipe_context*)sctx;
struct si_compiler_ctx_state compiler_state;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
struct si_shader *old_vs = si_get_vs_state(sctx);
bool old_clip_disable = old_vs ? old_vs->key.opt.clip_disable : false;
struct si_shader *old_ps = sctx->ps_shader.current;
unsigned old_spi_shader_col_format =
old_ps ? old_ps->key.part.ps.epilog.spi_shader_col_format : 0;
int r;
- compiler_state.tm = sctx->tm;
+ compiler_state.compiler = &sctx->compiler;
compiler_state.debug = sctx->debug;
compiler_state.is_debug_context = sctx->is_debug;
/* Update stages before GS. */
if (sctx->tes_shader.cso) {
if (!sctx->tess_rings) {
si_init_tess_factor_ring(sctx);
if (!sctx->tess_rings)
return false;
}
--
2.17.0
More information about the mesa-dev
mailing list