[Mesa-dev] [PATCH 2/3] radeonsi: fix color inputs/outputs for GS and tess
Marek Olšák
maraeo at gmail.com
Wed May 23 21:05:00 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
GS is tested, tessellation is untested.
Have outputs_written_before_ps for HW VS and outputs_written for other
stages. The reason is that COLOR and BCOLOR alias for HW VS, which
drives elimination of VS outputs based on PS inputs.
---
src/gallium/drivers/radeonsi/si_shader.c | 30 ++++++++++++-------
src/gallium/drivers/radeonsi/si_shader.h | 6 ++--
.../drivers/radeonsi/si_state_shaders.c | 18 ++++++-----
3 files changed, 34 insertions(+), 20 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0d24c3af10a..6734a1628f5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -184,21 +184,22 @@ unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned in
assert(!"invalid semantic name");
return 0;
}
}
/**
* Returns a unique index for a semantic name and index. The index must be
* less than 64, so that a 64-bit bitmask of used inputs or outputs can be
* calculated.
*/
-unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
+unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
+ unsigned is_varying)
{
switch (semantic_name) {
case TGSI_SEMANTIC_POSITION:
return 0;
case TGSI_SEMANTIC_GENERIC:
/* Since some shader stages use the the highest used IO index
* to determine the size to allocate for inputs/outputs
* (in LDS, tess and GS rings). GENERIC should be placed right
* after POSITION to make that size as small as possible.
*/
@@ -213,28 +214,34 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
assert(index <= 1);
return SI_MAX_IO_GENERIC + 2 + index;
case TGSI_SEMANTIC_FOG:
return SI_MAX_IO_GENERIC + 4;
case TGSI_SEMANTIC_LAYER:
return SI_MAX_IO_GENERIC + 5;
case TGSI_SEMANTIC_VIEWPORT_INDEX:
return SI_MAX_IO_GENERIC + 6;
case TGSI_SEMANTIC_PRIMID:
return SI_MAX_IO_GENERIC + 7;
- case TGSI_SEMANTIC_COLOR: /* these alias */
- case TGSI_SEMANTIC_BCOLOR:
+ case TGSI_SEMANTIC_COLOR:
assert(index < 2);
return SI_MAX_IO_GENERIC + 8 + index;
+ case TGSI_SEMANTIC_BCOLOR:
+ assert(index < 2);
+ /* If it's a varying, COLOR and BCOLOR alias. */
+ if (is_varying)
+ return SI_MAX_IO_GENERIC + 8 + index;
+ else
+ return SI_MAX_IO_GENERIC + 10 + index;
case TGSI_SEMANTIC_TEXCOORD:
assert(index < 8);
- assert(SI_MAX_IO_GENERIC + 10 + index < 64);
- return SI_MAX_IO_GENERIC + 10 + index;
+ STATIC_ASSERT(SI_MAX_IO_GENERIC + 12 + 8 <= 64);
+ return SI_MAX_IO_GENERIC + 12 + index;
default:
assert(!"invalid semantic name");
return 0;
}
}
/**
* Get the value of a shader input parameter and extract a bitfield.
*/
static LLVMValueRef unpack_llvm_param(struct si_shader_context *ctx,
@@ -853,21 +860,21 @@ static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context
if (param_index) {
base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
LLVMBuildMul(ctx->ac.builder, param_index,
LLVMConstInt(ctx->i32, 4, 0), ""), "");
}
int param = is_patch ?
si_shader_io_get_unique_index_patch(name[input_index],
index[input_index]) :
si_shader_io_get_unique_index(name[input_index],
- index[input_index]);
+ index[input_index], false);
/* Add the base address of the element. */
return LLVMBuildAdd(ctx->ac.builder, base_addr,
LLVMConstInt(ctx->i32, param * 4, 0), "");
}
/**
* Calculate a dword address given an input or output register and a stride.
*/
static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
@@ -1008,21 +1015,21 @@ static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(
LLVMValueRef param_index,
unsigned param_base,
ubyte *name,
ubyte *index,
bool is_patch)
{
unsigned param_index_base;
param_index_base = is_patch ?
si_shader_io_get_unique_index_patch(name[param_base], index[param_base]) :
- si_shader_io_get_unique_index(name[param_base], index[param_base]);
+ si_shader_io_get_unique_index(name[param_base], index[param_base], false);
if (param_index) {
param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
LLVMConstInt(ctx->i32, param_index_base, 0),
"");
} else {
param_index = LLVMConstInt(ctx->i32, param_index_base, 0);
}
return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx),
@@ -1615,21 +1622,21 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
struct si_shader *shader = ctx->shader;
struct lp_build_context *uint = &ctx->bld_base.uint_bld;
LLVMValueRef vtx_offset, soffset;
struct tgsi_shader_info *info = &shader->selector->info;
unsigned semantic_name = info->input_semantic_name[input_index];
unsigned semantic_index = info->input_semantic_index[input_index];
unsigned param;
LLVMValueRef value;
- param = si_shader_io_get_unique_index(semantic_name, semantic_index);
+ param = si_shader_io_get_unique_index(semantic_name, semantic_index, false);
/* GFX9 has the ESGS ring in LDS. */
if (ctx->screen->info.chip_class >= GFX9) {
unsigned index = vtx_offset_param;
switch (index / 2) {
case 0:
vtx_offset = si_unpack_param(ctx, ctx->param_gs_vtx01_offset,
index % 2 ? 16 : 0, 16);
break;
@@ -2909,21 +2916,22 @@ static void si_build_param_exports(struct si_shader_context *ctx,
case TGSI_SEMANTIC_TEXCOORD:
case TGSI_SEMANTIC_GENERIC:
break;
default:
continue;
}
if ((semantic_name != TGSI_SEMANTIC_GENERIC ||
semantic_index < SI_MAX_IO_GENERIC) &&
shader->key.opt.kill_outputs &
- (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
+ (1ull << si_shader_io_get_unique_index(semantic_name,
+ semantic_index, true)))
continue;
si_export_param(ctx, param_count, outputs[i].values);
assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
shader->info.vs_output_param_offset[i] = param_count++;
}
shader->info.nr_param_exports = param_count;
}
@@ -3539,21 +3547,21 @@ static void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi,
* (vertex, tessellation evaluation or geometry) does not
* statically assign to gl_ViewportIndex or gl_Layer, index
* or layer zero is assumed.
*
* So writes to those outputs in VS-as-LS are simply ignored.
*/
if (name == TGSI_SEMANTIC_LAYER ||
name == TGSI_SEMANTIC_VIEWPORT_INDEX)
continue;
- int param = si_shader_io_get_unique_index(name, index);
+ int param = si_shader_io_get_unique_index(name, index, false);
LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
LLVMConstInt(ctx->i32, param * 4, 0), "");
for (chan = 0; chan < 4; chan++) {
if (!(info->output_usagemask[i] & (1 << chan)))
continue;
lds_store(ctx, chan, dw_addr,
LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""));
}
@@ -3588,21 +3596,21 @@ static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi,
}
for (i = 0; i < info->num_outputs; i++) {
int param;
if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
continue;
param = si_shader_io_get_unique_index(info->output_semantic_name[i],
- info->output_semantic_index[i]);
+ info->output_semantic_index[i], false);
for (chan = 0; chan < 4; chan++) {
if (!(info->output_usagemask[i] & (1 << chan)))
continue;
LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
out_val = ac_to_integer(&ctx->ac, out_val);
/* GFX9 has the ESGS ring in LDS. */
if (ctx->screen->info.chip_class >= GFX9) {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 94366f41204..555ca598d2c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -145,21 +145,21 @@
struct nir_shader;
struct si_shader;
struct si_context;
#define SI_MAX_ATTRIBS 16
#define SI_MAX_VS_OUTPUTS 40
/* Shader IO unique indices are supported for TGSI_SEMANTIC_GENERIC with an
* index smaller than this.
*/
-#define SI_MAX_IO_GENERIC 46
+#define SI_MAX_IO_GENERIC 44
/* SGPR user data indices */
enum {
SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */
#if !HAVE_32BIT_POINTERS
SI_SGPR_RW_BUFFERS_HI,
#endif
SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
#if !HAVE_32BIT_POINTERS
SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI,
@@ -386,20 +386,21 @@ struct si_shader_selector {
unsigned color_attr_index[2];
unsigned db_shader_control;
/* Set 0xf or 0x0 (4 bits) per each written output.
* ANDed with spi_shader_col_format.
*/
unsigned colors_written_4bit;
/* CS parameters */
unsigned local_size;
+ uint64_t outputs_written_before_ps; /* "get_unique_index" bits */
uint64_t outputs_written; /* "get_unique_index" bits */
uint32_t patch_outputs_written; /* "get_unique_index_patch" bits */
uint64_t inputs_read; /* "get_unique_index" bits */
/* bitmasks of used descriptor slots */
uint32_t active_const_and_shader_buffers;
uint64_t active_samplers_and_images;
};
@@ -661,21 +662,22 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
int si_compile_tgsi_shader(struct si_screen *sscreen,
struct si_compiler *compiler,
struct si_shader *shader,
bool is_monolithic,
struct pipe_debug_callback *debug);
int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
struct si_shader *shader,
struct pipe_debug_callback *debug);
void si_shader_destroy(struct si_shader *shader);
unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
-unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
+unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
+ unsigned is_varying);
int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
struct pipe_debug_callback *debug, unsigned processor,
FILE *f, bool check_debug_option);
void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
struct pipe_debug_callback *debug);
void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
unsigned *lds_size);
void si_shader_apply_scratch_relocs(struct si_shader *shader,
uint64_t scratch_va);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 78937aea0f7..4d17082dd9b 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1216,26 +1216,26 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS;
unsigned ps_colormask = si_get_total_colormask(sctx);
ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard ||
(!ps_colormask &&
!ps_modifies_zs &&
!ps->info.writes_memory);
}
/* Find out which VS outputs aren't used by the PS. */
- uint64_t outputs_written = vs->outputs_written;
+ uint64_t outputs_written = vs->outputs_written_before_ps;
uint64_t inputs_read = 0;
/* ignore POSITION, PSIZE */
- outputs_written &= ~((1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0)) |
- (1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE, 0)));
+ outputs_written &= ~((1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0, true)) |
+ (1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE, 0, true)));
if (!ps_disabled) {
inputs_read = ps->inputs_read;
}
uint64_t linked = outputs_written & inputs_read;
key->opt.kill_outputs = ~linked & outputs_written;
}
@@ -1920,22 +1920,22 @@ static void si_init_shader_selector_async(void *job, int thread_index)
unsigned index = sel->info.output_semantic_index[i];
unsigned id;
switch (name) {
case TGSI_SEMANTIC_GENERIC:
/* don't process indices the function can't handle */
if (index >= SI_MAX_IO_GENERIC)
break;
/* fall through */
default:
- id = si_shader_io_get_unique_index(name, index);
- sel->outputs_written &= ~(1ull << id);
+ id = si_shader_io_get_unique_index(name, index, true);
+ sel->outputs_written_before_ps &= ~(1ull << id);
break;
case TGSI_SEMANTIC_POSITION: /* ignore these */
case TGSI_SEMANTIC_PSIZE:
case TGSI_SEMANTIC_CLIPVERTEX:
case TGSI_SEMANTIC_EDGEFLAG:
break;
}
}
}
}
@@ -2094,50 +2094,54 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
1ull << si_shader_io_get_unique_index_patch(name, index);
break;
case TGSI_SEMANTIC_GENERIC:
/* don't process indices the function can't handle */
if (index >= SI_MAX_IO_GENERIC)
break;
/* fall through */
default:
sel->outputs_written |=
- 1ull << si_shader_io_get_unique_index(name, index);
+ 1ull << si_shader_io_get_unique_index(name, index, false);
+ sel->outputs_written_before_ps |=
+ 1ull << si_shader_io_get_unique_index(name, index, true);
break;
case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
case TGSI_SEMANTIC_EDGEFLAG:
break;
}
}
sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
* conflicts, i.e. each vertex will start at a different bank.
*/
if (sctx->chip_class >= GFX9)
sel->esgs_itemsize += 4;
+
+ assert(((sel->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0);
break;
case PIPE_SHADER_FRAGMENT:
for (i = 0; i < sel->info.num_inputs; i++) {
unsigned name = sel->info.input_semantic_name[i];
unsigned index = sel->info.input_semantic_index[i];
switch (name) {
case TGSI_SEMANTIC_GENERIC:
/* don't process indices the function can't handle */
if (index >= SI_MAX_IO_GENERIC)
break;
/* fall through */
default:
sel->inputs_read |=
- 1ull << si_shader_io_get_unique_index(name, index);
+ 1ull << si_shader_io_get_unique_index(name, index, true);
break;
case TGSI_SEMANTIC_PCOORD: /* ignore this */
break;
}
}
for (i = 0; i < 8; i++)
if (sel->info.colors_written & (1 << i))
sel->colors_written_4bit |= 0xf << (4 * i);
--
2.17.0
More information about the mesa-dev
mailing list