[Mesa-dev] [PATCH 2/3] radeonsi: fix color inputs/outputs for GS and tess

Marek Olšák maraeo at gmail.com
Wed May 23 21:05:00 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

GS is tested, tessellation is untested.

Have outputs_written_before_ps for HW VS and outputs_written for other
stages. The reason is that COLOR and BCOLOR alias for HW VS, which
drives elimination of VS outputs based on PS inputs.
---
 src/gallium/drivers/radeonsi/si_shader.c      | 30 ++++++++++++-------
 src/gallium/drivers/radeonsi/si_shader.h      |  6 ++--
 .../drivers/radeonsi/si_state_shaders.c       | 18 ++++++-----
 3 files changed, 34 insertions(+), 20 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index 0d24c3af10a..6734a1628f5 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -184,21 +184,22 @@ unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned in
 		assert(!"invalid semantic name");
 		return 0;
 	}
 }
 
 /**
  * Returns a unique index for a semantic name and index. The index must be
  * less than 64, so that a 64-bit bitmask of used inputs or outputs can be
  * calculated.
  */
-unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
+unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
+				       unsigned is_varying)
 {
 	switch (semantic_name) {
 	case TGSI_SEMANTIC_POSITION:
 		return 0;
 	case TGSI_SEMANTIC_GENERIC:
 		/* Since some shader stages use the the highest used IO index
 		 * to determine the size to allocate for inputs/outputs
 		 * (in LDS, tess and GS rings). GENERIC should be placed right
 		 * after POSITION to make that size as small as possible.
 		 */
@@ -213,28 +214,34 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
 		assert(index <= 1);
 		return SI_MAX_IO_GENERIC + 2 + index;
 	case TGSI_SEMANTIC_FOG:
 		return SI_MAX_IO_GENERIC + 4;
 	case TGSI_SEMANTIC_LAYER:
 		return SI_MAX_IO_GENERIC + 5;
 	case TGSI_SEMANTIC_VIEWPORT_INDEX:
 		return SI_MAX_IO_GENERIC + 6;
 	case TGSI_SEMANTIC_PRIMID:
 		return SI_MAX_IO_GENERIC + 7;
-	case TGSI_SEMANTIC_COLOR: /* these alias */
-	case TGSI_SEMANTIC_BCOLOR:
+	case TGSI_SEMANTIC_COLOR:
 		assert(index < 2);
 		return SI_MAX_IO_GENERIC + 8 + index;
+	case TGSI_SEMANTIC_BCOLOR:
+		assert(index < 2);
+		/* If it's a varying, COLOR and BCOLOR alias. */
+		if (is_varying)
+			return SI_MAX_IO_GENERIC + 8 + index;
+		else
+			return SI_MAX_IO_GENERIC + 10 + index;
 	case TGSI_SEMANTIC_TEXCOORD:
 		assert(index < 8);
-		assert(SI_MAX_IO_GENERIC + 10 + index < 64);
-		return SI_MAX_IO_GENERIC + 10 + index;
+		STATIC_ASSERT(SI_MAX_IO_GENERIC + 12 + 8 <= 64);
+		return SI_MAX_IO_GENERIC + 12 + index;
 	default:
 		assert(!"invalid semantic name");
 		return 0;
 	}
 }
 
 /**
  * Get the value of a shader input parameter and extract a bitfield.
  */
 static LLVMValueRef unpack_llvm_param(struct si_shader_context *ctx,
@@ -853,21 +860,21 @@ static LLVMValueRef get_dw_address_from_generic_indices(struct si_shader_context
 	if (param_index) {
 		base_addr = LLVMBuildAdd(ctx->ac.builder, base_addr,
 					 LLVMBuildMul(ctx->ac.builder, param_index,
 						      LLVMConstInt(ctx->i32, 4, 0), ""), "");
 	}
 
 	int param = is_patch ?
 		si_shader_io_get_unique_index_patch(name[input_index],
 						    index[input_index]) :
 		si_shader_io_get_unique_index(name[input_index],
-					      index[input_index]);
+					      index[input_index], false);
 
 	/* Add the base address of the element. */
 	return LLVMBuildAdd(ctx->ac.builder, base_addr,
 			    LLVMConstInt(ctx->i32, param * 4, 0), "");
 }
 
 /**
  * Calculate a dword address given an input or output register and a stride.
  */
 static LLVMValueRef get_dw_address(struct si_shader_context *ctx,
@@ -1008,21 +1015,21 @@ static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(
 					LLVMValueRef param_index,
 					unsigned param_base,
 					ubyte *name,
 					ubyte *index,
 					bool is_patch)
 {
 	unsigned param_index_base;
 
 	param_index_base = is_patch ?
 		si_shader_io_get_unique_index_patch(name[param_base], index[param_base]) :
-		si_shader_io_get_unique_index(name[param_base], index[param_base]);
+		si_shader_io_get_unique_index(name[param_base], index[param_base], false);
 
 	if (param_index) {
 		param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
 					   LLVMConstInt(ctx->i32, param_index_base, 0),
 					   "");
 	} else {
 		param_index = LLVMConstInt(ctx->i32, param_index_base, 0);
 	}
 
 	return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx),
@@ -1615,21 +1622,21 @@ LLVMValueRef si_llvm_load_input_gs(struct ac_shader_abi *abi,
 	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
 	struct si_shader *shader = ctx->shader;
 	struct lp_build_context *uint =	&ctx->bld_base.uint_bld;
 	LLVMValueRef vtx_offset, soffset;
 	struct tgsi_shader_info *info = &shader->selector->info;
 	unsigned semantic_name = info->input_semantic_name[input_index];
 	unsigned semantic_index = info->input_semantic_index[input_index];
 	unsigned param;
 	LLVMValueRef value;
 
-	param = si_shader_io_get_unique_index(semantic_name, semantic_index);
+	param = si_shader_io_get_unique_index(semantic_name, semantic_index, false);
 
 	/* GFX9 has the ESGS ring in LDS. */
 	if (ctx->screen->info.chip_class >= GFX9) {
 		unsigned index = vtx_offset_param;
 
 		switch (index / 2) {
 		case 0:
 			vtx_offset = si_unpack_param(ctx, ctx->param_gs_vtx01_offset,
 						  index % 2 ? 16 : 0, 16);
 			break;
@@ -2909,21 +2916,22 @@ static void si_build_param_exports(struct si_shader_context *ctx,
 		case TGSI_SEMANTIC_TEXCOORD:
 		case TGSI_SEMANTIC_GENERIC:
 			break;
 		default:
 			continue;
 		}
 
 		if ((semantic_name != TGSI_SEMANTIC_GENERIC ||
 		     semantic_index < SI_MAX_IO_GENERIC) &&
 		    shader->key.opt.kill_outputs &
-		    (1ull << si_shader_io_get_unique_index(semantic_name, semantic_index)))
+		    (1ull << si_shader_io_get_unique_index(semantic_name,
+							   semantic_index, true)))
 			continue;
 
 		si_export_param(ctx, param_count, outputs[i].values);
 
 		assert(i < ARRAY_SIZE(shader->info.vs_output_param_offset));
 		shader->info.vs_output_param_offset[i] = param_count++;
 	}
 
 	shader->info.nr_param_exports = param_count;
 }
@@ -3539,21 +3547,21 @@ static void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi,
 		 *    (vertex, tessellation evaluation or geometry) does not
 		 *    statically assign to gl_ViewportIndex or gl_Layer, index
 		 *    or layer zero is assumed.
 		 *
 		 * So writes to those outputs in VS-as-LS are simply ignored.
 		 */
 		if (name == TGSI_SEMANTIC_LAYER ||
 		    name == TGSI_SEMANTIC_VIEWPORT_INDEX)
 			continue;
 
-		int param = si_shader_io_get_unique_index(name, index);
+		int param = si_shader_io_get_unique_index(name, index, false);
 		LLVMValueRef dw_addr = LLVMBuildAdd(ctx->ac.builder, base_dw_addr,
 					LLVMConstInt(ctx->i32, param * 4, 0), "");
 
 		for (chan = 0; chan < 4; chan++) {
 			if (!(info->output_usagemask[i] & (1 << chan)))
 				continue;
 
 			lds_store(ctx, chan, dw_addr,
 				  LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], ""));
 		}
@@ -3588,21 +3596,21 @@ static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi,
 	}
 
 	for (i = 0; i < info->num_outputs; i++) {
 		int param;
 
 		if (info->output_semantic_name[i] == TGSI_SEMANTIC_VIEWPORT_INDEX ||
 		    info->output_semantic_name[i] == TGSI_SEMANTIC_LAYER)
 			continue;
 
 		param = si_shader_io_get_unique_index(info->output_semantic_name[i],
-						      info->output_semantic_index[i]);
+						      info->output_semantic_index[i], false);
 
 		for (chan = 0; chan < 4; chan++) {
 			if (!(info->output_usagemask[i] & (1 << chan)))
 				continue;
 
 			LLVMValueRef out_val = LLVMBuildLoad(ctx->ac.builder, addrs[4 * i + chan], "");
 			out_val = ac_to_integer(&ctx->ac, out_val);
 
 			/* GFX9 has the ESGS ring in LDS. */
 			if (ctx->screen->info.chip_class >= GFX9) {
diff --git a/src/gallium/drivers/radeonsi/si_shader.h b/src/gallium/drivers/radeonsi/si_shader.h
index 94366f41204..555ca598d2c 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -145,21 +145,21 @@
 struct nir_shader;
 struct si_shader;
 struct si_context;
 
 #define SI_MAX_ATTRIBS		16
 #define SI_MAX_VS_OUTPUTS	40
 
 /* Shader IO unique indices are supported for TGSI_SEMANTIC_GENERIC with an
  * index smaller than this.
  */
-#define SI_MAX_IO_GENERIC       46
+#define SI_MAX_IO_GENERIC       44
 
 /* SGPR user data indices */
 enum {
 	SI_SGPR_RW_BUFFERS,  /* rings (& stream-out, VS only) */
 #if !HAVE_32BIT_POINTERS
 	SI_SGPR_RW_BUFFERS_HI,
 #endif
 	SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
 #if !HAVE_32BIT_POINTERS
 	SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES_HI,
@@ -386,20 +386,21 @@ struct si_shader_selector {
 	unsigned	color_attr_index[2];
 	unsigned	db_shader_control;
 	/* Set 0xf or 0x0 (4 bits) per each written output.
 	 * ANDed with spi_shader_col_format.
 	 */
 	unsigned	colors_written_4bit;
 
 	/* CS parameters */
 	unsigned local_size;
 
+	uint64_t	outputs_written_before_ps; /* "get_unique_index" bits */
 	uint64_t	outputs_written;	/* "get_unique_index" bits */
 	uint32_t	patch_outputs_written;	/* "get_unique_index_patch" bits */
 
 	uint64_t	inputs_read;		/* "get_unique_index" bits */
 
 	/* bitmasks of used descriptor slots */
 	uint32_t	active_const_and_shader_buffers;
 	uint64_t	active_samplers_and_images;
 };
 
@@ -661,21 +662,22 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
 int si_compile_tgsi_shader(struct si_screen *sscreen,
 			   struct si_compiler *compiler,
 			   struct si_shader *shader,
 			   bool is_monolithic,
 			   struct pipe_debug_callback *debug);
 int si_shader_create(struct si_screen *sscreen, struct si_compiler *compiler,
 		     struct si_shader *shader,
 		     struct pipe_debug_callback *debug);
 void si_shader_destroy(struct si_shader *shader);
 unsigned si_shader_io_get_unique_index_patch(unsigned semantic_name, unsigned index);
-unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index);
+unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index,
+				       unsigned is_varying);
 int si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader);
 void si_shader_dump(struct si_screen *sscreen, const struct si_shader *shader,
 		    struct pipe_debug_callback *debug, unsigned processor,
 		    FILE *f, bool check_debug_option);
 void si_shader_dump_stats_for_shader_db(const struct si_shader *shader,
 					struct pipe_debug_callback *debug);
 void si_multiwave_lds_size_workaround(struct si_screen *sscreen,
 				      unsigned *lds_size);
 void si_shader_apply_scratch_relocs(struct si_shader *shader,
 				    uint64_t scratch_va);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 78937aea0f7..4d17082dd9b 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1216,26 +1216,26 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
 				      si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS;
 		unsigned ps_colormask = si_get_total_colormask(sctx);
 
 		ps_disabled = sctx->queued.named.rasterizer->rasterizer_discard ||
 			      (!ps_colormask &&
 			       !ps_modifies_zs &&
 			       !ps->info.writes_memory);
 	}
 
 	/* Find out which VS outputs aren't used by the PS. */
-	uint64_t outputs_written = vs->outputs_written;
+	uint64_t outputs_written = vs->outputs_written_before_ps;
 	uint64_t inputs_read = 0;
 
 	/* ignore POSITION, PSIZE */
-	outputs_written &= ~((1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0)) |
-			     (1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE, 0)));
+	outputs_written &= ~((1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_POSITION, 0, true)) |
+			     (1ull << si_shader_io_get_unique_index(TGSI_SEMANTIC_PSIZE, 0, true)));
 
 	if (!ps_disabled) {
 		inputs_read = ps->inputs_read;
 	}
 
 	uint64_t linked = outputs_written & inputs_read;
 
 	key->opt.kill_outputs = ~linked & outputs_written;
 }
 
@@ -1920,22 +1920,22 @@ static void si_init_shader_selector_async(void *job, int thread_index)
 				unsigned index = sel->info.output_semantic_index[i];
 				unsigned id;
 
 				switch (name) {
 				case TGSI_SEMANTIC_GENERIC:
 					/* don't process indices the function can't handle */
 					if (index >= SI_MAX_IO_GENERIC)
 						break;
 					/* fall through */
 				default:
-					id = si_shader_io_get_unique_index(name, index);
-					sel->outputs_written &= ~(1ull << id);
+					id = si_shader_io_get_unique_index(name, index, true);
+					sel->outputs_written_before_ps &= ~(1ull << id);
 					break;
 				case TGSI_SEMANTIC_POSITION: /* ignore these */
 				case TGSI_SEMANTIC_PSIZE:
 				case TGSI_SEMANTIC_CLIPVERTEX:
 				case TGSI_SEMANTIC_EDGEFLAG:
 					break;
 				}
 			}
 		}
 	}
@@ -2094,50 +2094,54 @@ static void *si_create_shader_selector(struct pipe_context *ctx,
 					1ull << si_shader_io_get_unique_index_patch(name, index);
 				break;
 
 			case TGSI_SEMANTIC_GENERIC:
 				/* don't process indices the function can't handle */
 				if (index >= SI_MAX_IO_GENERIC)
 					break;
 				/* fall through */
 			default:
 				sel->outputs_written |=
-					1ull << si_shader_io_get_unique_index(name, index);
+					1ull << si_shader_io_get_unique_index(name, index, false);
+				sel->outputs_written_before_ps |=
+					1ull << si_shader_io_get_unique_index(name, index, true);
 				break;
 			case TGSI_SEMANTIC_CLIPVERTEX: /* ignore these */
 			case TGSI_SEMANTIC_EDGEFLAG:
 				break;
 			}
 		}
 		sel->esgs_itemsize = util_last_bit64(sel->outputs_written) * 16;
 
 		/* For the ESGS ring in LDS, add 1 dword to reduce LDS bank
 		 * conflicts, i.e. each vertex will start at a different bank.
 		 */
 		if (sctx->chip_class >= GFX9)
 			sel->esgs_itemsize += 4;
+
+		assert(((sel->esgs_itemsize / 4) & C_028AAC_ITEMSIZE) == 0);
 		break;
 
 	case PIPE_SHADER_FRAGMENT:
 		for (i = 0; i < sel->info.num_inputs; i++) {
 			unsigned name = sel->info.input_semantic_name[i];
 			unsigned index = sel->info.input_semantic_index[i];
 
 			switch (name) {
 			case TGSI_SEMANTIC_GENERIC:
 				/* don't process indices the function can't handle */
 				if (index >= SI_MAX_IO_GENERIC)
 					break;
 				/* fall through */
 			default:
 				sel->inputs_read |=
-					1ull << si_shader_io_get_unique_index(name, index);
+					1ull << si_shader_io_get_unique_index(name, index, true);
 				break;
 			case TGSI_SEMANTIC_PCOORD: /* ignore this */
 				break;
 			}
 		}
 
 		for (i = 0; i < 8; i++)
 			if (sel->info.colors_written & (1 << i))
 				sel->colors_written_4bit |= 0xf << (4 * i);
 
-- 
2.17.0



More information about the mesa-dev mailing list