[Mesa-dev] [PATCH 3/3] radeon/llvm: Improve codegen for KILL_IF

Tom Stellard thomas.stellard at amd.com
Mon Apr 13 11:21:04 PDT 2015


Rather than emitting one kill instruction per component of KILL_IF's src
reg, we now or the components of the src register together and use the
result as a condition for just one kill instruction.

shader-db stats (bonaire):

979 shaders
Totals:
SGPRS: 34872 -> 34848 (-0.07 %)
VGPRS: 20696 -> 20676 (-0.10 %)
Code Size: 749032 -> 748452 (-0.08 %) bytes
LDS: 11 -> 11 (0.00 %) blocks
Scratch: 12288 -> 12288 (0.00 %) bytes per wave

Totals from affected shaders:
SGPRS: 1184 -> 1160 (-2.03 %)
VGPRS: 600 -> 580 (-3.33 %)
Code Size: 13200 -> 12620 (-4.39 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Scratch: 0 -> 0 (0.00 %) bytes per wave

Increases:
SGPRS: 2 (0.00 %)
VGPRS: 0 (0.00 %)
Code Size: 0 (0.00 %)
LDS: 0 (0.00 %)
Scratch: 0 (0.00 %)

Decreases:
SGPRS: 5 (0.01 %)
VGPRS: 5 (0.01 %)
Code Size: 25 (0.03 %)
LDS: 0 (0.00 %)
Scratch: 0 (0.00 %)

*** BY PERCENTAGE ***

Max Increase:

SGPRS: 32 -> 40 (25.00 %)
VGPRS: 0 -> 0 (0.00 %)
Code Size: 0 -> 0 (0.00 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Scratch: 0 -> 0 (0.00 %) bytes per wave

Max Decrease:

SGPRS: 32 -> 24 (-25.00 %)
VGPRS: 16 -> 12 (-25.00 %)
Code Size: 116 -> 96 (-17.24 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Scratch: 0 -> 0 (0.00 %) bytes per wave

*** BY UNIT ***

Max Increase:

SGPRS: 64 -> 72 (12.50 %)
VGPRS: 0 -> 0 (0.00 %)
Code Size: 0 -> 0 (0.00 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Scratch: 0 -> 0 (0.00 %) bytes per wave

Max Decrease:

SGPRS: 32 -> 24 (-25.00 %)
VGPRS: 16 -> 12 (-25.00 %)
Code Size: 424 -> 356 (-16.04 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Scratch: 0 -> 0 (0.00 %) bytes per wave
---
 .../drivers/radeon/radeon_setup_tgsi_llvm.c        | 29 ++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 91c56a3..18afbcb 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -637,6 +637,34 @@ static void uif_emit(
 	if_cond_emit(action, bld_base, emit_data, cond);
 }
 
+static void kill_if_fetch_args(
+	struct lp_build_tgsi_context * bld_base,
+	struct lp_build_emit_data * emit_data)
+{
+	const struct tgsi_full_instruction * inst = emit_data->inst;
+	struct gallivm_state *gallivm = bld_base->base.gallivm;
+	LLVMBuilderRef builder = gallivm->builder;
+	unsigned i;
+	LLVMValueRef conds[TGSI_NUM_CHANNELS];
+
+	for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
+		LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
+		conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
+					bld_base->base.zero, "");
+	}
+
+	/* Or the conditions together */
+	for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
+		conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
+	}
+
+	emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
+	emit_data->arg_count = 1;
+	emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
+					lp_build_const_float(gallivm, -1.0f),
+					bld_base->base.zero, "");
+}
+
 static void kil_emit(
 	const struct lp_build_tgsi_action * action,
 	struct lp_build_tgsi_context * bld_base,
@@ -1467,6 +1495,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
 	bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
 	bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
 	bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
+	bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
 	bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
 	bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
-- 
2.0.4



More information about the mesa-dev mailing list