[Mesa-dev] [PATCH 3/3] radeon/llvm: Improve codegen for KILL_IF
Tom Stellard
thomas.stellard at amd.com
Mon Apr 13 11:21:04 PDT 2015
Rather than emitting one kill instruction per component of KILL_IF's src
reg, we now test each component of the src register against zero, OR the
results together, and use the combined result as the condition for just
one kill instruction.
shader-db stats (bonaire):
979 shaders
Totals:
SGPRS: 34872 -> 34848 (-0.07 %)
VGPRS: 20696 -> 20676 (-0.10 %)
Code Size: 749032 -> 748452 (-0.08 %) bytes
LDS: 11 -> 11 (0.00 %) blocks
Scratch: 12288 -> 12288 (0.00 %) bytes per wave
Totals from affected shaders:
SGPRS: 1184 -> 1160 (-2.03 %)
VGPRS: 600 -> 580 (-3.33 %)
Code Size: 13200 -> 12620 (-4.39 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Scratch: 0 -> 0 (0.00 %) bytes per wave
Increases:
SGPRS: 2 (0.00 %)
VGPRS: 0 (0.00 %)
Code Size: 0 (0.00 %)
LDS: 0 (0.00 %)
Scratch: 0 (0.00 %)
Decreases:
SGPRS: 5 (0.01 %)
VGPRS: 5 (0.01 %)
Code Size: 25 (0.03 %)
LDS: 0 (0.00 %)
Scratch: 0 (0.00 %)
*** BY PERCENTAGE ***
Max Increase:
SGPRS: 32 -> 40 (25.00 %)
VGPRS: 0 -> 0 (0.00 %)
Code Size: 0 -> 0 (0.00 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Scratch: 0 -> 0 (0.00 %) bytes per wave
Max Decrease:
SGPRS: 32 -> 24 (-25.00 %)
VGPRS: 16 -> 12 (-25.00 %)
Code Size: 116 -> 96 (-17.24 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Scratch: 0 -> 0 (0.00 %) bytes per wave
*** BY UNIT ***
Max Increase:
SGPRS: 64 -> 72 (12.50 %)
VGPRS: 0 -> 0 (0.00 %)
Code Size: 0 -> 0 (0.00 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Scratch: 0 -> 0 (0.00 %) bytes per wave
Max Decrease:
SGPRS: 32 -> 24 (-25.00 %)
VGPRS: 16 -> 12 (-25.00 %)
Code Size: 424 -> 356 (-16.04 %) bytes
LDS: 0 -> 0 (0.00 %) blocks
Scratch: 0 -> 0 (0.00 %) bytes per wave
---
.../drivers/radeon/radeon_setup_tgsi_llvm.c | 29 ++++++++++++++++++++++
1 file changed, 29 insertions(+)
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
index 91c56a3..18afbcb 100644
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -637,6 +637,34 @@ static void uif_emit(
if_cond_emit(action, bld_base, emit_data, cond);
}
+static void kill_if_fetch_args(
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ const struct tgsi_full_instruction * inst = emit_data->inst;
+ struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
+ unsigned i;
+ LLVMValueRef conds[TGSI_NUM_CHANNELS];
+ /* Installed as KILL_IF's fetch_args hook: reduce all channels of source 0 to one argument. */
+ for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
+ LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
+ conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
+ bld_base->base.zero, ""); /* conds[i]: channel i < bld_base->base.zero (ordered compare) */
+ }
+
+ /* OR the per-channel tests from the last channel down, so conds[0] ends up covering every channel */
+ for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
+ conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
+ }
+ /* Publish one argument and a void result type: -1.0 if any channel tested true, otherwise base.zero. */
+ emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
+ emit_data->arg_count = 1;
+ emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
+ lp_build_const_float(gallivm, -1.0f),
+ bld_base->base.zero, "");
+}
+
static void kil_emit(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
@@ -1467,6 +1495,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
+ bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
--
2.0.4
More information about the mesa-dev
mailing list