[Mesa-dev] [PATCH 3/3] gallium/ttn: use single component address register
Rob Clark
robdclark at gmail.com
Wed Apr 8 15:34:08 PDT 2015
From: Rob Clark <robclark at freedesktop.org>
The address register only needs to be a vec1, and making it one helps out
the later opt stages. For example, the shader (after opt) for
fs-temp-array-mat3-index-col-row-wr changes as follows;
before:
vec1 ssa_408 = imul ssa_155, ssa_1
vec4 ssa_413 = vec4 ssa_408, ssa_412.y, ssa_412.z, ssa_412.w
vec4 ssa_166 = intrinsic load_uniform () () (0, 1)
vec4 ssa_772 = vec4 ssa_166, ssa_166.y, ssa_166.z, ssa_166.z
intrinsic store_var (ssa_772) (arr_5[ssa_413]) ()
vec4 ssa_416 = vec4 ssa_408, ssa_412.y, ssa_412.z, ssa_412.w
vec4 ssa_178 = intrinsic load_uniform () () (1, 1)
vec4 ssa_787 = vec4 ssa_178, ssa_178.y, ssa_178.z, ssa_178.z
intrinsic store_var (ssa_787) (arr_5[1 + ssa_416]) ()
vec4 ssa_190 = intrinsic load_uniform () () (2, 1)
vec4 ssa_802 = vec4 ssa_190, ssa_190.y, ssa_190.z, ssa_190.z
intrinsic store_var (ssa_802) (arr_5[2 + ssa_416]) ()
after:
vec1 ssa_408 = imul ssa_155, ssa_1
vec4 ssa_166 = intrinsic load_uniform () () (0, 1)
vec4 ssa_763 = vec4 ssa_166, ssa_166.y, ssa_166.z, ssa_166.z
intrinsic store_var (ssa_763) (arr_5[ssa_408]) ()
vec4 ssa_178 = intrinsic load_uniform () () (1, 1)
vec4 ssa_778 = vec4 ssa_178, ssa_178.y, ssa_178.z, ssa_178.z
intrinsic store_var (ssa_778) (arr_5[1 + ssa_408]) ()
vec4 ssa_190 = intrinsic load_uniform () () (2, 1)
vec4 ssa_793 = vec4 ssa_190, ssa_190.y, ssa_190.z, ssa_190.z
intrinsic store_var (ssa_793) (arr_5[2 + ssa_408]) ()
i.e. the optimizer now realizes that the indirect is the same for all three
store_var instructions, which avoids my backend generating duplicate
(mov (shl (cov))) instruction chains.
v2: add assert, and get rid of pointless imov in other indirect paths
Signed-off-by: Rob Clark <robclark at freedesktop.org>
---
src/gallium/auxiliary/nir/tgsi_to_nir.c | 12 +++++-------
1 file changed, 5 insertions(+), 7 deletions(-)
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index c3332cc..648ac6f 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -152,7 +152,7 @@ ttn_emit_declaration(struct ttn_compile *c)
}
} else if (file == TGSI_FILE_ADDRESS) {
c->addr_reg = nir_local_reg_create(b->impl);
- c->addr_reg->num_components = 4;
+ c->addr_reg->num_components = 1;
} else if (file == TGSI_FILE_SAMPLER) {
/* Nothing to record for samplers. */
} else {
@@ -350,12 +350,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
load->const_index[0] = index;
load->const_index[1] = 1;
if (indirect) {
- nir_alu_src indirect_address;
- memset(&indirect_address, 0, sizeof(indirect_address));
- indirect_address.src = nir_src_for_reg(c->addr_reg);
- for (int i = 0; i < 4; i++)
- indirect_address.swizzle[i] = indirect->Swizzle;
- load->src[0] = nir_src_for_ssa(nir_imov_alu(b, indirect_address, 1));
+ assert(indirect->Swizzle == TGSI_SWIZZLE_X);
+ load->src[0] = nir_src_for_reg(c->addr_reg);
}
nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
@@ -500,6 +496,8 @@ ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc)
tgsi_src->Index,
(tgsi_src->Indirect ?
&tgsi_fsrc->Indirect : NULL));
+ if (tgsi_src->File == TGSI_FILE_ADDRESS)
+ assert(tgsi_src->SwizzleX == TGSI_SWIZZLE_X);
}
src.swizzle[0] = tgsi_src->SwizzleX;
--
2.1.0
More information about the mesa-dev
mailing list