[Mesa-dev] [PATCH] gallium/ttn: add support for temp arrays
Rob Clark
robdclark at gmail.com
Wed Apr 8 12:32:51 PDT 2015
On Wed, Apr 8, 2015 at 11:14 AM, Eric Anholt <eric at anholt.net> wrote:
> Rob Clark <robdclark at gmail.com> writes:
>
>> From: Rob Clark <robclark at freedesktop.org>
>>
>> Since the rest of NIR really would rather have these as variables rather
>> than registers, create a nir_variable per array. But rather than
>> completely re-arrange ttn to be variable based rather than register
>> based, keep the registers. In the cases where there is a matching var
>> for the reg, ttn_emit_instruction will append the appropriate intrinsic
>> to get things back from the shadow reg into the variable.
>>
>> NOTE: this doesn't quite handle TEMP[ADDR[]] when the DCL doesn't give
>> an array id. But those just kinda suck, and should really go away.
>> AFAICT we don't get those from glsl. Might be an issue for some other
>> state tracker.
>>
>> v2: rework to use load_var/store_var with deref chains
>>
>> Signed-off-by: Rob Clark <robclark at freedesktop.org>
>> ---
>> src/gallium/auxiliary/nir/tgsi_to_nir.c | 122 +++++++++++++++++++++++++++-----
>> 1 file changed, 103 insertions(+), 19 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
>> index da935a4..f4c0bad 100644
>> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
>> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
>> @@ -44,6 +44,7 @@
>> struct ttn_reg_info {
>> /** nir register containing this TGSI index. */
>> nir_register *reg;
>> + nir_variable *var;
>> /** Offset (in vec4s) from the start of var for this TGSI index. */
>> int offset;
>> };
>> @@ -121,22 +122,29 @@ ttn_emit_declaration(struct ttn_compile *c)
>>
>> if (file == TGSI_FILE_TEMPORARY) {
>> nir_register *reg;
>> - if (c->scan->indirect_files & (1 << file)) {
>> + nir_variable *var = NULL;
>> +
>> + if (decl->Declaration.Array) {
>> + /* for arrays, the register created just serves as a
>> + * shadow register. We append intrinsic_store_global
>> + * after the tgsi instruction is translated to move
>> + * back from the shadow register to the variable
>> + */
>> + var = rzalloc(b->shader, nir_variable);
>> +
>> + var->type = glsl_array_type(glsl_vec4_type(), array_size);
>> + var->data.mode = nir_var_global;
>> + var->name = ralloc_asprintf(var, "arr_%d", decl->Array.ArrayID);
>> +
>> + exec_list_push_tail(&b->shader->globals, &var->node);
>> + }
>> +
>> + for (i = 0; i < array_size; i++) {
>> reg = nir_local_reg_create(b->impl);
>> reg->num_components = 4;
>> - reg->num_array_elems = array_size;
>> -
>> - for (i = 0; i < array_size; i++) {
>> - c->temp_regs[decl->Range.First + i].reg = reg;
>> - c->temp_regs[decl->Range.First + i].offset = i;
>> - }
>> - } else {
>> - for (i = 0; i < array_size; i++) {
>> - reg = nir_local_reg_create(b->impl);
>> - reg->num_components = 4;
>> - c->temp_regs[decl->Range.First + i].reg = reg;
>> - c->temp_regs[decl->Range.First + i].offset = 0;
>> - }
>> + c->temp_regs[decl->Range.First + i].reg = reg;
>> + c->temp_regs[decl->Range.First + i].var = var;
>> + c->temp_regs[decl->Range.First + i].offset = i;
>
> Continuing to use array_size here doesn't make any sense to me, since if
> you're not handling variable array indices when generating stores into
> the array. So all you want is a single vec4 reg available so that you
> have something that our ALU op generation can do writemasked stores
> into, and you're picking an arbitrary one of them in ttn_get_dest().
>
> I think this would make a ton more sense if ttn_get_dest() just returned
> a new vec4 local reg for the temporary, instead of having this
> sort-of-shadow thing.
>
so the shadow registers did make things like:
DCL TEMP[0..2], ARRAY(1), LOCAL
....
1: MOV TEMP[1].x, IN[1].wwww
2: MOV TEMP[1].yz, IN[2].yxyy
much easier to deal with.. I'm still thinking about how to handle that
w/ the create-new-temp-register-each-time approach.. but yeah, doesn't
work as well if you have indirect dst.
BR,
-R
>> }
>> } else if (file == TGSI_FILE_ADDRESS) {
>> c->addr_reg = nir_local_reg_create(b->impl);
>> @@ -245,6 +253,32 @@ ttn_emit_immediate(struct ttn_compile *c)
>> static nir_src *
>> ttn_src_for_indirect(struct ttn_compile *c, struct tgsi_ind_register *indirect);
>>
>> +/* generate either a constant or indirect deref chain for accessing an
>> + * array variable.
>> + */
>> +static nir_deref_var *
>> +ttn_array_deref(struct ttn_compile *c, nir_variable *var, unsigned offset,
>> + struct tgsi_ind_register *indirect)
>> +{
>> + nir_builder *b = &c->build;
>> + nir_deref_var *deref = nir_deref_var_create(b->shader, var);
>> + nir_deref_array *arr = nir_deref_array_create(b->shader);
>> +
>> + arr->base_offset = offset;
>> + arr->deref.type = glsl_get_array_element(var->type);
>> +
>> + if (indirect) {
>> + arr->deref_array_type = nir_deref_array_type_indirect;
>> + arr->indirect = nir_src_for_reg(c->addr_reg);
>> + } else {
>> + arr->deref_array_type = nir_deref_array_type_direct;
>> + }
>> +
>> + deref->deref.child = &arr->deref;
>> +
>> + return deref;
>> +}
>> +
>> static nir_src
>> ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
>> struct tgsi_ind_register *indirect)
>> @@ -256,10 +290,25 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
>>
>> switch (file) {
>> case TGSI_FILE_TEMPORARY:
>> - src.reg.reg = c->temp_regs[index].reg;
>> - src.reg.base_offset = c->temp_regs[index].offset;
>> - if (indirect)
>> - src.reg.indirect = ttn_src_for_indirect(c, indirect);
>> + if (c->temp_regs[index].var) {
>> + unsigned offset = c->temp_regs[index].offset;
>> + nir_variable *var = c->temp_regs[index].var;
>> + nir_intrinsic_instr *load;
>> +
>> + load = nir_intrinsic_instr_create(b->shader,
>> + nir_intrinsic_load_var);
>> + load->num_components = 4;
>> + load->variables[0] = ttn_array_deref(c, var, offset, indirect);
>> +
>> + nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
>> + nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
>> +
>> + src = nir_src_for_ssa(&load->dest.ssa);
>> +
>> + } else {
>> + assert(!indirect);
>> + src.reg.reg = c->temp_regs[index].reg;
>> + }
>> break;
>>
>> case TGSI_FILE_ADDRESS:
>> @@ -363,6 +412,21 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
>> return dest;
>> }
>
> I don't see where you deleted the "if (tgsi_dst->Indirect)" line in
> ttn_get_dest, so I'm confused how this works at all since your variable
> stores don't handle the indirect.
>
>>
>> +static nir_variable *
>> +ttn_get_var(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)
>> +{
>> + struct tgsi_dst_register *tgsi_dst = &tgsi_fdst->Register;
>> + unsigned index = tgsi_dst->Index;
>> +
>> + if (tgsi_dst->File == TGSI_FILE_TEMPORARY) {
>> + return c->temp_regs[index].var;
>> + } else if (tgsi_dst->File == TGSI_FILE_OUTPUT) {
>> + return c->output_regs[index].var;
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> static nir_ssa_def *
>> ttn_get_src(struct ttn_compile *c, struct tgsi_full_src_register *tgsi_fsrc)
>> {
>> @@ -1133,6 +1197,7 @@ ttn_emit_instruction(struct ttn_compile *c)
>> struct tgsi_full_instruction *tgsi_inst = &c->token->FullInstruction;
>> unsigned i;
>> unsigned tgsi_op = tgsi_inst->Instruction.Opcode;
>> + struct tgsi_full_dst_register *tgsi_dst = &tgsi_inst->Dst[0];
>>
>> if (tgsi_op == TGSI_OPCODE_END)
>> return;
>> @@ -1141,7 +1206,7 @@ ttn_emit_instruction(struct ttn_compile *c)
>> for (i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++) {
>> src[i] = ttn_get_src(c, &tgsi_inst->Src[i]);
>> }
>> - nir_alu_dest dest = ttn_get_dest(c, &tgsi_inst->Dst[0]);
>> + nir_alu_dest dest = ttn_get_dest(c, tgsi_dst);
>>
>> switch (tgsi_op) {
>> case TGSI_OPCODE_RSQ:
>> @@ -1331,6 +1396,25 @@ ttn_emit_instruction(struct ttn_compile *c)
>> assert(!dest.dest.is_ssa);
>> ttn_move_dest(b, dest, nir_fsat(b, ttn_src_for_dest(b, &dest)));
>> }
>> +
>> + /* if the dst has a matching var, append store_global to move
>> + * output from reg to var
>> + */
>> + nir_variable *var = ttn_get_var(c, tgsi_dst);
>> + if (var) {
>> + unsigned index = tgsi_dst->Register.Index;
>> + unsigned offset = c->temp_regs[index].offset;
>> + nir_intrinsic_instr *store =
>> + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var);
>> + struct tgsi_ind_register *indirect = tgsi_dst->Register.Indirect ?
>> + &tgsi_dst->Indirect : NULL;
>> +
>> + store->num_components = 4;
>> + store->variables[0] = ttn_array_deref(c, var, offset, indirect);
>> + store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
>> +
>> + nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
>> + }
>> }
>
> What about a destination with a writemask? You'd need to load from the
> var and merge in before storing.
More information about the mesa-dev
mailing list