[Mesa-dev] [PATCH 1/2] i965/vec4: Add a helper function to emit VS_OPCODE_PULL_CONSTANT_LOAD

Wed Apr 15 11:12:11 PDT 2015

On Wed, Apr 15, 2015 at 06:58:01PM +0100, Neil Roberts wrote:
> There were three places in the visitor that had a similar chunk of
> code to emit the VS_OPCODE_PULL_CONSTANT_LOAD opcode using a register
> for the offset. This patch combines the chunks into a helper function
> to reduce the code duplication. It will also be useful in the next
> patch to expand what happens on Gen9+. This shouldn't introduce any
> functional changes.

Hopefully you agreed with the suggestion and didn't make this change just for my sake :-)
Reviewed-by: Ben Widawsky <ben at bwidawsk.net>

> ---
>  src/mesa/drivers/dri/i965/brw_vec4.h           |   5 ++
>  src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 120 +++++++++++++------------
>  src/mesa/drivers/dri/i965/brw_vec4_vp.cpp      |  27 ++----
>  3 files changed, 75 insertions(+), 77 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
> index 700ca69..0363924 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4.h
> +++ b/src/mesa/drivers/dri/i965/brw_vec4.h
> @@ -364,6 +364,11 @@ public:
>  				dst_reg dst,
>  				src_reg orig_src,
>  				int base_offset);
> +   void emit_pull_constant_load_reg(dst_reg dst,
> +                                    src_reg surf_index,
> +                                    src_reg offset,
> +                                    bblock_t *before_block,
> +                                    vec4_instruction *before_inst);
>     src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
>                                  vec4_instruction *inst, src_reg src);
>  
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> index ffbe04d..f7d542b 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
> @@ -1296,6 +1296,63 @@ vec4_visitor::emit_lrp(const dst_reg &dst,
>     }
>  }
>  
> +/**
> + * Emits the instructions needed to perform a pull constant load. before_block
> + * and before_inst must either both be set or both be NULL; if they are NULL
> + * the instructions are appended to the end of the instruction list.
> + */
> +void
> +vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
> +                                          src_reg surf_index,
> +                                          src_reg offset_reg,
> +                                          bblock_t *before_block,
> +                                          vec4_instruction *before_inst)
> +{
> +   assert((before_inst == NULL && before_block == NULL) ||
> +          (before_inst && before_block));
> +
> +   vec4_instruction *pull;
> +
> +   if (brw->gen >= 7) {
> +      dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
> +
> +      /* We have to use a message header on Skylake to get SIMD4x2 mode.
> +       * Reserve space for the register.
> +       */
> +      if (brw->gen >= 9) {
> +         grf_offset.reg_offset++;
> +         alloc.sizes[grf_offset.reg] = 2;
> +      }
> +
> +      grf_offset.type = offset_reg.type;
> +
> +      pull = MOV(grf_offset, offset_reg);
> +
> +      if (before_inst)
> +         emit_before(before_block, before_inst, pull);
> +      else
> +         emit(pull);
> +
> +      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
> +                                           dst,
> +                                           surf_index,
> +                                           src_reg(grf_offset));
> +      pull->mlen = 1;
> +   } else {
> +      pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
> +                                           dst,
> +                                           surf_index,
> +                                           offset_reg);
> +      pull->base_mrf = 14;
> +      pull->mlen = 1;
> +   }
> +
> +   if (before_inst)
> +      emit_before(before_block, before_inst, pull);
> +   else
> +      emit(pull);
> +}
> +
>  void
>  vec4_visitor::visit(ir_expression *ir)
>  {
> @@ -1774,36 +1831,10 @@ vec4_visitor::visit(ir_expression *ir)
>           emit(SHR(dst_reg(offset), op[1], src_reg(4)));
>        }
>  
> -      if (brw->gen >= 7) {
> -         dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
> -
> -         /* We have to use a message header on Skylake to get SIMD4x2 mode.
> -          * Reserve space for the register.
> -          */
> -         if (brw->gen >= 9) {
> -            grf_offset.reg_offset++;
> -            alloc.sizes[grf_offset.reg] = 2;
> -         }
> -
> -         grf_offset.type = offset.type;
> -
> -         emit(MOV(grf_offset, offset));
> -
> -         vec4_instruction *pull =
> -            emit(new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
> -                                               dst_reg(packed_consts),
> -                                               surf_index,
> -                                               src_reg(grf_offset)));
> -         pull->mlen = 1;
> -      } else {
> -         vec4_instruction *pull =
> -            emit(new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
> -                                               dst_reg(packed_consts),
> -                                               surf_index,
> -                                               offset));
> -         pull->base_mrf = 14;
> -         pull->mlen = 1;
> -      }
> +      emit_pull_constant_load_reg(dst_reg(packed_consts),
> +                                  surf_index,
> +                                  offset,
> +                                  NULL, NULL /* before_block/inst */);
>  
>        packed_consts.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
>        packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
> @@ -3475,32 +3506,11 @@ vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
>     src_reg index = src_reg(prog_data->base.binding_table.pull_constants_start);
>     src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
>                                               reg_offset);
> -   vec4_instruction *load;
> -
> -   if (brw->gen >= 7) {
> -      dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
> -
> -      /* We have to use a message header on Skylake to get SIMD4x2 mode.
> -       * Reserve space for the register.
> -       */
> -      if (brw->gen >= 9) {
> -         grf_offset.reg_offset++;
> -         alloc.sizes[grf_offset.reg] = 2;
> -      }
>  
> -      grf_offset.type = offset.type;
> -      emit_before(block, inst, MOV(grf_offset, offset));
> -
> -      load = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
> -                                           temp, index, src_reg(grf_offset));
> -      load->mlen = 1;
> -   } else {
> -      load = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
> -                                           temp, index, offset);
> -      load->base_mrf = 14;
> -      load->mlen = 1;
> -   }
> -   emit_before(block, inst, load);
> +   emit_pull_constant_load_reg(temp,
> +                               index,
> +                               offset,
> +                               block, inst);
>  }
>  
>  /**
> diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
> index c3b0233..8756bef 100644
> --- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp
> @@ -528,14 +528,6 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
>           /* Add the small constant index to the address register */
>           src_reg reladdr = src_reg(this, glsl_type::int_type);
>  
> -         /* We have to use a message header on Skylake to get SIMD4x2 mode.
> -          * Reserve space for the register.
> -          */
> -         if (brw->gen >= 9) {
> -            reladdr.reg_offset++;
> -            alloc.sizes[reladdr.reg] = 2;
> -         }
> -
>           dst_reg dst_reladdr = dst_reg(reladdr);
>           dst_reladdr.writemask = WRITEMASK_X;
>           emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
> @@ -553,20 +545,11 @@ vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
>  
>           result = src_reg(this, glsl_type::vec4_type);
>           src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start));
> -         vec4_instruction *load;
> -         if (brw->gen >= 7) {
> -            load = new(mem_ctx)
> -               vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
> -                                dst_reg(result), surf_index, reladdr);
> -            load->mlen = 1;
> -         } else {
> -            load = new(mem_ctx)
> -               vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
> -                                dst_reg(result), surf_index, reladdr);
> -            load->base_mrf = 14;
> -            load->mlen = 1;
> -         }
> -         emit(load);
> +
> +         emit_pull_constant_load_reg(dst_reg(result),
> +                                     surf_index,
> +                                     reladdr,
> +                                     NULL, NULL /* before_block/inst */);
>           break;
>        }
>  
> -- 
> 1.9.3
> 

-- 
Ben Widawsky, Intel Open Source Technology Center