[Mesa-dev] [PATCH 09/10] i965/fs: Calculate delta_x and delta_y together.

Jason Ekstrand jason at jlekstrand.net
Fri Apr 17 11:58:24 PDT 2015


On Fri, Apr 17, 2015 at 11:56 AM, Matt Turner <mattst88 at gmail.com> wrote:
> On Fri, Apr 17, 2015 at 11:45 AM, Jason Ekstrand <jason at jlekstrand.net> wrote:
>> On Tue, Apr 14, 2015 at 4:15 PM, Matt Turner <mattst88 at gmail.com> wrote:
>>> This lets SIMD16 programs on G45 and Gen5 use the PLN instruction.
>>
>> This patch also changes the meaning of FS_OPCODE_LINTERP.  Those
>> chainges should be described in the commit message as well.
>>
>>> On Ironlake:
>>>
>>> total instructions in shared programs: 5634757 -> 5518055 (-2.07%)
>>> instructions in affected programs:     1745837 -> 1629135 (-6.68%)
>>> helped:                                11439
>>> HURT:                                  4
>>> ---
>>>  src/mesa/drivers/dri/i965/brw_fs.cpp              | 46 +++++++-------------
>>>  src/mesa/drivers/dri/i965/brw_fs.h                |  3 +-
>>>  src/mesa/drivers/dri/i965/brw_fs_generator.cpp    |  5 +--
>>>  src/mesa/drivers/dri/i965/brw_fs_nir.cpp          | 13 +++---
>>>  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |  8 ++--
>>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp      | 51 +++++++++++------------
>>>  src/mesa/drivers/dri/i965/brw_reg.h               |  7 ++++
>>>  7 files changed, 59 insertions(+), 74 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
>>> index 5ab8701..6e55f67 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>>> @@ -1259,8 +1259,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
>>>        emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
>>>     } else {
>>>        emit(FS_OPCODE_LINTERP, wpos,
>>> -           this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>>> -           this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>>> +           this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>>>             interp_reg(VARYING_SLOT_POS, 2));
>>>     }
>>>     wpos = offset(wpos, 1);
>>> @@ -1302,8 +1301,7 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
>>>        barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
>>>     }
>>>     return emit(FS_OPCODE_LINTERP, attr,
>>> -               this->delta_x[barycoord_mode],
>>> -               this->delta_y[barycoord_mode], interp);
>>> +               this->delta_xy[barycoord_mode], interp);
>>>  }
>>>
>>>  void
>>> @@ -1851,8 +1849,8 @@ fs_visitor::assign_urb_setup()
>>>      */
>>>     foreach_block_and_inst(block, fs_inst, inst, cfg) {
>>>        if (inst->opcode == FS_OPCODE_LINTERP) {
>>> -        assert(inst->src[2].file == HW_REG);
>>> -        inst->src[2].fixed_hw_reg.nr += urb_start;
>>> +        assert(inst->src[1].file == HW_REG);
>>> +        inst->src[1].fixed_hw_reg.nr += urb_start;
>>>        }
>>>
>>>        if (inst->opcode == FS_OPCODE_CINTERP) {
>>> @@ -2106,25 +2104,16 @@ fs_visitor::compact_virtual_grfs()
>>>        }
>>>     }
>>>
>>> -   /* Patch all the references to delta_x/delta_y, since they're used in
>>> -    * register allocation.  If they're unused, switch them to BAD_FILE so
>>> -    * we don't think some random VGRF is delta_x/delta_y.
>>> +   /* Patch all the references to delta_xy, since they're used in register
>>> +    * allocation.  If they're unused, switch them to BAD_FILE so we don't
>>> +    * think some random VGRF is delta_xy.
>>>      */
>>> -   for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
>>> -      if (delta_x[i].file == GRF) {
>>> -         if (remap_table[delta_x[i].reg] != -1) {
>>> -            delta_x[i].reg = remap_table[delta_x[i].reg];
>>> +   for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
>>> +      if (delta_xy[i].file == GRF) {
>>> +         if (remap_table[delta_xy[i].reg] != -1) {
>>> +            delta_xy[i].reg = remap_table[delta_xy[i].reg];
>>>           } else {
>>> -            delta_x[i].file = BAD_FILE;
>>> -         }
>>> -      }
>>> -   }
>>> -   for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
>>> -      if (delta_y[i].file == GRF) {
>>> -         if (remap_table[delta_y[i].reg] != -1) {
>>> -            delta_y[i].reg = remap_table[delta_y[i].reg];
>>> -         } else {
>>> -            delta_y[i].file = BAD_FILE;
>>> +            delta_xy[i].file = BAD_FILE;
>>>           }
>>>        }
>>>     }
>>> @@ -2589,14 +2578,9 @@ fs_visitor::opt_register_renaming()
>>>     if (progress) {
>>>        invalidate_live_intervals();
>>>
>>> -      for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
>>> -         if (delta_x[i].file == GRF && remap[delta_x[i].reg] != -1) {
>>> -            delta_x[i].reg = remap[delta_x[i].reg];
>>> -         }
>>> -      }
>>> -      for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
>>> -         if (delta_y[i].file == GRF && remap[delta_y[i].reg] != -1) {
>>> -            delta_y[i].reg = remap[delta_y[i].reg];
>>> +      for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
>>> +         if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) {
>>> +            delta_xy[i].reg = remap[delta_xy[i].reg];
>>>           }
>>>        }
>>>     }
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
>>> index b7c1c39..e04691c 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs.h
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
>>> @@ -512,8 +512,7 @@ public:
>>>     fs_reg pixel_y;
>>>     fs_reg wpos_w;
>>>     fs_reg pixel_w;
>>> -   fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
>>> -   fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
>>> +   fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
>>>     fs_reg shader_start_time;
>>>     fs_reg userplane[MAX_CLIP_PLANES];
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>>> index fc9597e..dba3286 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>>> @@ -392,11 +392,10 @@ fs_generator::generate_linterp(fs_inst *inst,
>>>                              struct brw_reg dst, struct brw_reg *src)
>>>  {
>>>     struct brw_reg delta_x = src[0];
>>> -   struct brw_reg delta_y = src[1];
>>> -   struct brw_reg interp = src[2];
>>> +   struct brw_reg delta_y = offset(src[0], dispatch_width / 8);
>>
>> This is an awkward use of offset().  It's supposed to take the width
>> into account for you.  Why do you need to do it explicitly?  If
>> there's something funny going on here, it deserves a comment.
>
> I don't see any evidence of it considering width:

Right... Sorry, I was thinking about the fs_reg offset function.

> static inline struct brw_reg
> offset(struct brw_reg reg, unsigned delta)
> {
>    reg.nr += delta;
>    return reg;
> }
>
> Presumably you're thinking of offset(fs_reg, unsigned).
>
> The delta_y variable is only used explicitly on original Gen4 where we
> have to use line+mac. The delta values are in consecutive registers
> (x0, x1), (y0, y1), and extending it to SIMD16 you add an extra two
> registers, giving you (x0, x1), (x2, x3), (y0, y1), (y2, y3). So you
> offset 1 register in SIMD8 and two registers in SIMD16.
>
> Does that warrant a comment?

Nope, it's fine.


More information about the mesa-dev mailing list