[Mesa-dev] [PATCH 09/10] i965/fs: Calculate delta_x and delta_y together.

Kenneth Graunke kenneth at whitecape.org
Fri Apr 17 11:58:07 PDT 2015


On Friday, April 17, 2015 11:45:57 AM Jason Ekstrand wrote:
> On Tue, Apr 14, 2015 at 4:15 PM, Matt Turner <mattst88 at gmail.com> wrote:
> > This lets SIMD16 programs on G45 and Gen5 use the PLN instruction.
> 
> This patch also changes the meaning of FS_OPCODE_LINTERP.  Those
> chainges should be described in the commit message as well.
> 
> > On Ironlake:
> >
> > total instructions in shared programs: 5634757 -> 5518055 (-2.07%)
> > instructions in affected programs:     1745837 -> 1629135 (-6.68%)
> > helped:                                11439
> > HURT:                                  4
> > ---
> >  src/mesa/drivers/dri/i965/brw_fs.cpp              | 46 +++++++-------------
> >  src/mesa/drivers/dri/i965/brw_fs.h                |  3 +-
> >  src/mesa/drivers/dri/i965/brw_fs_generator.cpp    |  5 +--
> >  src/mesa/drivers/dri/i965/brw_fs_nir.cpp          | 13 +++---
> >  src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp |  8 ++--
> >  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp      | 51 +++++++++++------------
> >  src/mesa/drivers/dri/i965/brw_reg.h               |  7 ++++
> >  7 files changed, 59 insertions(+), 74 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > index 5ab8701..6e55f67 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> > @@ -1259,8 +1259,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
> >        emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
> >     } else {
> >        emit(FS_OPCODE_LINTERP, wpos,
> > -           this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
> > -           this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
> > +           this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
> >             interp_reg(VARYING_SLOT_POS, 2));
> >     }
> >     wpos = offset(wpos, 1);
> > @@ -1302,8 +1301,7 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
> >        barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
> >     }
> >     return emit(FS_OPCODE_LINTERP, attr,
> > -               this->delta_x[barycoord_mode],
> > -               this->delta_y[barycoord_mode], interp);
> > +               this->delta_xy[barycoord_mode], interp);
> >  }
> >
> >  void
> > @@ -1851,8 +1849,8 @@ fs_visitor::assign_urb_setup()
> >      */
> >     foreach_block_and_inst(block, fs_inst, inst, cfg) {
> >        if (inst->opcode == FS_OPCODE_LINTERP) {
> > -        assert(inst->src[2].file == HW_REG);
> > -        inst->src[2].fixed_hw_reg.nr += urb_start;
> > +        assert(inst->src[1].file == HW_REG);
> > +        inst->src[1].fixed_hw_reg.nr += urb_start;
> >        }
> >
> >        if (inst->opcode == FS_OPCODE_CINTERP) {
> > @@ -2106,25 +2104,16 @@ fs_visitor::compact_virtual_grfs()
> >        }
> >     }
> >
> > -   /* Patch all the references to delta_x/delta_y, since they're used in
> > -    * register allocation.  If they're unused, switch them to BAD_FILE so
> > -    * we don't think some random VGRF is delta_x/delta_y.
> > +   /* Patch all the references to delta_xy, since they're used in register
> > +    * allocation.  If they're unused, switch them to BAD_FILE so we don't
> > +    * think some random VGRF is delta_xy.
> >      */
> > -   for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
> > -      if (delta_x[i].file == GRF) {
> > -         if (remap_table[delta_x[i].reg] != -1) {
> > -            delta_x[i].reg = remap_table[delta_x[i].reg];
> > +   for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
> > +      if (delta_xy[i].file == GRF) {
> > +         if (remap_table[delta_xy[i].reg] != -1) {
> > +            delta_xy[i].reg = remap_table[delta_xy[i].reg];
> >           } else {
> > -            delta_x[i].file = BAD_FILE;
> > -         }
> > -      }
> > -   }
> > -   for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
> > -      if (delta_y[i].file == GRF) {
> > -         if (remap_table[delta_y[i].reg] != -1) {
> > -            delta_y[i].reg = remap_table[delta_y[i].reg];
> > -         } else {
> > -            delta_y[i].file = BAD_FILE;
> > +            delta_xy[i].file = BAD_FILE;
> >           }
> >        }
> >     }
> > @@ -2589,14 +2578,9 @@ fs_visitor::opt_register_renaming()
> >     if (progress) {
> >        invalidate_live_intervals();
> >
> > -      for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
> > -         if (delta_x[i].file == GRF && remap[delta_x[i].reg] != -1) {
> > -            delta_x[i].reg = remap[delta_x[i].reg];
> > -         }
> > -      }
> > -      for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
> > -         if (delta_y[i].file == GRF && remap[delta_y[i].reg] != -1) {
> > -            delta_y[i].reg = remap[delta_y[i].reg];
> > +      for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
> > +         if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) {
> > +            delta_xy[i].reg = remap[delta_xy[i].reg];
> >           }
> >        }
> >     }
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> > index b7c1c39..e04691c 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs.h
> > +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> > @@ -512,8 +512,7 @@ public:
> >     fs_reg pixel_y;
> >     fs_reg wpos_w;
> >     fs_reg pixel_w;
> > -   fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
> > -   fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
> > +   fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
> >     fs_reg shader_start_time;
> >     fs_reg userplane[MAX_CLIP_PLANES];
> >
> > diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > index fc9597e..dba3286 100644
> > --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> > @@ -392,11 +392,10 @@ fs_generator::generate_linterp(fs_inst *inst,
> >                              struct brw_reg dst, struct brw_reg *src)
> >  {
> >     struct brw_reg delta_x = src[0];
> > -   struct brw_reg delta_y = src[1];
> > -   struct brw_reg interp = src[2];
> > +   struct brw_reg delta_y = offset(src[0], dispatch_width / 8);
> 
> This is an awkward use of offset().  It's supposed to take the width
> into account for you.  Why do you need to do it explicitly?  If
> there's something funny going on here, it deserves a comment.

Oh the confusion!

I believe Matt is using brw_reg.h's offset(), which works on brw_regs:

    static inline struct brw_reg
    offset(struct brw_reg reg, unsigned delta)
    {
       reg.nr += delta;
       return reg;
    }

while you're talking about brw_ir_fs.h's offset(), which works on
fs_regs:

    static inline fs_reg
    offset(fs_reg reg, unsigned delta)
    {  
       ...piles of code...
    }
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 819 bytes
Desc: This is a digitally signed message part.
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150417/565f7846/attachment.sig>


More information about the mesa-dev mailing list