[Mesa-dev] [PATCH 09/10] i965/fs: Calculate delta_x and delta_y together.
Jason Ekstrand
jason at jlekstrand.net
Fri Apr 17 11:58:24 PDT 2015
On Fri, Apr 17, 2015 at 11:56 AM, Matt Turner <mattst88 at gmail.com> wrote:
> On Fri, Apr 17, 2015 at 11:45 AM, Jason Ekstrand <jason at jlekstrand.net> wrote:
>> On Tue, Apr 14, 2015 at 4:15 PM, Matt Turner <mattst88 at gmail.com> wrote:
>>> This lets SIMD16 programs on G45 and Gen5 use the PLN instruction.
>>
>> This patch also changes the meaning of FS_OPCODE_LINTERP. Those
>> chainges should be described in the commit message as well.
>>
>>> On Ironlake:
>>>
>>> total instructions in shared programs: 5634757 -> 5518055 (-2.07%)
>>> instructions in affected programs: 1745837 -> 1629135 (-6.68%)
>>> helped: 11439
>>> HURT: 4
>>> ---
>>> src/mesa/drivers/dri/i965/brw_fs.cpp | 46 +++++++-------------
>>> src/mesa/drivers/dri/i965/brw_fs.h | 3 +-
>>> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +--
>>> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 13 +++---
>>> src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 8 ++--
>>> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 51 +++++++++++------------
>>> src/mesa/drivers/dri/i965/brw_reg.h | 7 ++++
>>> 7 files changed, 59 insertions(+), 74 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
>>> index 5ab8701..6e55f67 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>>> @@ -1259,8 +1259,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
>>> emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
>>> } else {
>>> emit(FS_OPCODE_LINTERP, wpos,
>>> - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>>> - this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>>> + this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>>> interp_reg(VARYING_SLOT_POS, 2));
>>> }
>>> wpos = offset(wpos, 1);
>>> @@ -1302,8 +1301,7 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
>>> barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
>>> }
>>> return emit(FS_OPCODE_LINTERP, attr,
>>> - this->delta_x[barycoord_mode],
>>> - this->delta_y[barycoord_mode], interp);
>>> + this->delta_xy[barycoord_mode], interp);
>>> }
>>>
>>> void
>>> @@ -1851,8 +1849,8 @@ fs_visitor::assign_urb_setup()
>>> */
>>> foreach_block_and_inst(block, fs_inst, inst, cfg) {
>>> if (inst->opcode == FS_OPCODE_LINTERP) {
>>> - assert(inst->src[2].file == HW_REG);
>>> - inst->src[2].fixed_hw_reg.nr += urb_start;
>>> + assert(inst->src[1].file == HW_REG);
>>> + inst->src[1].fixed_hw_reg.nr += urb_start;
>>> }
>>>
>>> if (inst->opcode == FS_OPCODE_CINTERP) {
>>> @@ -2106,25 +2104,16 @@ fs_visitor::compact_virtual_grfs()
>>> }
>>> }
>>>
>>> - /* Patch all the references to delta_x/delta_y, since they're used in
>>> - * register allocation. If they're unused, switch them to BAD_FILE so
>>> - * we don't think some random VGRF is delta_x/delta_y.
>>> + /* Patch all the references to delta_xy, since they're used in register
>>> + * allocation. If they're unused, switch them to BAD_FILE so we don't
>>> + * think some random VGRF is delta_xy.
>>> */
>>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
>>> - if (delta_x[i].file == GRF) {
>>> - if (remap_table[delta_x[i].reg] != -1) {
>>> - delta_x[i].reg = remap_table[delta_x[i].reg];
>>> + for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
>>> + if (delta_xy[i].file == GRF) {
>>> + if (remap_table[delta_xy[i].reg] != -1) {
>>> + delta_xy[i].reg = remap_table[delta_xy[i].reg];
>>> } else {
>>> - delta_x[i].file = BAD_FILE;
>>> - }
>>> - }
>>> - }
>>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
>>> - if (delta_y[i].file == GRF) {
>>> - if (remap_table[delta_y[i].reg] != -1) {
>>> - delta_y[i].reg = remap_table[delta_y[i].reg];
>>> - } else {
>>> - delta_y[i].file = BAD_FILE;
>>> + delta_xy[i].file = BAD_FILE;
>>> }
>>> }
>>> }
>>> @@ -2589,14 +2578,9 @@ fs_visitor::opt_register_renaming()
>>> if (progress) {
>>> invalidate_live_intervals();
>>>
>>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
>>> - if (delta_x[i].file == GRF && remap[delta_x[i].reg] != -1) {
>>> - delta_x[i].reg = remap[delta_x[i].reg];
>>> - }
>>> - }
>>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
>>> - if (delta_y[i].file == GRF && remap[delta_y[i].reg] != -1) {
>>> - delta_y[i].reg = remap[delta_y[i].reg];
>>> + for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
>>> + if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) {
>>> + delta_xy[i].reg = remap[delta_xy[i].reg];
>>> }
>>> }
>>> }
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
>>> index b7c1c39..e04691c 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs.h
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
>>> @@ -512,8 +512,7 @@ public:
>>> fs_reg pixel_y;
>>> fs_reg wpos_w;
>>> fs_reg pixel_w;
>>> - fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
>>> - fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
>>> + fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
>>> fs_reg shader_start_time;
>>> fs_reg userplane[MAX_CLIP_PLANES];
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>>> index fc9597e..dba3286 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>>> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>>> @@ -392,11 +392,10 @@ fs_generator::generate_linterp(fs_inst *inst,
>>> struct brw_reg dst, struct brw_reg *src)
>>> {
>>> struct brw_reg delta_x = src[0];
>>> - struct brw_reg delta_y = src[1];
>>> - struct brw_reg interp = src[2];
>>> + struct brw_reg delta_y = offset(src[0], dispatch_width / 8);
>>
>> This is an awkward use of offset(). It's supposed to take the width
>> into account for you. Why do you need to do it explicitly? If
>> there's something funny going on here, it deserves a comment.
>
> I don't see any evidence of it considering width:
Right... Sorry, I was thinking about the fs_reg offset function.
> static inline struct brw_reg
> offset(struct brw_reg reg, unsigned delta)
> {
> reg.nr += delta;
> return reg;
> }
>
> Presumably you're thinking of offset(fs_reg, unsigned).
>
> The delta_y variable is only used explicitly on original Gen4 where we
> have to use line+mac. The delta values are in consecutive registers
> (x0, x1), (y0, y1), and extending it to SIMD16 you add an extra two
> registers, giving you (x0, x1), (x2, x3), (y0, y1), (y2, y3). So you
> offset 1 register in SIMD8 and two registers in SIMD16.
>
> Does that warrant a comment?
Nope, it's fine.
More information about the mesa-dev
mailing list