[Mesa-dev] [PATCH 09/10] i965/fs: Calculate delta_x and delta_y together.
Matt Turner
mattst88 at gmail.com
Fri Apr 17 11:56:13 PDT 2015
On Fri, Apr 17, 2015 at 11:45 AM, Jason Ekstrand <jason at jlekstrand.net> wrote:
> On Tue, Apr 14, 2015 at 4:15 PM, Matt Turner <mattst88 at gmail.com> wrote:
>> This lets SIMD16 programs on G45 and Gen5 use the PLN instruction.
>
> This patch also changes the meaning of FS_OPCODE_LINTERP. Those
> chainges should be described in the commit message as well.
>
>> On Ironlake:
>>
>> total instructions in shared programs: 5634757 -> 5518055 (-2.07%)
>> instructions in affected programs: 1745837 -> 1629135 (-6.68%)
>> helped: 11439
>> HURT: 4
>> ---
>> src/mesa/drivers/dri/i965/brw_fs.cpp | 46 +++++++-------------
>> src/mesa/drivers/dri/i965/brw_fs.h | 3 +-
>> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 5 +--
>> src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 13 +++---
>> src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 8 ++--
>> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 51 +++++++++++------------
>> src/mesa/drivers/dri/i965/brw_reg.h | 7 ++++
>> 7 files changed, 59 insertions(+), 74 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index 5ab8701..6e55f67 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -1259,8 +1259,7 @@ fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
>> emit(MOV(wpos, fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
>> } else {
>> emit(FS_OPCODE_LINTERP, wpos,
>> - this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>> - this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>> + this->delta_xy[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
>> interp_reg(VARYING_SLOT_POS, 2));
>> }
>> wpos = offset(wpos, 1);
>> @@ -1302,8 +1301,7 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
>> barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
>> }
>> return emit(FS_OPCODE_LINTERP, attr,
>> - this->delta_x[barycoord_mode],
>> - this->delta_y[barycoord_mode], interp);
>> + this->delta_xy[barycoord_mode], interp);
>> }
>>
>> void
>> @@ -1851,8 +1849,8 @@ fs_visitor::assign_urb_setup()
>> */
>> foreach_block_and_inst(block, fs_inst, inst, cfg) {
>> if (inst->opcode == FS_OPCODE_LINTERP) {
>> - assert(inst->src[2].file == HW_REG);
>> - inst->src[2].fixed_hw_reg.nr += urb_start;
>> + assert(inst->src[1].file == HW_REG);
>> + inst->src[1].fixed_hw_reg.nr += urb_start;
>> }
>>
>> if (inst->opcode == FS_OPCODE_CINTERP) {
>> @@ -2106,25 +2104,16 @@ fs_visitor::compact_virtual_grfs()
>> }
>> }
>>
>> - /* Patch all the references to delta_x/delta_y, since they're used in
>> - * register allocation. If they're unused, switch them to BAD_FILE so
>> - * we don't think some random VGRF is delta_x/delta_y.
>> + /* Patch all the references to delta_xy, since they're used in register
>> + * allocation. If they're unused, switch them to BAD_FILE so we don't
>> + * think some random VGRF is delta_xy.
>> */
>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
>> - if (delta_x[i].file == GRF) {
>> - if (remap_table[delta_x[i].reg] != -1) {
>> - delta_x[i].reg = remap_table[delta_x[i].reg];
>> + for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
>> + if (delta_xy[i].file == GRF) {
>> + if (remap_table[delta_xy[i].reg] != -1) {
>> + delta_xy[i].reg = remap_table[delta_xy[i].reg];
>> } else {
>> - delta_x[i].file = BAD_FILE;
>> - }
>> - }
>> - }
>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
>> - if (delta_y[i].file == GRF) {
>> - if (remap_table[delta_y[i].reg] != -1) {
>> - delta_y[i].reg = remap_table[delta_y[i].reg];
>> - } else {
>> - delta_y[i].file = BAD_FILE;
>> + delta_xy[i].file = BAD_FILE;
>> }
>> }
>> }
>> @@ -2589,14 +2578,9 @@ fs_visitor::opt_register_renaming()
>> if (progress) {
>> invalidate_live_intervals();
>>
>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_x); i++) {
>> - if (delta_x[i].file == GRF && remap[delta_x[i].reg] != -1) {
>> - delta_x[i].reg = remap[delta_x[i].reg];
>> - }
>> - }
>> - for (unsigned i = 0; i < ARRAY_SIZE(delta_y); i++) {
>> - if (delta_y[i].file == GRF && remap[delta_y[i].reg] != -1) {
>> - delta_y[i].reg = remap[delta_y[i].reg];
>> + for (unsigned i = 0; i < ARRAY_SIZE(delta_xy); i++) {
>> + if (delta_xy[i].file == GRF && remap[delta_xy[i].reg] != -1) {
>> + delta_xy[i].reg = remap[delta_xy[i].reg];
>> }
>> }
>> }
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
>> index b7c1c39..e04691c 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.h
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
>> @@ -512,8 +512,7 @@ public:
>> fs_reg pixel_y;
>> fs_reg wpos_w;
>> fs_reg pixel_w;
>> - fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
>> - fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
>> + fs_reg delta_xy[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
>> fs_reg shader_start_time;
>> fs_reg userplane[MAX_CLIP_PLANES];
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>> index fc9597e..dba3286 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
>> @@ -392,11 +392,10 @@ fs_generator::generate_linterp(fs_inst *inst,
>> struct brw_reg dst, struct brw_reg *src)
>> {
>> struct brw_reg delta_x = src[0];
>> - struct brw_reg delta_y = src[1];
>> - struct brw_reg interp = src[2];
>> + struct brw_reg delta_y = offset(src[0], dispatch_width / 8);
>
> This is an awkward use of offset(). It's supposed to take the width
> into account for you. Why do you need to do it explicitly? If
> there's something funny going on here, it deserves a comment.
I don't see any evidence of it considering width:
static inline struct brw_reg
offset(struct brw_reg reg, unsigned delta)
{
reg.nr += delta;
return reg;
}
Presumably you're thinking of offset(fs_reg, unsigned).
The delta_y variable is only used explicitly on original Gen4 where we
have to use line+mac. The delta values are in consecutive registers
(x0, x1), (y0, y1), and extending it to SIMD16 you add an extra two
registers, giving you (x0, x1), (x2, x3), (y0, y1), (y2, y3). So you
offset 1 register in SIMD8 and two registers in SIMD16.
Does that warrant a comment?
More information about the mesa-dev
mailing list