[Mesa-dev] [PATCH 31/47] (0031) i965/fs: Extend thread payload layout to SIMD32.
Shaofeng Tang
shaofeng.tang at intel.com
Mon May 21 03:30:05 UTC 2018
From: Kevin Rogovin <kevin.rogovin at intel.com>
Also handle 32-wide payload register reads in fetch_payload_reg().
Change-Id: I7d6b8d5c2fe59d10cf4f0cc5e77455776851d519
---
src/intel/compiler/brw_fs.cpp | 14 +++++++-------
src/intel/compiler/brw_fs.h | 41 +++++++++++++++++++++++++++++-----------
src/intel/compiler/brw_wm_iz.cpp | 11 +++++++----
3 files changed, 44 insertions(+), 22 deletions(-)
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index bc3b881..b8bbdce 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -4079,12 +4079,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
}
}
- if (payload.aa_dest_stencil_reg /* XXX - payload.aa_dest_stencil_reg[0]*/) {
+ if (payload.aa_dest_stencil_reg[0] /* XXX - payload.aa_dest_stencil_reg[0]*/) {
assert(inst->group < 16);
sources[length] = fs_reg(VGRF, bld.shader->alloc.allocate(1));
bld.group(8, 0).exec_all().annotate("FB write stencil/AA alpha")
.MOV(sources[length],
- fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg
+ fs_reg(brw_vec8_grf(payload.aa_dest_stencil_reg[0]
/* payload.aa_dest_stencil_reg[inst->group / 16] */, 0)));
length++;
}
@@ -6044,7 +6044,7 @@ fs_visitor::setup_fs_payload_gen6()
*/
for (int i = 0; i < BRW_BARYCENTRIC_MODE_COUNT; ++i) {
if (prog_data->barycentric_interp_modes & (1 << i)) {
- payload.barycentric_coord_reg[i] = payload.num_regs;
+ payload.barycentric_coord_reg[i][0] = payload.num_regs;
payload.num_regs += 2;
if (dispatch_width == 16) {
payload.num_regs += 2;
@@ -6056,7 +6056,7 @@ fs_visitor::setup_fs_payload_gen6()
prog_data->uses_src_depth =
(nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
if (prog_data->uses_src_depth) {
- payload.source_depth_reg = payload.num_regs;
+ payload.source_depth_reg[0] = payload.num_regs;
payload.num_regs++;
if (dispatch_width == 16) {
/* R28: interpolated depth if not SIMD8. */
@@ -6068,7 +6068,7 @@ fs_visitor::setup_fs_payload_gen6()
prog_data->uses_src_w =
(nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
if (prog_data->uses_src_w) {
- payload.source_w_reg = payload.num_regs;
+ payload.source_w_reg[0] = payload.num_regs;
payload.num_regs++;
if (dispatch_width == 16) {
/* R30: interpolated W if not SIMD8. */
@@ -6089,7 +6089,7 @@ fs_visitor::setup_fs_payload_gen6()
* persample dispatch, we hard-code it to 0.5.
*/
prog_data->uses_pos_offset = true;
- payload.sample_pos_reg = payload.num_regs;
+ payload.sample_pos_reg[0] = payload.num_regs;
payload.num_regs++;
}
@@ -6098,7 +6098,7 @@ fs_visitor::setup_fs_payload_gen6()
(nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) != 0;
if (prog_data->uses_sample_mask) {
assert(devinfo->gen >= 7);
- payload.sample_mask_in_reg = payload.num_regs;
+ payload.sample_mask_in_reg[0] = payload.num_regs;
payload.num_regs++;
if (dispatch_width == 16) {
/* R33: input coverage mask if not SIMD8. */
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h
index d626780..e27c04e 100644
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -338,14 +338,15 @@ public:
/** Register numbers for thread payload fields. */
struct thread_payload {
- uint8_t source_depth_reg;
- uint8_t source_w_reg;
- uint8_t aa_dest_stencil_reg;
- uint8_t dest_depth_reg;
- uint8_t sample_pos_reg;
- uint8_t sample_mask_in_reg;
- uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT];
- uint8_t local_invocation_id_reg;
+ uint8_t subspan_coord_reg[2];
+ uint8_t source_depth_reg[2];
+ uint8_t source_w_reg[2];
+ uint8_t aa_dest_stencil_reg[2];
+ uint8_t dest_depth_reg[2];
+ uint8_t sample_pos_reg[2];
+ uint8_t sample_mask_in_reg[2];
+ uint8_t barycentric_coord_reg[BRW_BARYCENTRIC_MODE_COUNT][2];
+ uint8_t local_invocation_id_reg[2];
/** The number of thread payload registers the hardware will supply. */
uint8_t num_regs;
@@ -499,13 +500,31 @@ private:
namespace brw {
inline fs_reg
- fetch_payload_reg(const brw::fs_builder &bld, uint8_t reg,
+ fetch_payload_reg(const brw::fs_builder &bld, uint8_t regs[2], /* regs[g] is read for the g-th 16-wide group when the dispatch width exceeds 16; otherwise only regs[0] is used. */
brw_reg_type type = BRW_REGISTER_TYPE_F, unsigned n = 1)
{
- if (!reg) {
+ if (!regs[0]) { /* A zero regs[0] short-circuits to a default-constructed fs_reg. */
return fs_reg();
+
+ } if (bld.dispatch_width() > 16) { /* Not chained with `else`; only reached when the branch above did not return. */
+ const fs_reg tmp = bld.vgrf(type, n); /* Temporary that receives the assembled payload (n components of `type`). */
+ const brw::fs_builder hbld = bld.exec_all().group(16, 0); /* exec_all builder restricted to group(16, 0). */
+ const unsigned m = bld.dispatch_width() / hbld.dispatch_width(); /* Number of hbld-width groups per dispatch. NOTE(review): regs[] has two entries, so this presumably expects m <= 2 -- confirm. */
+ fs_reg *const components = new fs_reg[n * m]; /* One source per (component, group) pair; released with delete[] below. */
+
+ for (unsigned c = 0; c < n; c++) {
+ for (unsigned g = 0; g < m; g++)
+ components[c * m + g] = /* Component-major order: the m groups of component c are adjacent. */
+ offset(retype(brw_vec8_grf(regs[g], 0), type), hbld, c);
+ }
+
+ hbld.LOAD_PAYLOAD(tmp, components, n * m, 0); /* Gather the n * m sources into tmp. */
+
+ delete[] components; /* Scratch source array is no longer needed once the instruction is emitted. */
+ return tmp;
+
} else {
- return fs_reg(retype(brw_vec8_grf(reg, 0), type));
+ return fs_reg(retype(brw_vec8_grf(regs[0], 0), type)); /* Dispatch width <= 16: use regs[0] directly, retyped to `type`. */
}
}
}
diff --git a/src/intel/compiler/brw_wm_iz.cpp b/src/intel/compiler/brw_wm_iz.cpp
index fead165..b9b7e70 100644
--- a/src/intel/compiler/brw_wm_iz.cpp
+++ b/src/intel/compiler/brw_wm_iz.cpp
@@ -122,9 +122,10 @@ static const struct {
void fs_visitor::setup_fs_payload_gen4()
{
assert(stage == MESA_SHADER_FRAGMENT);
+ assert(dispatch_width <= 16);
struct brw_wm_prog_data *prog_data = brw_wm_prog_data(this->prog_data);
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- GLuint reg = 2;
+ GLuint reg = 1;
bool kill_stats_promoted_workaround = false;
int lookup = key->iz_lookup;
@@ -141,11 +142,13 @@ void fs_visitor::setup_fs_payload_gen4()
kill_stats_promoted_workaround = true;
}
+ payload.subspan_coord_reg[0] = reg++;
+
prog_data->uses_src_depth =
(nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
if (wm_iz_table[lookup].sd_present || prog_data->uses_src_depth ||
kill_stats_promoted_workaround) {
- payload.source_depth_reg = reg;
+ payload.source_depth_reg[0] = reg;
reg += 2;
}
@@ -153,14 +156,14 @@ void fs_visitor::setup_fs_payload_gen4()
source_depth_to_render_target = true;
if (wm_iz_table[lookup].ds_present || key->line_aa != BRW_WM_AA_NEVER) {
- payload.aa_dest_stencil_reg = reg;
+ payload.aa_dest_stencil_reg[0] = reg;
runtime_check_aads_emit =
!wm_iz_table[lookup].ds_present && key->line_aa == BRW_WM_AA_SOMETIMES;
reg++;
}
if (wm_iz_table[lookup].dd_present) {
- payload.dest_depth_reg = reg;
+ payload.dest_depth_reg[0] = reg;
reg+=2;
}
--
2.7.4
More information about the mesa-dev mailing list