[Mesa-dev] [PATCH 4/4] intel/eu: Switch to a logical state stack
Jason Ekstrand
jason at jlekstrand.net
Wed May 30 00:02:54 UTC 2018
Instead of the state stack that's based on copying a dummy instruction
around, we start using a logical stack of brw_insn_states. This uses a
bit less memory and is way less conceptually bogus.
---
src/intel/compiler/brw_eu.c | 90 +++++++++++++++++-----------------------
src/intel/compiler/brw_eu.h | 34 ++++++++++++++-
src/intel/compiler/brw_eu_emit.c | 74 ++-------------------------------
3 files changed, 72 insertions(+), 126 deletions(-)
diff --git a/src/intel/compiler/brw_eu.c b/src/intel/compiler/brw_eu.c
index 5375209..d0e4ea2 100644
--- a/src/intel/compiler/brw_eu.c
+++ b/src/intel/compiler/brw_eu.c
@@ -129,91 +129,76 @@ brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
unsigned
brw_get_default_exec_size(struct brw_codegen *p)
{
- return brw_inst_exec_size(p->devinfo, p->current);
+ return p->current->exec_size;
}
unsigned
brw_get_default_group(struct brw_codegen *p)
{
- if (p->devinfo->gen >= 6) {
- unsigned group = brw_inst_qtr_control(p->devinfo, p->current) * 8;
- if (p->devinfo->gen >= 7)
- group += brw_inst_nib_control(p->devinfo, p->current) * 4;
- return group;
- } else {
- unsigned qtr_control = brw_inst_qtr_control(p->devinfo, p->current);
- if (qtr_control == BRW_COMPRESSION_COMPRESSED)
- return 0;
- else
- return qtr_control * 8;
- }
+ return p->current->group;
}
unsigned
brw_get_default_access_mode(struct brw_codegen *p)
{
- return brw_inst_access_mode(p->devinfo, p->current);
+ return p->current->access_mode;
}
void
brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
{
- brw_inst_set_exec_size(p->devinfo, p->current, value);
+ p->current->exec_size = value;
}
void brw_set_default_predicate_control( struct brw_codegen *p, unsigned pc )
{
- brw_inst_set_pred_control(p->devinfo, p->current, pc);
+ p->current->predicate = pc;
}
void brw_set_default_predicate_inverse(struct brw_codegen *p, bool predicate_inverse)
{
- brw_inst_set_pred_inv(p->devinfo, p->current, predicate_inverse);
+ p->current->pred_inv = predicate_inverse;
}
void brw_set_default_flag_reg(struct brw_codegen *p, int reg, int subreg)
{
- if (p->devinfo->gen >= 7)
- brw_inst_set_flag_reg_nr(p->devinfo, p->current, reg);
-
- brw_inst_set_flag_subreg_nr(p->devinfo, p->current, subreg);
+ assert(subreg < 2);
+ p->current->flag_subreg = reg * 2 + subreg;
}
void brw_set_default_access_mode( struct brw_codegen *p, unsigned access_mode )
{
- brw_inst_set_access_mode(p->devinfo, p->current, access_mode);
+ p->current->access_mode = access_mode;
}
void
brw_set_default_compression_control(struct brw_codegen *p,
enum brw_compression compression_control)
{
- if (p->devinfo->gen >= 6) {
- /* Since we don't use the SIMD32 support in gen6, we translate
- * the pre-gen6 compression control here.
+ switch (compression_control) {
+ case BRW_COMPRESSION_NONE:
+ /* This is the "use the first set of bits of dmask/vmask/arf
+ * according to execsize" option.
*/
- switch (compression_control) {
- case BRW_COMPRESSION_NONE:
- /* This is the "use the first set of bits of dmask/vmask/arf
- * according to execsize" option.
- */
- brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1Q);
- break;
- case BRW_COMPRESSION_2NDHALF:
- /* For SIMD8, this is "use the second set of 8 bits." */
- brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_2Q);
- break;
- case BRW_COMPRESSION_COMPRESSED:
- /* For SIMD16 instruction compression, use the first set of 16 bits
- * since we don't do SIMD32 dispatch.
- */
- brw_inst_set_qtr_control(p->devinfo, p->current, GEN6_COMPRESSION_1H);
- break;
- default:
- unreachable("not reached");
- }
- } else {
- brw_inst_set_qtr_control(p->devinfo, p->current, compression_control);
+ p->current->group = 0;
+ break;
+ case BRW_COMPRESSION_2NDHALF:
+ /* For SIMD8, this is "use the second set of 8 bits." */
+ p->current->group = 8;
+ break;
+ case BRW_COMPRESSION_COMPRESSED:
+ /* For SIMD16 instruction compression, use the first set of 16 bits
+ * since we don't do SIMD32 dispatch.
+ */
+ p->current->group = 0;
+ break;
+ default:
+ unreachable("not reached");
+ }
+
+ if (p->devinfo->gen <= 6) {
+ p->current->compressed =
+ (compression_control == BRW_COMPRESSION_COMPRESSED);
}
}
@@ -246,7 +231,7 @@ brw_inst_set_compression(const struct gen_device_info *devinfo,
void
brw_set_default_compression(struct brw_codegen *p, bool on)
{
- brw_inst_set_compression(p->devinfo, p->current, on);
+ p->current->compressed = on;
}
/**
@@ -283,23 +268,22 @@ brw_inst_set_group(const struct gen_device_info *devinfo,
void
brw_set_default_group(struct brw_codegen *p, unsigned group)
{
- brw_inst_set_group(p->devinfo, p->current, group);
+ p->current->group = group;
}
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value )
{
- brw_inst_set_mask_control(p->devinfo, p->current, value);
+ p->current->mask_control = value;
}
void brw_set_default_saturate( struct brw_codegen *p, bool enable )
{
- brw_inst_set_saturate(p->devinfo, p->current, enable);
+ p->current->saturate = enable;
}
void brw_set_default_acc_write_control(struct brw_codegen *p, unsigned value)
{
- if (p->devinfo->gen >= 6)
- brw_inst_set_acc_wr_control(p->devinfo, p->current, value);
+ p->current->acc_wr_control = value;
}
void brw_push_insn_state( struct brw_codegen *p )
diff --git a/src/intel/compiler/brw_eu.h b/src/intel/compiler/brw_eu.h
index cfbb537..d0ba9bd 100644
--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -46,6 +46,36 @@ extern "C" {
#define BRW_EU_MAX_INSN_STACK 5
+struct brw_insn_state {
+ /* One of BRW_EXECUTE_* */
+ unsigned exec_size:3;
+
+ /* Group in units of channels */
+ unsigned group:5;
+
+ /* Compression control on gen4-5 */
+ bool compressed:1;
+
+ /* One of BRW_MASK_* */
+ unsigned mask_control:1;
+
+ bool saturate:1;
+
+ /* One of BRW_ALIGN_* */
+ unsigned access_mode:1;
+
+ /* One of BRW_PREDICATE_* */
+ enum brw_predicate predicate:4;
+
+ bool pred_inv:1;
+
+ /* Flag subreg. Bottom bit is subreg, top bit is reg */
+ unsigned flag_subreg:2;
+
+ bool acc_wr_control:1;
+};
+
+
/* A helper for accessing the last instruction emitted. This makes it easy
* to set various bits on an instruction without having to create temporary
* variable and assign the emitted instruction to those.
@@ -62,8 +92,8 @@ struct brw_codegen {
/* Allow clients to push/pop instruction state:
*/
- brw_inst stack[BRW_EU_MAX_INSN_STACK];
- brw_inst *current;
+ struct brw_insn_state stack[BRW_EU_MAX_INSN_STACK];
+ struct brw_insn_state *current;
/** Whether or not the user wants automatic exec sizes
*
diff --git a/src/intel/compiler/brw_eu_emit.c b/src/intel/compiler/brw_eu_emit.c
index 0dfe26a..8dff1fd 100644
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -621,72 +621,6 @@ gen7_set_dp_scratch_message(struct brw_codegen *p,
brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
}
-struct brw_insn_state {
- /* One of BRW_EXECUTE_* */
- unsigned exec_size:3;
-
- /* Group in units of channels */
- unsigned group:5;
-
- /* Compression control on gen4-5 */
- bool compressed:1;
-
- /* One of BRW_MASK_* */
- unsigned mask_control:1;
-
- bool saturate:1;
-
- /* One of BRW_ALIGN_* */
- unsigned access_mode:1;
-
- /* One of BRW_PREDICATE_* */
- enum brw_predicate predicate:4;
-
- bool pred_inv:1;
-
- /* Flag subreg. Bottom bit is subreg, top bit is reg */
- unsigned flag_subreg:2;
-
- bool acc_wr_control:1;
-};
-
-static struct brw_insn_state
-brw_inst_get_state(const struct gen_device_info *devinfo,
- const brw_inst *insn)
-{
- struct brw_insn_state state = { };
-
- state.exec_size = brw_inst_exec_size(devinfo, insn);
- if (devinfo->gen >= 6) {
- state.group = brw_inst_qtr_control(devinfo, insn) * 8;
- if (devinfo->gen >= 7)
- state.group += brw_inst_nib_control(devinfo, insn) * 4;
- } else {
- unsigned qtr_control = brw_inst_qtr_control(devinfo, insn);
- if (qtr_control == BRW_COMPRESSION_COMPRESSED) {
- state.group = 0;
- state.compressed = true;
- } else {
- state.group = qtr_control * 8;
- state.compressed = false;
- }
- }
- state.access_mode = brw_inst_access_mode(devinfo, insn);
- state.mask_control = brw_inst_mask_control(devinfo, insn);
- state.saturate = brw_inst_saturate(devinfo, insn);
- state.predicate = brw_inst_pred_control(devinfo, insn);
- state.pred_inv = brw_inst_pred_inv(devinfo, insn);
-
- state.flag_subreg = brw_inst_flag_subreg_nr(devinfo, insn);
- if (devinfo->gen >= 7)
- state.flag_subreg += brw_inst_flag_reg_nr(devinfo, insn) * 2;
-
- if (devinfo->gen >= 6)
- state.acc_wr_control = brw_inst_acc_wr_control(devinfo, insn);
-
- return state;
-}
-
static void
brw_inst_set_state(const struct gen_device_info *devinfo,
brw_inst *insn,
@@ -735,8 +669,7 @@ brw_next_insn(struct brw_codegen *p, unsigned opcode)
brw_inst_set_opcode(devinfo, insn, opcode);
/* Apply the default instruction state */
- struct brw_insn_state current = brw_inst_get_state(devinfo, p->current);
- brw_inst_set_state(devinfo, insn, &current);
+ brw_inst_set_state(devinfo, insn, p->current);
return insn;
}
@@ -3503,9 +3436,8 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
*/
inst = brw_FBL(p, vec1(dst), exec_mask);
} else {
- const struct brw_reg flag = brw_flag_reg(
- brw_inst_flag_reg_nr(devinfo, p->current),
- brw_inst_flag_subreg_nr(devinfo, p->current));
+ const struct brw_reg flag = brw_flag_reg(p->current->flag_subreg / 2,
+ p->current->flag_subreg % 2);
brw_set_default_exec_size(p, BRW_EXECUTE_1);
brw_MOV(p, retype(flag, BRW_REGISTER_TYPE_UD), brw_imm_ud(0));
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list