[Mesa-dev] [PATCH 2/2] nir/lower_vec_to_movs: Coalesce reg writes when there are no hazards
Jason Ekstrand
jason at jlekstrand.net
Wed Apr 4 05:23:37 UTC 2018
Instead of just giving up on coalescing if the destination of the vecN
is a register, we look to see if there are any hazards that would
prevent us from moving the write earlier. This allows us to handle a
few more cases.
Shader-db results on Haswell:
total instructions in shared programs: 13659101 -> 13658993 (<.01%)
instructions in affected programs: 29438 -> 29330 (-0.37%)
helped: 36
HURT: 0
Cc: Matt Turner <mattst88 at gmail.com>
---
src/compiler/nir/nir_lower_vec_to_movs.c | 69 ++++++++++++++++++++++++++++++--
1 file changed, 66 insertions(+), 3 deletions(-)
diff --git a/src/compiler/nir/nir_lower_vec_to_movs.c b/src/compiler/nir/nir_lower_vec_to_movs.c
index 610a362..c38052a 100644
--- a/src/compiler/nir/nir_lower_vec_to_movs.c
+++ b/src/compiler/nir/nir_lower_vec_to_movs.c
@@ -112,13 +112,25 @@ has_replicated_dest(nir_alu_instr *alu)
alu->op == nir_op_fdph_replicated;
}
+static bool
+src_does_not_read_reg(nir_src *src, void *void_reg)
+{
+ return src->is_ssa || src->reg.reg != void_reg;
+}
+
+static bool
+dest_does_not_write_reg(nir_dest *dest, void *void_reg)
+{
+ return dest->is_ssa || dest->reg.reg != void_reg;
+}
+
/* Attempts to coalesce the "move" from the given source of the vec to the
* destination of the instruction generating the value. If, for whatever
* reason, we cannot coalesce the mmove, it does nothing and returns 0. We
* can then call insert_mov as normal.
*/
static unsigned
-try_coalesce(nir_alu_instr *vec, unsigned start_idx)
+try_coalesce(nir_alu_instr *vec, unsigned start_idx, bool vec_had_ssa_dest)
{
assert(start_idx < nir_op_infos[vec->op].num_inputs);
@@ -183,6 +195,57 @@ try_coalesce(nir_alu_instr *vec, unsigned start_idx)
write_mask |= 1 << i;
}
+ if (!vec_had_ssa_dest) {
+ /* If the vec instruction had a register destination, then we need to be
+ * careful about moving writes to the source instruction. Otherwise, we
+ * may end up trying to coalesce in a case such as this:
+ *
+ * ssa_1 = fadd r1, r2
+ * r3.x = fneg(r2);
+ * r3 = vec4(ssa_1, ssa_1.y, ...)
+ *
+ * To deal with this, we walk the instructions between the vec and the
+ * ALU op we're going to coalesce it into and ensure that there are no
+ * accesses of the destination register of the vec.
+ */
+
+ /* If they're not in the same block, there's not much we can do */
+ if (src_alu->instr.block != vec->instr.block)
+ return 0;
+
+ /* Since we know that src_alu dominates vec, we can just walk from
+ * one to the other.
+ */
+ for (nir_instr *instr = nir_instr_next(&src_alu->instr);
+ instr != &vec->instr; instr = nir_instr_next(instr)) {
+ if (instr->type == nir_instr_type_alu) {
+ nir_alu_instr *alu = nir_instr_as_alu(instr);
+ /* Only count this instruction's write as a hazard if its write
+ * mask overlaps with the write mask we are going to give src_alu
+ * if we can coalesce into it.
+ */
+ if (!alu->dest.dest.is_ssa &&
+ alu->dest.dest.reg.reg == vec->dest.dest.reg.reg &&
+ (alu->dest.write_mask & write_mask))
+ return 0;
+
+ for (unsigned j = 0; j < nir_op_infos[alu->op].num_inputs; j++) {
+ if (!alu->src[j].src.is_ssa &&
+ alu->src[j].src.reg.reg == vec->dest.dest.reg.reg)
+ return 0;
+ }
+ } else {
+ if (!nir_foreach_dest(instr, dest_does_not_write_reg,
+ vec->dest.dest.reg.reg))
+ return 0;
+
+ if (!nir_foreach_src(instr, src_does_not_read_reg,
+ vec->dest.dest.reg.reg))
+ return 0;
+ }
+ }
+ }
+
/* Stash off all of the ALU instruction's swizzles. */
uint8_t swizzles[4][4];
for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
@@ -274,8 +337,8 @@ lower_vec_to_movs_block(nir_block *block, nir_function_impl *impl)
* instruction in the source. We can only do this if the original
* vecN had an SSA destination.
*/
- if (vec_had_ssa_dest && !(finished_write_mask & (1 << i)))
- finished_write_mask |= try_coalesce(vec, i);
+ if (!(finished_write_mask & (1 << i)))
+ finished_write_mask |= try_coalesce(vec, i, vec_had_ssa_dest);
if (!(finished_write_mask & (1 << i)))
finished_write_mask |= insert_mov(vec, i, shader);
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list