[Mesa-dev] [PATCH 4/5] i965/vs: Allow CSE to handle MULs with negated arguments.
Ian Romanick
idr at freedesktop.org
Wed Apr 8 16:38:30 PDT 2015
From: Ian Romanick <ian.d.romanick at intel.com>
This is similar to commit 47c4b38 ("i965/fs: Allow CSE to handle MULs
with negated arguments."), but it uses a slightly different approach.
Shader-db results:
GM45:
total instructions in shared programs: 4060813 -> 4060151 (-0.02%)
instructions in affected programs: 13448 -> 12786 (-4.92%)
helped: 62
HURT: 9
Results on all other platforms, except Broadwell, were identical to the
GM45 results without NIR. This makes sense, since NIR isn't used for VEC4.
Broadwell:
total instructions in shared programs: 7284561 -> 7284540 (-0.00%)
instructions in affected programs: 1272 -> 1251 (-1.65%)
helped: 12
Broadwell NIR:
total instructions in shared programs: 7500487 -> 7500487 (0.00%)
instructions in affected programs: 0 -> 0
Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
---
src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 32 +++++++++++++++++++++++++-----
1 file changed, 27 insertions(+), 5 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
index 100e511..49b50a7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_cse.cpp
@@ -90,15 +90,34 @@ is_expression(const vec4_instruction *const inst)
}
static bool
-operands_match(const vec4_instruction *a, const vec4_instruction *b)
+operands_match(const vec4_instruction *a, const vec4_instruction *b,
+ bool *negate)
{
const src_reg *xs = a->src;
const src_reg *ys = b->src;
+ *negate = false;
+
if (a->opcode == BRW_OPCODE_MAD) {
return xs[0].equals(ys[0]) &&
((xs[1].equals(ys[1]) && xs[2].equals(ys[2])) ||
(xs[2].equals(ys[1]) && xs[1].equals(ys[2])));
+ } else if (a->opcode == BRW_OPCODE_MUL) {
+ if ((xs[0].equals(ys[0]) && xs[1].equals(ys[1])) ||
+ (xs[1].equals(ys[0]) && xs[0].equals(ys[1])) ||
+ (xs[0].negative_equals(ys[0]) && xs[1].negative_equals(ys[1])) ||
+ (xs[1].negative_equals(ys[0]) && xs[0].negative_equals(ys[1])))
+ return true;
+
+ if ((xs[0].equals(ys[0]) && xs[1].negative_equals(ys[1])) ||
+ (xs[1].equals(ys[0]) && xs[0].negative_equals(ys[1])) ||
+ (xs[0].negative_equals(ys[0]) && xs[1].equals(ys[1])) ||
+ (xs[1].negative_equals(ys[0]) && xs[0].equals(ys[1]))) {
+ *negate = true;
+ return true;
+ }
+
+ return false;
} else if (!a->is_commutative()) {
return xs[0].equals(ys[0]) && xs[1].equals(ys[1]) && xs[2].equals(ys[2]);
} else {
@@ -108,7 +127,7 @@ operands_match(const vec4_instruction *a, const vec4_instruction *b)
}
static bool
-instructions_match(vec4_instruction *a, vec4_instruction *b)
+instructions_match(vec4_instruction *a, vec4_instruction *b, bool *negate)
{
return a->opcode == b->opcode &&
a->saturate == b->saturate &&
@@ -117,7 +136,7 @@ instructions_match(vec4_instruction *a, vec4_instruction *b)
a->dst.writemask == b->dst.writemask &&
a->force_writemask_all == b->force_writemask_all &&
a->regs_written == b->regs_written &&
- operands_match(a, b);
+ operands_match(a, b, negate);
}
bool
@@ -135,11 +154,12 @@ vec4_visitor::opt_cse_local(bblock_t *block)
(inst->dst.file != HW_REG || inst->dst.is_null()))
{
bool found = false;
+ bool negate;
foreach_in_list_use_after(aeb_entry, entry, &aeb) {
/* Match current instruction's expression against those in AEB. */
if (!(entry->generator->dst.is_null() && !inst->dst.is_null()) &&
- instructions_match(inst, entry->generator)) {
+ instructions_match(inst, entry->generator, &negate)) {
found = true;
progress = true;
break;
@@ -186,6 +206,7 @@ vec4_visitor::opt_cse_local(bblock_t *block)
vec4_instruction *copy = MOV(offset(inst->dst, i),
offset(entry->tmp, i));
copy->force_writemask_all = inst->force_writemask_all;
+ copy->src[0].negate = negate;
inst->insert_before(block, copy);
}
}
@@ -206,9 +227,10 @@ vec4_visitor::opt_cse_local(bblock_t *block)
* the flag register if we just wrote it.
*/
if (inst->writes_flag()) {
+ bool negate; /* dummy */
if (entry->generator->reads_flag() ||
(entry->generator->writes_flag() &&
- !instructions_match(inst, entry->generator))) {
+ !instructions_match(inst, entry->generator, &negate))) {
entry->remove();
ralloc_free(entry);
continue;
--
2.1.0
More information about the mesa-dev mailing list