[Mesa-dev] [PATCH 12/13] SQUASH: i965/fs: Set up fb-write payloads directly

Wed Apr 1 18:19:23 PDT 2015

Previously, we had a big helper function for setting up the color sources
of the LOAD_PAYLOAD instruction.  Now that lower_load_payload is much
saner, that complexity is no longer needed.  We can just stash the sources
directly in the LOAD_PAYLOAD and trust lower_load_payload to do the right
thing with respect to saturate and COMPR4.
---
 src/mesa/drivers/dri/i965/brw_fs.h           |   2 -
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 166 ++++++---------------------
 2 files changed, 38 insertions(+), 130 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 72c3d7d..2679e3c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -374,8 +374,6 @@ public:
    bool optimize_frontfacing_ternary(nir_alu_instr *instr,
                                      const fs_reg &result);
 
-   int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
-                           bool use_2nd_half);
    void emit_alpha_test();
    fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2,
                                  fs_reg src0_alpha, unsigned components,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index a0d8b798..9385a64 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -3427,108 +3427,6 @@ fs_visitor::emit_interpolation_setup_gen6()
    this->current_annotation = NULL;
 }
 
-int
-fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
-                                bool use_2nd_half)
-{
-   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-   fs_inst *inst;
-
-   if (color.file == BAD_FILE) {
-      return 4 * (dispatch_width / 8);
-   }
-
-   uint8_t colors_enabled;
-   if (components == 0) {
-      /* We want to write one component to the alpha channel */
-      colors_enabled = 0x8;
-   } else {
-      /* Enable the first components-many channels */
-      colors_enabled = (1 << components) - 1;
-   }
-
-   if (dispatch_width == 8 || (brw->gen >= 6 && !do_dual_src)) {
-      /* SIMD8 write looks like:
-       * m + 0: r0
-       * m + 1: r1
-       * m + 2: g0
-       * m + 3: g1
-       *
-       * gen6 SIMD16 DP write looks like:
-       * m + 0: r0
-       * m + 1: r1
-       * m + 2: g0
-       * m + 3: g1
-       * m + 4: b0
-       * m + 5: b1
-       * m + 6: a0
-       * m + 7: a1
-       */
-      int len = 0;
-      for (unsigned i = 0; i < 4; ++i) {
-         if (colors_enabled & (1 << i)) {
-            dst[len] = fs_reg(GRF, alloc.allocate(color.width / 8),
-                              color.type, color.width);
-            inst = emit(MOV(dst[len], offset(color, i)));
-            inst->saturate = key->clamp_fragment_color;
-         }
-         len++;
-      }
-      return len;
-   } else if (brw->gen >= 6 && do_dual_src) {
-      /* SIMD16 dual source blending for gen6+.
-       *
-       * From the SNB PRM, volume 4, part 1, page 193:
-       *
-       * "The dual source render target messages only have SIMD8 forms due to
-       *  maximum message length limitations. SIMD16 pixel shaders must send two
-       *  of these messages to cover all of the pixels. Each message contains
-       *  two colors (4 channels each) for each pixel in the message payload."
-       *
-       * So in SIMD16 dual source blending we will send 2 SIMD8 messages,
-       * each one will call this function twice (one for each color involved),
-       * so in each pass we only write 4 registers. Notice that the second
-       * SIMD8 message needs to read color data from the 2nd half of the color
-       * registers, so it needs to call this with use_2nd_half = true.
-       */
-      for (unsigned i = 0; i < 4; ++i) {
-         if (colors_enabled & (1 << i)) {
-            dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);
-            inst = emit(MOV(dst[i], half(offset(color, i),
-                                         use_2nd_half ? 1 : 0)));
-            inst->saturate = key->clamp_fragment_color;
-            if (use_2nd_half)
-               inst->force_sechalf = true;
-         }
-      }
-      return 4;
-   } else {
-      /* pre-gen6 SIMD16 single source DP write looks like:
-       * m + 0: r0
-       * m + 1: g0
-       * m + 2: b0
-       * m + 3: a0
-       * m + 4: r1
-       * m + 5: g1
-       * m + 6: b1
-       * m + 7: a1
-       */
-      for (unsigned i = 0; i < 4; ++i) {
-         if (colors_enabled & (1 << i)) {
-            dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);
-            inst = emit(MOV(dst[i], half(offset(color, i), 0)));
-            inst->saturate = key->clamp_fragment_color;
-
-            dst[i + 4] = fs_reg(GRF, alloc.allocate(1), color.type);
-            inst = emit(MOV(dst[i + 4], half(offset(color, i), 1)));
-            inst->saturate = key->clamp_fragment_color;
-            inst->force_sechalf = true;
-         }
-      }
-      return 8;
-   }
-}
-
 static enum brw_conditional_mod
 cond_for_alpha_func(GLenum func)
 {
@@ -3594,7 +3492,6 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
 
    this->current_annotation = "FB write header";
    int header_size = 2, payload_header_size;
-   int reg_size = exec_size / 8;
 
    /* We can potentially have a message length of up to 15, so we have to set
     * base_mrf to either 0 or 1 in order to fit in m0..m15.
@@ -3650,24 +3547,35 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
        * alpha out the pipeline to our null renderbuffer to support
        * alpha-testing, alpha-to-coverage, and so on.
        */
-      length += setup_color_payload(sources + length, this->outputs[0], 0,
-                                    false);
+      if (this->outputs[0].file != BAD_FILE)
+         sources[length + 3] = offset(this->outputs[0], 3);
+      length += 4;
    } else if (color1.file == BAD_FILE) {
-      if (src0_alpha.file != BAD_FILE) {
-         sources[length] = fs_reg(GRF, alloc.allocate(reg_size),
-                                  src0_alpha.type, src0_alpha.width);
-         fs_inst *inst = emit(MOV(sources[length], src0_alpha));
-         inst->saturate = key->clamp_fragment_color;
-         length++;
-      }
+      if (src0_alpha.file != BAD_FILE)
+         sources[length++] = src0_alpha;
 
-      length += setup_color_payload(sources + length, color0, components,
-                                    false);
+      for (unsigned i = 0; i < components; i++)
+         sources[length + i] = offset(color0, i);
+      length += 4;
    } else {
-      length += setup_color_payload(sources + length, color0, components,
-                                    use_2nd_half);
-      length += setup_color_payload(sources + length, color1, components,
-                                    use_2nd_half);
+      if (exec_size < dispatch_width) {
+         unsigned half_idx = use_2nd_half ? 1 : 0;
+         for (unsigned i = 0; i < components; i++)
+            sources[length + i] = half(offset(color0, i), half_idx);
+         length += 4;
+
+         for (unsigned i = 0; i < components; i++)
+            sources[length + i] = half(offset(color1, i), half_idx);
+         length += 4;
+      } else {
+         for (unsigned i = 0; i < components; i++)
+            sources[length + i] = offset(color0, i);
+         length += 4;
+
+         for (unsigned i = 0; i < components; i++)
+            sources[length + i] = offset(color1, i);
+         length += 4;
+      }
    }
 
    if (source_depth_to_render_target) {
@@ -3680,25 +3588,19 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
 	 no16("Missing support for simd16 depth writes on gen6\n");
       }
 
-      sources[length] = vgrf(glsl_type::float_type);
       if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
 	 /* Hand over gl_FragDepth. */
 	 assert(this->frag_depth.file != BAD_FILE);
-	 emit(MOV(sources[length], this->frag_depth));
+         sources[length] = this->frag_depth;
       } else {
 	 /* Pass through the payload depth. */
-	 emit(MOV(sources[length],
-                  fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
+         sources[length] = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
       }
       length++;
    }
 
-   if (payload.dest_depth_reg) {
-      sources[length] = vgrf(glsl_type::float_type);
-      emit(MOV(sources[length],
-               fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0))));
-      length++;
-   }
+   if (payload.dest_depth_reg)
+      sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0));
 
    fs_inst *load;
    fs_inst *write;
@@ -3714,10 +3616,18 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
       /* Send from the MRF */
       load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size),
                                sources, length, payload_header_size));
+
+      /* On pre-SNB, we have to interlace the color values.  LOAD_PAYLOAD
+       * will do this for us if we just give it a COMPR4 destination.
+       */
+      if (brw->gen < 6 && exec_size == 16)
+         load->dst.reg |= BRW_MRF_COMPR4;
+
       write = emit(FS_OPCODE_FB_WRITE);
       write->exec_size = exec_size;
       write->base_mrf = 1;
    }
+   load->saturate = key->clamp_fragment_color;
 
    write->mlen = load->regs_written;
    write->header_size = header_size;
-- 
2.3.4