[Mesa-dev] [PATCH 12/13] SQUASH: i965/fs: Set up fb-write payloads directly
Jason Ekstrand
jason at jlekstrand.net
Wed Apr 1 18:19:23 PDT 2015
Previously, we had a big helper function, setup_color_payload(), for setting
up the color sources of the LOAD_PAYLOAD instruction. Now that
lower_load_payload is much more sane, all of that complexity is no longer
needed. We can just stash the sources directly in the LOAD_PAYLOAD and trust
lower_load_payload to do the right thing with respect to saturate and COMPR4.
---
src/mesa/drivers/dri/i965/brw_fs.h | 2 -
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 166 ++++++---------------------
2 files changed, 38 insertions(+), 130 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 72c3d7d..2679e3c 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -374,8 +374,6 @@ public:
bool optimize_frontfacing_ternary(nir_alu_instr *instr,
const fs_reg &result);
- int setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
- bool use_2nd_half);
void emit_alpha_test();
fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2,
fs_reg src0_alpha, unsigned components,
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index a0d8b798..9385a64 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -3427,108 +3427,6 @@ fs_visitor::emit_interpolation_setup_gen6()
this->current_annotation = NULL;
}
-int
-fs_visitor::setup_color_payload(fs_reg *dst, fs_reg color, unsigned components,
- bool use_2nd_half)
-{
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- fs_inst *inst;
-
- if (color.file == BAD_FILE) {
- return 4 * (dispatch_width / 8);
- }
-
- uint8_t colors_enabled;
- if (components == 0) {
- /* We want to write one component to the alpha channel */
- colors_enabled = 0x8;
- } else {
- /* Enable the first components-many channels */
- colors_enabled = (1 << components) - 1;
- }
-
- if (dispatch_width == 8 || (brw->gen >= 6 && !do_dual_src)) {
- /* SIMD8 write looks like:
- * m + 0: r0
- * m + 1: r1
- * m + 2: g0
- * m + 3: g1
- *
- * gen6 SIMD16 DP write looks like:
- * m + 0: r0
- * m + 1: r1
- * m + 2: g0
- * m + 3: g1
- * m + 4: b0
- * m + 5: b1
- * m + 6: a0
- * m + 7: a1
- */
- int len = 0;
- for (unsigned i = 0; i < 4; ++i) {
- if (colors_enabled & (1 << i)) {
- dst[len] = fs_reg(GRF, alloc.allocate(color.width / 8),
- color.type, color.width);
- inst = emit(MOV(dst[len], offset(color, i)));
- inst->saturate = key->clamp_fragment_color;
- }
- len++;
- }
- return len;
- } else if (brw->gen >= 6 && do_dual_src) {
- /* SIMD16 dual source blending for gen6+.
- *
- * From the SNB PRM, volume 4, part 1, page 193:
- *
- * "The dual source render target messages only have SIMD8 forms due to
- * maximum message length limitations. SIMD16 pixel shaders must send two
- * of these messages to cover all of the pixels. Each message contains
- * two colors (4 channels each) for each pixel in the message payload."
- *
- * So in SIMD16 dual source blending we will send 2 SIMD8 messages,
- * each one will call this function twice (one for each color involved),
- * so in each pass we only write 4 registers. Notice that the second
- * SIMD8 message needs to read color data from the 2nd half of the color
- * registers, so it needs to call this with use_2nd_half = true.
- */
- for (unsigned i = 0; i < 4; ++i) {
- if (colors_enabled & (1 << i)) {
- dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);
- inst = emit(MOV(dst[i], half(offset(color, i),
- use_2nd_half ? 1 : 0)));
- inst->saturate = key->clamp_fragment_color;
- if (use_2nd_half)
- inst->force_sechalf = true;
- }
- }
- return 4;
- } else {
- /* pre-gen6 SIMD16 single source DP write looks like:
- * m + 0: r0
- * m + 1: g0
- * m + 2: b0
- * m + 3: a0
- * m + 4: r1
- * m + 5: g1
- * m + 6: b1
- * m + 7: a1
- */
- for (unsigned i = 0; i < 4; ++i) {
- if (colors_enabled & (1 << i)) {
- dst[i] = fs_reg(GRF, alloc.allocate(1), color.type);
- inst = emit(MOV(dst[i], half(offset(color, i), 0)));
- inst->saturate = key->clamp_fragment_color;
-
- dst[i + 4] = fs_reg(GRF, alloc.allocate(1), color.type);
- inst = emit(MOV(dst[i + 4], half(offset(color, i), 1)));
- inst->saturate = key->clamp_fragment_color;
- inst->force_sechalf = true;
- }
- }
- return 8;
- }
-}
-
static enum brw_conditional_mod
cond_for_alpha_func(GLenum func)
{
@@ -3594,7 +3492,6 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
this->current_annotation = "FB write header";
int header_size = 2, payload_header_size;
- int reg_size = exec_size / 8;
/* We can potentially have a message length of up to 15, so we have to set
* base_mrf to either 0 or 1 in order to fit in m0..m15.
@@ -3650,24 +3547,35 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
* alpha out the pipeline to our null renderbuffer to support
* alpha-testing, alpha-to-coverage, and so on.
*/
- length += setup_color_payload(sources + length, this->outputs[0], 0,
- false);
+ if (this->outputs[0].file != BAD_FILE)
+ sources[length + 3] = offset(this->outputs[0], 3);
+ length += 4;
} else if (color1.file == BAD_FILE) {
- if (src0_alpha.file != BAD_FILE) {
- sources[length] = fs_reg(GRF, alloc.allocate(reg_size),
- src0_alpha.type, src0_alpha.width);
- fs_inst *inst = emit(MOV(sources[length], src0_alpha));
- inst->saturate = key->clamp_fragment_color;
- length++;
- }
+ if (src0_alpha.file != BAD_FILE)
+ sources[length++] = src0_alpha;
- length += setup_color_payload(sources + length, color0, components,
- false);
+ for (unsigned i = 0; i < components; i++)
+ sources[length + i] = offset(color0, i);
+ length += 4;
} else {
- length += setup_color_payload(sources + length, color0, components,
- use_2nd_half);
- length += setup_color_payload(sources + length, color1, components,
- use_2nd_half);
+ if (exec_size < dispatch_width) {
+ unsigned half_idx = use_2nd_half ? 1 : 0;
+ for (unsigned i = 0; i < components; i++)
+ sources[length + i] = half(offset(color0, i), half_idx);
+ length += 4;
+
+ for (unsigned i = 0; i < components; i++)
+ sources[length + i] = half(offset(color1, i), half_idx);
+ length += 4;
+ } else {
+ for (unsigned i = 0; i < components; i++)
+ sources[length + i] = offset(color0, i);
+ length += 4;
+
+ for (unsigned i = 0; i < components; i++)
+ sources[length + i] = offset(color1, i);
+ length += 4;
+ }
}
if (source_depth_to_render_target) {
@@ -3680,25 +3588,19 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
no16("Missing support for simd16 depth writes on gen6\n");
}
- sources[length] = vgrf(glsl_type::float_type);
if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
/* Hand over gl_FragDepth. */
assert(this->frag_depth.file != BAD_FILE);
- emit(MOV(sources[length], this->frag_depth));
+ sources[length] = this->frag_depth;
} else {
/* Pass through the payload depth. */
- emit(MOV(sources[length],
- fs_reg(brw_vec8_grf(payload.source_depth_reg, 0))));
+ sources[length] = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
}
length++;
}
- if (payload.dest_depth_reg) {
- sources[length] = vgrf(glsl_type::float_type);
- emit(MOV(sources[length],
- fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0))));
- length++;
- }
+ if (payload.dest_depth_reg)
+ sources[length++] = fs_reg(brw_vec8_grf(payload.dest_depth_reg, 0));
fs_inst *load;
fs_inst *write;
@@ -3714,10 +3616,18 @@ fs_visitor::emit_single_fb_write(fs_reg color0, fs_reg color1,
/* Send from the MRF */
load = emit(LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size),
sources, length, payload_header_size));
+
+ /* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD
+ * will do this for us if we just give it a COMPR4 destination.
+ */
+ if (brw->gen < 6 && exec_size == 16)
+ load->dst.reg |= BRW_MRF_COMPR4;
+
write = emit(FS_OPCODE_FB_WRITE);
write->exec_size = exec_size;
write->base_mrf = 1;
}
+ load->saturate = key->clamp_fragment_color;
write->mlen = load->regs_written;
write->header_size = header_size;
--
2.3.4
More information about the mesa-dev mailing list