[Mesa-dev] [PATCH 4/4] i965: Implement SIMD16 texturing on Gen4.
Jordan Justen
jordan.l.justen at intel.com
Sat Apr 4 16:28:49 PDT 2015
On 2015-04-04 01:23:28, Kenneth Graunke wrote:
> This allows SIMD16 mode to work for a lot more programs. Texturing is
> also more efficient in SIMD16 mode than SIMD8. Several messages don't
> actually exist in SIMD8 mode, so we did SIMD16 messages and threw away
> half of the data. Now we compute real data in both halves.
>
> Also, unlike the SIMD8 one, the SIMD16 "sample" message doesn't require
> all three coordinate components to exist, so we can shorten the message
> lengths, cutting register usage a bit.
>
> I chose to implement the visitor functionality in a separate function,
> since mixing true SIMD16 with SIMD8 code that uses SIMD16 fallbacks
> seemed like a mess. The new code bails on a few cases where we'd
> have to do two SIMD8 messages - we just fall back to SIMD8 for now.
>
> Improves performance in "Shadowrun: Dragonfall - Director's Cut" by
> about 20% on GM45 (measured with LIBGL_SHOW_FPS=1 while standing around
> in the first mission).
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/mesa/drivers/dri/i965/brw_fs.h | 4 ++
> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 28 ++++++++---
> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 68 +++++++++++++++++++++++++-
> 3 files changed, 90 insertions(+), 10 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index 278a8ee..cfdbf55 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -271,6 +271,10 @@ public:
> fs_reg shadow_comp,
> fs_reg lod, fs_reg lod2, int grad_components,
> uint32_t sampler);
> + fs_inst *emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
> + fs_reg coordinate, int vector_elements,
> + fs_reg shadow_c, fs_reg lod,
> + uint32_t sampler);
> fs_inst *emit_texture_gen5(ir_texture_opcode op, fs_reg dst,
> fs_reg coordinate, int coord_components,
> fs_reg shadow_comp,
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> index 40e51aa..2743297 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
> @@ -622,16 +622,26 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
> /* Note that G45 and older determines shadow compare and dispatch width
> * from message length for most messages.
> */
> - assert(dispatch_width == 8);
> - msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
> - if (inst->shadow_compare) {
> - assert(inst->mlen == 6);
> - } else {
> - assert(inst->mlen <= 4);
> - }
> + if (dispatch_width == 8) {
> + msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
> + if (inst->shadow_compare) {
> + assert(inst->mlen == 6);
> + } else {
> + assert(inst->mlen <= 4);
> + }
> + } else {
> + if (inst->shadow_compare) {
> + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
> + assert(inst->mlen == 9);
> + } else {
> + msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
> + assert(inst->mlen <= 7 && inst->mlen % 2 == 1);
> + }
> + }
> break;
> case FS_OPCODE_TXB:
> if (inst->shadow_compare) {
> + assert(dispatch_width == 8);
> assert(inst->mlen == 6);
> msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE;
> } else {
> @@ -642,6 +652,7 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
> break;
> case SHADER_OPCODE_TXL:
> if (inst->shadow_compare) {
> + assert(dispatch_width == 8);
> assert(inst->mlen == 6);
> msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE;
> } else {
> @@ -652,11 +663,12 @@ fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src
> break;
> case SHADER_OPCODE_TXD:
> /* There is no sample_d_c message; comparisons are done manually */
> + assert(dispatch_width == 8);
> assert(inst->mlen == 7 || inst->mlen == 10);
> msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS;
> break;
> case SHADER_OPCODE_TXF:
> - assert(inst->mlen == 9);
> + assert(inst->mlen <= 9 && inst->mlen % 2 == 1);
> msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD;
> simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
> break;
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 8c0ec33..25c424a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -1435,8 +1435,6 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
> bool simd16 = false;
> fs_reg orig_dst;
>
> - no16("SIMD16 texturing on Gen4 not supported yet.");
> -
> /* g0 header. */
> mlen = 1;
>
> @@ -1588,6 +1586,69 @@ fs_visitor::emit_texture_gen4(ir_texture_opcode op, fs_reg dst,
> return inst;
> }
>
> +fs_inst *
> +fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst,
> + fs_reg coordinate, int vector_elements,
> + fs_reg shadow_c, fs_reg lod,
> + uint32_t sampler)
> +{
> + fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width);
> + bool has_lod = op == ir_txl || op == ir_txb;
> +
> + if (has_lod && shadow_c.file != BAD_FILE)
> + no16("TXB and TXL with shadow comparison unsupported in SIMD16.");
> +
> + if (op == ir_txd)
> + no16("textureGrad unsupported in SIMD16.");
> +
> + /* Copy the coordinates. */
> + for (int i = 0; i < vector_elements; i++) {
> + emit(MOV(retype(offset(message, i), coordinate.type), coordinate));
> + coordinate = offset(coordinate, 1);
> + }
> +
> + fs_reg msg_end = offset(message, vector_elements);
> +
> + /* Messages other than sample and ld require all three components */
> + if (has_lod || shadow_c.file != BAD_FILE) {
> + for (int i = vector_elements; i < 3; i++) {
> + emit(MOV(offset(message, i), fs_reg(0.0f)));
> + }
> + }
> +
> + if (has_lod) {
> + fs_reg msg_lod = retype(offset(message, 3), op == ir_txf ?
> + BRW_REGISTER_TYPE_UD : BRW_REGISTER_TYPE_F);
From above, has_lod is defined as (op == ir_txl || op == ir_txb), so the
op == ir_txf check here will always be false, right?
Should has_lod also check for ir_txf?
Otherwise,
Series Reviewed-by: Jordan Justen <jordan.l.justen at intel.com>
> + emit(MOV(msg_lod, lod));
> + msg_end = offset(msg_lod, 1);
> + }
> +
> + if (shadow_c.file != BAD_FILE) {
> + fs_reg msg_ref = offset(message, 3 + has_lod);
> + emit(MOV(msg_ref, shadow_c));
> + msg_end = offset(msg_ref, 1);
> + }
> +
> + enum opcode opcode;
> + switch (op) {
> + case ir_tex: opcode = SHADER_OPCODE_TEX; break;
> + case ir_txb: opcode = FS_OPCODE_TXB; break;
> + case ir_txd: opcode = SHADER_OPCODE_TXD; break;
> + case ir_txl: opcode = SHADER_OPCODE_TXL; break;
> + case ir_txs: opcode = SHADER_OPCODE_TXS; break;
> + case ir_txf: opcode = SHADER_OPCODE_TXF; break;
> + default: unreachable("not reached");
> + }
> +
> + fs_inst *inst = emit(opcode, dst, reg_undef, fs_reg(sampler));
> + inst->base_mrf = message.reg - 1;
> + inst->mlen = msg_end.reg - inst->base_mrf;
> + inst->header_present = true;
> + inst->regs_written = 8;
> +
> + return inst;
> +}
> +
> /* gen5's sampler has slots for u, v, r, array index, then optional
> * parameters like shadow comparitor or LOD bias. If optional
> * parameters aren't present, those base slots are optional and don't
> @@ -2150,6 +2211,9 @@ fs_visitor::emit_texture(ir_texture_opcode op,
> shadow_c, lod, lod2, grad_components,
> sample_index, sampler,
> offset_value.file != BAD_FILE);
> + } else if (dispatch_width == 16) {
> + inst = emit_texture_gen4_simd16(op, dst, coordinate, coord_components,
> + shadow_c, lod, sampler);
> } else {
> inst = emit_texture_gen4(op, dst, coordinate, coord_components,
> shadow_c, lod, lod2, grad_components,
> --
> 2.1.2
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev mailing list