[Mesa-dev] [PATCH 1/7] i965: Introduce the BROADCAST pseudo-opcode.
Matt Turner
mattst88 at gmail.com
Wed Apr 29 22:29:46 PDT 2015
On Fri, Feb 20, 2015 at 11:48 AM, Francisco Jerez <currojerez at riseup.net> wrote:
> The BROADCAST instruction picks the channel from its first source
> given by an index passed in as second source. This will be used in
> situations where all channels from the same SIMD thread have to agree
> on the value of something, e.g. a surface binding table index.
> ---
> src/mesa/drivers/dri/i965/brw_defines.h | 6 ++
> src/mesa/drivers/dri/i965/brw_eu.h | 6 ++
> src/mesa/drivers/dri/i965/brw_eu_emit.c | 77 ++++++++++++++++++++++++
> src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 4 ++
> src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +
> src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 4 ++
> 6 files changed, 100 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
> index 17c27dd..d4930e3 100644
> --- a/src/mesa/drivers/dri/i965/brw_defines.h
> +++ b/src/mesa/drivers/dri/i965/brw_defines.h
> @@ -911,6 +911,12 @@ enum opcode {
>
> SHADER_OPCODE_URB_WRITE_SIMD8,
>
> + /**
> + * Pick the channel from its first source register given by the index
> + * specified as second source. Useful for variable indexing of surfaces.
> + */
> + SHADER_OPCODE_BROADCAST,
> +
> VEC4_OPCODE_MOV_BYTES,
> VEC4_OPCODE_PACK_BYTES,
> VEC4_OPCODE_UNPACK_UNIFORM,
> diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
> index a94ea42..2505480 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu.h
> +++ b/src/mesa/drivers/dri/i965/brw_eu.h
> @@ -413,6 +413,12 @@ brw_pixel_interpolator_query(struct brw_compile *p,
> unsigned msg_length,
> unsigned response_length);
>
> +void
> +brw_broadcast(struct brw_compile *p,
> + struct brw_reg dst,
> + struct brw_reg src,
> + struct brw_reg idx);
> +
> /***********************************************************************
> * brw_eu_util.c:
> */
> diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> index 1d6fd67..d7e3995 100644
> --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
> +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
> @@ -2854,6 +2854,83 @@ brw_pixel_interpolator_query(struct brw_compile *p,
> brw_inst_set_pi_message_data(brw, insn, data);
> }
>
> +void
> +brw_broadcast(struct brw_compile *p,
> + struct brw_reg dst,
> + struct brw_reg src,
> + struct brw_reg idx)
> +{
> + const struct brw_context *brw = p->brw;
> + const bool align1 = (brw_inst_access_mode(brw, p->current) == BRW_ALIGN_1);
Unnecessary parentheses around the comparison.
> + brw_inst *inst;
> +
> + assert(src.file == BRW_GENERAL_REGISTER_FILE &&
> + src.address_mode == BRW_ADDRESS_DIRECT);
> +
> + if ((src.vstride == 0 && (src.hstride == 0 || !align1)) ||
> + idx.file == BRW_IMMEDIATE_VALUE) {
> + /* Trivial, the source is already uniform or the index is a constant.
> + * We will typically not get here if the optimizer is doing its job, but
> + * asserting would be mean.
> + */
> + const unsigned i = (idx.file == BRW_IMMEDIATE_VALUE ? idx.dw1.ud : 0);
Unnecessary parentheses around the conditional expression.
> + brw_MOV(p, dst,
> + (align1 ? stride(suboffset(src, i), 0, 1, 0) :
> + stride(suboffset(src, 4 * i), 0, 4, 1)));
> +
Extra blank line before the closing brace.
> + } else {
> + if (align1) {
> + const struct brw_reg addr =
> + retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD);
> + const unsigned offset = src.nr * REG_SIZE + src.subnr;
> + /* Limit in bytes of the signed indirect addressing immediate. */
> + const unsigned limit = 512;
> +
> + brw_push_insn_state(p);
> + brw_set_default_mask_control(p, BRW_MASK_DISABLE);
> + brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
> +
> + /* Take into account the component size and horizontal stride. */
> + assert(src.vstride == src.hstride + src.width);
> + brw_SHL(p, addr, vec1(idx),
> + brw_imm_ud(_mesa_logbase2(type_sz(src.type)) +
> + src.hstride - 1));
> +
> + /* We can only address up to limit bytes using the indirect
> + * addressing immediate, account for the difference if the source
> + * register is above this limit.
> + */
> + if (offset >= limit)
> + brw_ADD(p, addr, addr, brw_imm_ud(offset - offset % limit));
> +
> + brw_pop_insn_state(p);
> +
> + /* Use indirect addressing to fetch the specified component. */
> + brw_MOV(p, dst,
> + retype(brw_vec1_indirect(addr.subnr, offset % limit),
> + src.type));
> +
Extra blank line after the MOV.
Putting some of Ian's explanation for why this is needed into the
commit message might be good. I had to go read the piglit tests before
I really understood.
More information about the mesa-dev mailing list