[Mesa-dev] [PATCH v3 5/7] ac: add LLVM build functions for subgroup instrinsics
Nicolai Hähnle
nhaehnle at gmail.com
Sun Apr 15 19:13:37 UTC 2018
I'm a bit late, but just some stylistic nitpicks for the future:
As a general stylistic note, prefer `false' as the last argument to
LLVMConstInt (since it's a boolean, whether to sign-extend).
[snip]
> +{
> + ac_build_optimization_barrier(ctx, &src);
> + LLVMValueRef result;
> + LLVMValueRef identity = get_reduction_identity(ctx, op,
> + ac_get_type_size(LLVMTypeOf(src)));
> + result = LLVMBuildBitCast(ctx->builder,
> + ac_build_set_inactive(ctx, src, identity),
> + LLVMTypeOf(identity), "");
Weird whitespace, are you using a non-default tab width?
> + result = ac_build_scan(ctx, op, result, identity);
> +
> + return ac_build_wwm(ctx, result);
> +}
> +
> +LLVMValueRef
> +ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op)
> +{
> + ac_build_optimization_barrier(ctx, &src);
> + LLVMValueRef result;
> + LLVMValueRef identity = get_reduction_identity(ctx, op,
> + ac_get_type_size(LLVMTypeOf(src)));
> + result = LLVMBuildBitCast(ctx->builder,
> + ac_build_set_inactive(ctx, src, identity),
> + LLVMTypeOf(identity), "");
> + result = ac_build_dpp(ctx, identity, result, dpp_wf_sr1, 0xf, 0xf, false);
> + result = ac_build_scan(ctx, op, result, identity);
> +
> + return ac_build_wwm(ctx, result);
> +}
> +
> +LLVMValueRef
> +ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size)
> +{
> + if (cluster_size == 1) return src;
Prefer to put the return on a new line, please.
Thanks,
Nicolai
> + ac_build_optimization_barrier(ctx, &src);
> + LLVMValueRef result, swap;
> + LLVMValueRef identity = get_reduction_identity(ctx, op,
> + ac_get_type_size(LLVMTypeOf(src)));
> + result = LLVMBuildBitCast(ctx->builder,
> + ac_build_set_inactive(ctx, src, identity),
> + LLVMTypeOf(identity), "");
Weird whitespace again.
With those style nitpicks fixed,
Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> + swap = ac_build_quad_swizzle(ctx, result, 1, 0, 3, 2);
> + result = ac_build_alu_op(ctx, result, swap, op);
> + if (cluster_size == 2) return ac_build_wwm(ctx, result);
> +
> + swap = ac_build_quad_swizzle(ctx, result, 2, 3, 0, 1);
> + result = ac_build_alu_op(ctx, result, swap, op);
> + if (cluster_size == 4) return ac_build_wwm(ctx, result);
> +
> + if (ctx->chip_class >= VI)
> + swap = ac_build_dpp(ctx, identity, result, dpp_row_half_mirror, 0xf, 0xf, false);
> + else
> + swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x04));
> + result = ac_build_alu_op(ctx, result, swap, op);
> + if (cluster_size == 8) return ac_build_wwm(ctx, result);
> +
> + if (ctx->chip_class >= VI)
> + swap = ac_build_dpp(ctx, identity, result, dpp_row_mirror, 0xf, 0xf, false);
> + else
> + swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x08));
> + result = ac_build_alu_op(ctx, result, swap, op);
> + if (cluster_size == 16) return ac_build_wwm(ctx, result);
> +
> + if (ctx->chip_class >= VI && cluster_size != 32)
> + swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast15, 0xa, 0xf, false);
> + else
> + swap = ac_build_ds_swizzle(ctx, result, ds_pattern_bitmode(0x1f, 0, 0x10));
> + result = ac_build_alu_op(ctx, result, swap, op);
> + if (cluster_size == 32) return ac_build_wwm(ctx, result);
> +
> + if (ctx->chip_class >= VI) {
> + swap = ac_build_dpp(ctx, identity, result, dpp_row_bcast31, 0xc, 0xf, false);
> + result = ac_build_alu_op(ctx, result, swap, op);
> + result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 63, 0));
> + return ac_build_wwm(ctx, result);
> + } else {
> + swap = ac_build_readlane(ctx, result, ctx->i32_0);
> + result = ac_build_readlane(ctx, result, LLVMConstInt(ctx->i32, 32, 0));
> + result = ac_build_alu_op(ctx, result, swap, op);
> + return ac_build_wwm(ctx, result);
> + }
> +}
> +
> +LLVMValueRef
> +ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
> + unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
> +{
> + unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3);
> + if (ctx->chip_class >= VI && HAVE_LLVM >= 0x0600) {
> + return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false);
> + } else {
> + return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask);
> + }
> +}
> +
> +LLVMValueRef
> +ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index)
> +{
> + index = LLVMBuildMul(ctx->builder, index, LLVMConstInt(ctx->i32, 4, 0), "");
> + return ac_build_intrinsic(ctx,
> + "llvm.amdgcn.ds.bpermute", ctx->i32,
> + (LLVMValueRef []) {index, src}, 2,
> + AC_FUNC_ATTR_READNONE |
> + AC_FUNC_ATTR_CONVERGENT);
> +}
> diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> index 8b35028a31..44811ce6d9 100644
> --- a/src/amd/common/ac_llvm_build.h
> +++ b/src/amd/common/ac_llvm_build.h
> @@ -27,7 +27,7 @@
>
> #include <stdbool.h>
> #include <llvm-c/TargetMachine.h>
> -
> +#include <nir/nir.h>
> #include "amd_family.h"
>
> #ifdef __cplusplus
> @@ -417,6 +417,34 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
> void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
> LLVMValueRef *addr, bool is_array_tex);
>
> +LLVMValueRef
> +ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
> +
> +LLVMValueRef
> +ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane);
> +
> +LLVMValueRef
> +ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane);
> +
> +LLVMValueRef
> +ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
> +
> +LLVMValueRef
> +ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
> +
> +LLVMValueRef
> +ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
> +
> +LLVMValueRef
> +ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
> +
> +LLVMValueRef
> +ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
> + unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
> +
> +LLVMValueRef
> +ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
> +
> #ifdef __cplusplus
> }
> #endif
>
--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
More information about the mesa-dev
mailing list