[Mesa-dev] [Mesa-stable] [PATCH] ac/nir: Add workaround for GFX9 buffer views.
Juan A. Suarez Romero
jasuarez at igalia.com
Wed Apr 11 14:25:23 UTC 2018
Hi, Bas.
Unfortunately, I can't apply this patch neither in next 17.3 release stable, nor
in 18.0, as this patch does not apply cleanly, and it requires other different
commits that didn't land in the stable branches.
For 17.3 I think it is probably not worth to try to provide a specific version
for the stable, as I'm already cooking the pre-release, and this is the latest
release for 17.3 series.
Maybe it is worth to provide a version for 18.0 branch, though. Probably it
wouldn't enter in this release, but surely in the next one which should happen
in 1 or 2 weeks.
For more information, for 18.0 this patch seems to require 1251f08ef ("ac: add
ac_build_buffer_load_common() helper"), bac9fa9f17f ("ac: add glc parameter to
ac_build_buffer_load_format"), and probably others.
J.A.
On Wed, 2018-04-11 at 11:26 +0200, Bas Nieuwenhuizen wrote:
> On Wed, Apr 4, 2018 at 10:19 PM, Bas Nieuwenhuizen
> <bas at basnieuwenhuizen.nl> wrote:
> > On GFX9 whether the buffer size is interpreted as elements or bytes
> > depends on whether IDXEN is enabled in the instruction. If the index
> > is a constant zero, LLVM optimizes IDXEN to 0.
> >
> > Now the size in elements is interpreted in bytes which of course
> > results in out of bounds accesses.
> >
> > The correct fix is most likely to disable the LLVM optimization,
> > but we need something to work with LLVM <= 6.0.
> >
> > radeonsi does the max between stride and element count on the CPU
> > but that results in the size intrinsics returning the wrong size
> > for the buffer. This would cause CTS errors for radv.
> >
> > v2: Also include the store changes.
> >
> > Fixes: e38685cc62e 'Revert "radv: disable support for VEGA for now."'
> > (backported from 4503ff760c794c3bb15b978a47c530037d56498e for 17.3)
> > ---
> > src/amd/common/ac_llvm_build.c | 20 ++++++++++++++++++++
> > src/amd/common/ac_llvm_build.h | 8 ++++++++
> > src/amd/common/ac_nir_to_llvm.c | 36 ++++++++++++++++++++++++++++++------
> > src/amd/common/ac_shader_abi.h | 4 ++++
> > 4 files changed, 62 insertions(+), 6 deletions(-)
> >
> > diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
> > index e5cd23e025..f193f71c3e 100644
> > --- a/src/amd/common/ac_llvm_build.c
> > +++ b/src/amd/common/ac_llvm_build.c
> > @@ -960,6 +960,26 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
> > AC_FUNC_ATTR_READONLY);
> > }
> >
> > +LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
> > + LLVMValueRef rsrc,
> > + LLVMValueRef vindex,
> > + LLVMValueRef voffset,
> > + bool can_speculate)
> > +{
> > + LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), "");
> > + LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 1, 0), "");
> > + stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), "");
> > +
> > + LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder,
> > + LLVMBuildICmp(ctx->builder, LLVMIntUGT, elem_count, stride, ""),
> > + elem_count, stride, "");
> > +
> > + LLVMValueRef new_rsrc = LLVMBuildInsertElement(ctx->builder, rsrc, new_elem_count,
> > + LLVMConstInt(ctx->i32, 2, 0), "");
> > +
> > + return ac_build_buffer_load_format(ctx, new_rsrc, vindex, voffset, can_speculate);
> > +}
> > +
> > /**
> > * Set range metadata on an instruction. This can only be used on load and
> > * call instructions. If you know an instruction can only produce the values
> > diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
> > index aa2a2899ab..d4264f2879 100644
> > --- a/src/amd/common/ac_llvm_build.h
> > +++ b/src/amd/common/ac_llvm_build.h
> > @@ -188,6 +188,14 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
> > LLVMValueRef voffset,
> > bool can_speculate);
> >
> > +/* load_format that handles the stride & element count better if idxen is
> > + * disabled by LLVM. */
> > +LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
> > + LLVMValueRef rsrc,
> > + LLVMValueRef vindex,
> > + LLVMValueRef voffset,
> > + bool can_speculate);
> > +
> > LLVMValueRef
> > ac_get_thread_id(struct ac_llvm_context *ctx);
> >
> > diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
> > index b696cb8a45..8ee80b1a67 100644
> > --- a/src/amd/common/ac_nir_to_llvm.c
> > +++ b/src/amd/common/ac_nir_to_llvm.c
> > @@ -2257,11 +2257,19 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
> > struct ac_image_args *args)
> > {
> > if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
> > - return ac_build_buffer_load_format(&ctx->ac,
> > - args->resource,
> > - args->addr,
> > - LLVMConstInt(ctx->ac.i32, 0, false),
> > - true);
> > + if (ctx->abi->gfx9_stride_size_workaround) {
> > + return ac_build_buffer_load_format_gfx9_safe(&ctx->ac,
> > + args->resource,
> > + args->addr,
> > + ctx->ac.i32_0,
> > + true);
> > + } else {
> > + return ac_build_buffer_load_format(&ctx->ac,
> > + args->resource,
> > + args->addr,
> > + ctx->ac.i32_0,
> > + true);
> > + }
> > }
> >
> > args->opcode = ac_image_sample;
> > @@ -3613,8 +3621,23 @@ static void visit_image_store(struct ac_nir_context *ctx,
> > glc = i1true;
> >
> > if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
> > + LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, true);
> > +
> > + if (ctx->abi->gfx9_stride_size_workaround) {
> > + LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
> > + LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
> > + stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
> > +
> > + LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
> > + LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
> > + elem_count, stride, "");
> > +
> > + rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
> > + LLVMConstInt(ctx->ac.i32, 2, 0), "");
> > + }
> > +
> > params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
> > - params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, true, true);
> > + params[1] = rsrc;
> > params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
> > ctx->ac.i32_0, ""); /* vindex */
> > params[3] = ctx->ac.i32_0; /* voffset */
> > @@ -6645,6 +6668,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
> > ctx.abi.load_ssbo = radv_load_ssbo;
> > ctx.abi.load_sampler_desc = radv_get_sampler_desc;
> > ctx.abi.clamp_shadow_reference = false;
> > + ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9;
> >
> > if (shader_count >= 2)
> > ac_init_exec_full_mask(&ctx.ac);
> > diff --git a/src/amd/common/ac_shader_abi.h b/src/amd/common/ac_shader_abi.h
> > index 14517d5570..b04dc076de 100644
> > --- a/src/amd/common/ac_shader_abi.h
> > +++ b/src/amd/common/ac_shader_abi.h
> > @@ -92,6 +92,10 @@ struct ac_shader_abi {
> > /* Whether to clamp the shadow reference value to [0,1]on VI. Radeonsi currently
> > * uses it due to promoting D16 to D32, but radv needs it off. */
> > bool clamp_shadow_reference;
> > +
> > + /* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
> > + * and LLVM optimizes an indexed load with constant index to IDXEN=0. */
> > + bool gfx9_stride_size_workaround;
> > };
> >
> > #endif /* AC_SHADER_ABI_H */
> > --
> > 2.16.2
> >
>
> _______________________________________________
> mesa-stable mailing list
> mesa-stable at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
More information about the mesa-dev
mailing list