[Mesa-dev] [PATCH 1/5] amd/common: pass new enum ac_image_dim to ac_build_image_opcode
Nicolai Hähnle
nhaehnle at gmail.com
Wed Apr 11 11:13:53 UTC 2018
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
This is in preparation for the new, dimension-aware LLVM image
intrinsics.
---
src/amd/common/ac_llvm_build.c | 10 ++++-
src/amd/common/ac_llvm_build.h | 13 +++++-
src/amd/common/ac_nir_to_llvm.c | 54 +++++++++++++++++++----
src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 50 ++++++++++++++++++++-
4 files changed, 114 insertions(+), 13 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 32d8a02f562..2bf38f809bb 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -1433,36 +1433,42 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
struct ac_image_args *a)
{
LLVMValueRef args[11];
unsigned num_args = 0;
const char *name = NULL;
char intr_name[128], type[64];
bool sample = a->opcode == ac_image_sample ||
a->opcode == ac_image_gather4 ||
a->opcode == ac_image_get_lod;
+ bool da = a->dim == ac_image_cube ||
+ a->dim == ac_image_1darray ||
+ a->dim == ac_image_2darray ||
+ a->dim == ac_image_2darraymsaa;
+ if (a->opcode == ac_image_get_lod)
+ da = false;
if (sample)
args[num_args++] = ac_to_float(ctx, a->addr);
else
args[num_args++] = a->addr;
args[num_args++] = a->resource;
if (sample)
args[num_args++] = a->sampler;
args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
if (sample)
args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
args[num_args++] = ctx->i1false; /* glc */
args[num_args++] = ctx->i1false; /* slc */
args[num_args++] = ctx->i1false; /* lwe */
- args[num_args++] = LLVMConstInt(ctx->i1, a->da, 0);
+ args[num_args++] = LLVMConstInt(ctx->i1, da, 0);
switch (a->opcode) {
case ac_image_sample:
name = "llvm.amdgcn.image.sample";
break;
case ac_image_gather4:
name = "llvm.amdgcn.image.gather4";
break;
case ac_image_load:
name = "llvm.amdgcn.image.load";
@@ -2450,21 +2456,21 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
* The sample index should be adjusted as follows:
* addr[sample_index] = (fmask >> (addr[sample_index] * 4)) & 0xF;
*/
void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
LLVMValueRef *addr, bool is_array_tex)
{
struct ac_image_args fmask_load = {};
fmask_load.opcode = ac_image_load;
fmask_load.resource = fmask;
fmask_load.dmask = 0xf;
- fmask_load.da = is_array_tex;
+ fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
LLVMValueRef fmask_addr[4];
memcpy(fmask_addr, addr, sizeof(fmask_addr[0]) * 3);
fmask_addr[3] = LLVMGetUndef(ac->i32);
fmask_load.addr = ac_build_gather_values(ac, fmask_addr,
is_array_tex ? 4 : 2);
LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value,
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index 8b35028a314..a51390794a7 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -310,35 +310,46 @@ void ac_build_export_null(struct ac_llvm_context *ctx);
enum ac_image_opcode {
ac_image_sample,
ac_image_gather4,
ac_image_load,
ac_image_load_mip,
ac_image_get_lod,
ac_image_get_resinfo,
};
+enum ac_image_dim {
+ ac_image_1d,
+ ac_image_2d,
+ ac_image_3d,
+ ac_image_cube, // includes cube arrays
+ ac_image_1darray,
+ ac_image_2darray,
+ ac_image_2dmsaa,
+ ac_image_2darraymsaa,
+};
+
struct ac_image_args {
enum ac_image_opcode opcode;
+ enum ac_image_dim dim;
bool level_zero;
bool bias;
bool lod;
bool deriv;
bool compare;
bool offset;
LLVMValueRef resource;
LLVMValueRef sampler;
LLVMValueRef addr;
unsigned dmask;
bool unorm;
- bool da;
};
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
struct ac_image_args *a);
LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
LLVMValueRef args[2]);
LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
LLVMValueRef args[2]);
LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
LLVMValueRef args[2]);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index 7c2bd5c0cca..ce7afaf96b4 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -69,20 +69,59 @@ build_store_values_extended(struct ac_llvm_context *ac,
unsigned i;
for (i = 0; i < value_count; i++) {
LLVMValueRef ptr = values[i * value_stride];
LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
LLVMBuildStore(builder, value, ptr);
}
}
+static enum ac_image_dim
+get_ac_sampler_dim(const struct ac_llvm_context *ctx, enum glsl_sampler_dim dim,
+ bool is_array)
+{
+ switch (dim) {
+ case GLSL_SAMPLER_DIM_1D:
+ if (ctx->chip_class >= GFX9)
+ return is_array ? ac_image_2darray : ac_image_2d;
+ return is_array ? ac_image_1darray : ac_image_1d;
+ case GLSL_SAMPLER_DIM_2D:
+ case GLSL_SAMPLER_DIM_RECT:
+ case GLSL_SAMPLER_DIM_SUBPASS:
+ case GLSL_SAMPLER_DIM_EXTERNAL:
+ return is_array ? ac_image_2darray : ac_image_2d;
+ case GLSL_SAMPLER_DIM_3D:
+ return ac_image_3d;
+ case GLSL_SAMPLER_DIM_CUBE:
+ return ac_image_cube;
+ case GLSL_SAMPLER_DIM_MS:
+ case GLSL_SAMPLER_DIM_SUBPASS_MS:
+ return is_array ? ac_image_2darraymsaa : ac_image_2dmsaa;
+ default:
+ unreachable("bad sampler dim");
+ }
+}
+
+static enum ac_image_dim
+get_ac_image_dim(const struct ac_llvm_context *ctx, enum glsl_sampler_dim sdim,
+ bool is_array)
+{
+ enum ac_image_dim dim = get_ac_sampler_dim(ctx, sdim, is_array);
+
+ if (dim == ac_image_cube ||
+ (ctx->chip_class <= VI && dim == ac_image_3d))
+ dim = ac_image_2darray;
+
+ return dim;
+}
+
static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
const nir_ssa_def *def)
{
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
if (def->num_components > 1) {
type = LLVMVectorType(type, def->num_components);
}
return type;
}
@@ -1117,21 +1156,21 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
LLVMValueRef half_texel[2];
LLVMValueRef compare_cube_wa = NULL;
LLVMValueRef result;
int c;
unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
//TODO Rect
{
struct ac_image_args txq_args = { 0 };
- txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
+ txq_args.dim = get_ac_sampler_dim(ctx, instr->sampler_dim, instr->is_array);
txq_args.opcode = ac_image_get_resinfo;
txq_args.dmask = 0xf;
txq_args.addr = ctx->i32_0;
txq_args.resource = args->resource;
LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args);
for (c = 0; c < 2; c++) {
half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
LLVMConstInt(ctx->i32, c, false), "");
half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
@@ -2048,21 +2087,21 @@ static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
fmask_load_address[0] = coord_x;
fmask_load_address[1] = coord_y;
if (coord_z) {
fmask_load_address[2] = coord_z;
fmask_load_address[3] = LLVMGetUndef(ctx->i32);
}
struct ac_image_args args = {0};
args.opcode = ac_image_load;
- args.da = coord_z ? true : false;
+ args.dim = coord_z ? ac_image_2darray : ac_image_2d;
args.resource = fmask_desc_ptr;
args.dmask = 0xf;
args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2);
res = ac_build_image_opcode(ctx, &args);
res = ac_to_integer(ctx, res);
LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
@@ -2390,21 +2429,22 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0);
}
static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
const nir_variable *var = instr->variables[0]->var;
const struct glsl_type *type = glsl_without_array(var->type);
struct ac_image_args args = { 0 };
- args.da = glsl_is_array_image(type);
+ args.dim = get_ac_sampler_dim(&ctx->ac, glsl_get_sampler_dim(type),
+ glsl_sampler_type_is_array(type));
args.dmask = 0xf;
args.resource = get_sampler_desc(ctx, instr->variables[0],
AC_DESC_IMAGE, NULL, true, false);
args.opcode = ac_image_get_resinfo;
args.addr = ctx->ac.i32_0;
return ac_build_image_opcode(&ctx->ac, &args);
}
static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
@@ -2414,21 +2454,22 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
const nir_variable *var = instr->variables[0]->var;
const struct glsl_type *type = glsl_without_array(var->type);
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
return get_buffer_size(ctx,
get_sampler_desc(ctx, instr->variables[0],
AC_DESC_BUFFER, NULL, true, false), true);
struct ac_image_args args = { 0 };
- args.da = glsl_is_array_image(type);
+ args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type),
+ glsl_sampler_type_is_array(type));
args.dmask = 0xf;
args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
args.opcode = ac_image_get_resinfo;
args.addr = ctx->ac.i32_0;
res = ac_build_image_opcode(&ctx->ac, &args);
LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
@@ -3161,44 +3202,41 @@ static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
static void set_tex_fetch_args(struct ac_llvm_context *ctx,
struct ac_image_args *args,
const nir_tex_instr *instr,
nir_texop op,
LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
LLVMValueRef *param, unsigned count,
unsigned dmask)
{
unsigned is_rect = 0;
- bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
- if (op == nir_texop_lod)
- da = false;
/* Pad to power of two vector */
while (count < util_next_power_of_two(count))
param[count++] = LLVMGetUndef(ctx->i32);
if (count > 1)
args->addr = ac_build_gather_values(ctx, param, count);
else
args->addr = param[0];
args->resource = res_ptr;
args->sampler = samp_ptr;
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
args->addr = param[0];
return;
}
args->dmask = dmask;
args->unorm = is_rect;
- args->da = da;
+ args->dim = get_ac_sampler_dim(ctx, instr->sampler_dim, instr->is_array);
}
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
*
* SI-CI:
* If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
* filtering manually. The driver sets img7 to a mask clearing
* MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
* s_and_b32 samp0, samp0, img7
*
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 6a307c4ddba..a54db9e8596 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -94,20 +94,58 @@ shader_buffer_fetch_rsrc(struct si_shader_context *ctx,
static bool tgsi_is_array_image(unsigned target)
{
return target == TGSI_TEXTURE_3D ||
target == TGSI_TEXTURE_CUBE ||
target == TGSI_TEXTURE_1D_ARRAY ||
target == TGSI_TEXTURE_2D_ARRAY ||
target == TGSI_TEXTURE_CUBE_ARRAY ||
target == TGSI_TEXTURE_2D_ARRAY_MSAA;
}
+static enum ac_image_dim
+ac_texture_dim_from_tgsi_target(struct si_screen *screen, enum tgsi_texture_type target)
+{
+ switch (target) {
+ case TGSI_TEXTURE_1D:
+ case TGSI_TEXTURE_SHADOW1D:
+ if (screen->info.chip_class >= GFX9)
+ return ac_image_2d;
+ return ac_image_1d;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_RECT:
+ case TGSI_TEXTURE_SHADOWRECT:
+ return ac_image_2d;
+ case TGSI_TEXTURE_3D:
+ return ac_image_3d;
+ case TGSI_TEXTURE_CUBE:
+ case TGSI_TEXTURE_SHADOWCUBE:
+ case TGSI_TEXTURE_CUBE_ARRAY:
+ case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
+ return ac_image_cube;
+ case TGSI_TEXTURE_1D_ARRAY:
+ case TGSI_TEXTURE_SHADOW1D_ARRAY:
+ if (screen->info.chip_class >= GFX9)
+ return ac_image_2darray;
+ return ac_image_1darray;
+ case TGSI_TEXTURE_2D_ARRAY:
+ case TGSI_TEXTURE_SHADOW2D_ARRAY:
+ return ac_image_2darray;
+ case TGSI_TEXTURE_2D_MSAA:
+ return ac_image_2dmsaa;
+ case TGSI_TEXTURE_2D_ARRAY_MSAA:
+ return ac_image_2darraymsaa;
+ default:
+ unreachable("unhandled texture type");
+ }
+}
+
/**
* Given a 256-bit resource descriptor, force the DCC enable bit to off.
*
* At least on Tonga, executing image stores on images with DCC enabled and
* non-trivial can eventually lead to lockups. This can occur when an
* application binds an image as read-only but then uses a shader that writes
* to it. The OpenGL spec allows almost arbitrarily bad behavior (including
* program termination) in this case, but it doesn't cost much to be a bit
* nicer: disabling DCC in the shader still leads to undefined results but
* avoids the lockup.
@@ -979,26 +1017,26 @@ static void set_tex_fetch_args(struct si_shader_context *ctx,
/* Pad to power of two vector */
while (count < util_next_power_of_two(count))
param[count++] = LLVMGetUndef(ctx->i32);
if (count > 1)
args.addr = lp_build_gather_values(&ctx->gallivm, param, count);
else
args.addr = param[0];
+ args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
args.resource = res_ptr;
args.sampler = samp_ptr;
args.dmask = dmask;
args.unorm = target == TGSI_TEXTURE_RECT ||
target == TGSI_TEXTURE_SHADOWRECT;
- args.da = tgsi_is_array_sampler(target);
/* Ugly, but we seem to have no other choice right now. */
STATIC_ASSERT(sizeof(args) <= sizeof(emit_data->args));
memcpy(emit_data->args, &args, sizeof(args));
}
static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
unsigned target, LLVMValueRef out)
{
LLVMBuilderRef builder = ctx->ac.builder;
@@ -1918,21 +1956,29 @@ static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action,
ac_apply_fmask_to_sample(&ctx->ac, fmask, addr, false);
}
addr_vec = ac_build_gather_values(&ctx->ac, addr, ARRAY_SIZE(addr));
args.opcode = ac_image_load;
args.resource = image;
args.addr = addr_vec;
args.dmask = 0xf;
- args.da = ctx->shader->key.mono.u.ps.fbfetch_layered;
+ if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
+ args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+ ac_image_2darraymsaa : ac_image_2dmsaa;
+ else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
+ args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+ ac_image_1darray : ac_image_1d;
+ else
+ args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
+ ac_image_2darray : ac_image_2d;
emit_data->output[emit_data->chan] =
ac_build_image_opcode(&ctx->ac, &args);
}
static const struct lp_build_tgsi_action tex_action = {
.fetch_args = tex_fetch_args,
.emit = build_tex_intrinsic,
};
--
2.14.1
More information about the mesa-dev
mailing list