[Mesa-dev] [PATCH 2/5] amd/common: pass address components individually to ac_build_image_intrinsic
Nicolai Hähnle
nhaehnle at gmail.com
Wed Apr 11 11:13:54 UTC 2018
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
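This is in preparation for the new image intrinsics.

struct ac_image_args no longer takes a pre-packed addr vector. Callers
now fill in the offset, bias, compare value, derivatives, coordinates
and LOD as separate fields, and ac_build_image_opcode assembles (and
pads) the address vector itself.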
---
src/amd/common/ac_llvm_build.c | 101 +++++-
src/amd/common/ac_llvm_build.h | 14 +-
src/amd/common/ac_nir_to_llvm.c | 365 +++++++--------------
src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h | 2 +-
src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 222 +++++--------
5 files changed, 295 insertions(+), 409 deletions(-)
diff --git a/src/amd/common/ac_llvm_build.c b/src/amd/common/ac_llvm_build.c
index 2bf38f809bb..edc729c0127 100644
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -30,20 +30,21 @@
#include "c11/threads.h"
#include <assert.h>
#include <stdio.h>
#include "ac_llvm_util.h"
#include "ac_exp_param.h"
#include "util/bitscan.h"
#include "util/macros.h"
#include "util/u_atomic.h"
+#include "util/u_math.h"
#include "sid.h"
#include "shader_enums.h"
#define AC_LLVM_INITIAL_CF_DEPTH 4
/* Data for if/else/endif and bgnloop/endloop control flow structures.
*/
struct ac_llvm_flow {
/* Loop exit or next part of if/else/endif. */
@@ -1422,42 +1423,117 @@ void ac_build_export_null(struct ac_llvm_context *ctx)
args.target = V_008DFC_SQ_EXP_NULL;
args.compr = 0; /* COMPR flag (0 = 32-bit export) */
args.out[0] = LLVMGetUndef(ctx->f32); /* R */
args.out[1] = LLVMGetUndef(ctx->f32); /* G */
args.out[2] = LLVMGetUndef(ctx->f32); /* B */
args.out[3] = LLVMGetUndef(ctx->f32); /* A */
ac_build_export(ctx, &args);
}
+static unsigned ac_num_coords(enum ac_image_dim dim)
+{
+ switch (dim) {
+ case ac_image_1d:
+ return 1;
+ case ac_image_2d:
+ case ac_image_1darray:
+ return 2;
+ case ac_image_3d:
+ case ac_image_cube:
+ case ac_image_2darray:
+ case ac_image_2dmsaa:
+ return 3;
+ case ac_image_2darraymsaa:
+ return 4;
+ default:
+ unreachable("ac_num_coords: bad dim");
+ }
+}
+
+static unsigned ac_num_derivs(enum ac_image_dim dim)
+{
+ switch (dim) {
+ case ac_image_1d:
+ case ac_image_1darray:
+ return 2;
+ case ac_image_2d:
+ case ac_image_2darray:
+ case ac_image_cube:
+ return 4;
+ case ac_image_3d:
+ return 6;
+ case ac_image_2dmsaa:
+ case ac_image_2darraymsaa:
+ default:
+ unreachable("derivatives not supported");
+ }
+}
+
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
struct ac_image_args *a)
{
- LLVMValueRef args[11];
- unsigned num_args = 0;
+ LLVMValueRef args[16];
const char *name = NULL;
char intr_name[128], type[64];
+ assert(!a->lod || a->lod == ctx->i32_0 || a->lod == ctx->f32_0 ||
+ !a->level_zero);
+ assert((a->opcode != ac_image_get_resinfo && a->opcode != ac_image_load_mip) ||
+ a->lod);
+ assert((a->bias ? 1 : 0) +
+ (a->lod ? 1 : 0) +
+ (a->level_zero ? 1 : 0) +
+ (a->derivs[0] ? 1 : 0) <= 1);
+
bool sample = a->opcode == ac_image_sample ||
a->opcode == ac_image_gather4 ||
a->opcode == ac_image_get_lod;
bool da = a->dim == ac_image_cube ||
a->dim == ac_image_1darray ||
a->dim == ac_image_2darray ||
a->dim == ac_image_2darraymsaa;
if (a->opcode == ac_image_get_lod)
da = false;
+ unsigned num_coords =
+ a->opcode != ac_image_get_resinfo ? ac_num_coords(a->dim) : 0;
+ LLVMValueRef addr;
+ unsigned num_addr = 0;
+
+ if (a->offset)
+ args[num_addr++] = ac_to_integer(ctx, a->offset);
+ if (a->bias)
+ args[num_addr++] = ac_to_integer(ctx, a->bias);
+ if (a->compare)
+ args[num_addr++] = ac_to_integer(ctx, a->compare);
+ if (a->derivs[0]) {
+ unsigned num_derivs = ac_num_derivs(a->dim);
+ for (unsigned i = 0; i < num_derivs; ++i)
+ args[num_addr++] = ac_to_integer(ctx, a->derivs[i]);
+ }
+ for (unsigned i = 0; i < num_coords; ++i)
+ args[num_addr++] = ac_to_integer(ctx, a->coords[i]);
+ if (a->lod)
+ args[num_addr++] = ac_to_integer(ctx, a->lod);
+
+ unsigned pad_goal = util_next_power_of_two(num_addr);
+ while (num_addr < pad_goal)
+ args[num_addr++] = LLVMGetUndef(ctx->i32);
+
+ addr = ac_build_gather_values(ctx, args, num_addr);
+
+ unsigned num_args = 0;
if (sample)
- args[num_args++] = ac_to_float(ctx, a->addr);
+ args[num_args++] = ac_to_float(ctx, addr);
else
- args[num_args++] = a->addr;
+ args[num_args++] = ac_to_integer(ctx, addr);
args[num_args++] = a->resource;
if (sample)
args[num_args++] = a->sampler;
args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
if (sample)
args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
args[num_args++] = ctx->i1false; /* glc */
args[num_args++] = ctx->i1false; /* slc */
args[num_args++] = ctx->i1false; /* lwe */
@@ -1482,26 +1558,29 @@ LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
case ac_image_get_resinfo:
name = "llvm.amdgcn.image.getresinfo";
break;
default:
unreachable("invalid image opcode");
}
ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type,
sizeof(type));
+ bool lod_suffix =
+ a->lod && (a->opcode == ac_image_sample || a->opcode == ac_image_gather4);
+
snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
name,
a->compare ? ".c" : "",
a->bias ? ".b" :
- a->lod ? ".l" :
- a->deriv ? ".d" :
+ lod_suffix ? ".l" :
+ a->derivs[0] ? ".d" :
a->level_zero ? ".lz" : "",
a->offset ? ".o" : "",
type);
LLVMValueRef result =
ac_build_intrinsic(ctx, intr_name,
ctx->v4f32, args, num_args,
AC_FUNC_ATTR_READNONE);
if (!sample) {
result = LLVMBuildBitCast(ctx->builder, result,
@@ -2458,26 +2537,24 @@ LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
*/
void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
LLVMValueRef *addr, bool is_array_tex)
{
struct ac_image_args fmask_load = {};
fmask_load.opcode = ac_image_load;
fmask_load.resource = fmask;
fmask_load.dmask = 0xf;
fmask_load.dim = is_array_tex ? ac_image_2darray : ac_image_2d;
- LLVMValueRef fmask_addr[4];
- memcpy(fmask_addr, addr, sizeof(fmask_addr[0]) * 3);
- fmask_addr[3] = LLVMGetUndef(ac->i32);
-
- fmask_load.addr = ac_build_gather_values(ac, fmask_addr,
- is_array_tex ? 4 : 2);
+ fmask_load.coords[0] = addr[0];
+ fmask_load.coords[1] = addr[1];
+ if (is_array_tex)
+ fmask_load.coords[2] = addr[2];
LLVMValueRef fmask_value = ac_build_image_opcode(ac, &fmask_load);
fmask_value = LLVMBuildExtractElement(ac->builder, fmask_value,
ac->i32_0, "");
/* Apply the formula. */
unsigned sample_chan = is_array_tex ? 3 : 2;
LLVMValueRef final_sample;
final_sample = LLVMBuildMul(ac->builder, addr[sample_chan],
LLVMConstInt(ac->i32, 4, 0), "");
diff --git a/src/amd/common/ac_llvm_build.h b/src/amd/common/ac_llvm_build.h
index a51390794a7..fcd465ef070 100644
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -324,32 +324,32 @@ enum ac_image_dim {
ac_image_cube, // includes cube arrays
ac_image_1darray,
ac_image_2darray,
ac_image_2dmsaa,
ac_image_2darraymsaa,
};
struct ac_image_args {
enum ac_image_opcode opcode;
enum ac_image_dim dim;
- bool level_zero;
- bool bias;
- bool lod;
- bool deriv;
- bool compare;
- bool offset;
LLVMValueRef resource;
LLVMValueRef sampler;
- LLVMValueRef addr;
+ LLVMValueRef offset;
+ LLVMValueRef bias;
+ LLVMValueRef compare;
+ LLVMValueRef derivs[6];
+ LLVMValueRef coords[4];
+ LLVMValueRef lod; // also used by ac_image_get_resinfo
unsigned dmask;
bool unorm;
+ bool level_zero;
};
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
struct ac_image_args *a);
LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
LLVMValueRef args[2]);
LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
LLVMValueRef args[2]);
LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
LLVMValueRef args[2]);
diff --git a/src/amd/common/ac_nir_to_llvm.c b/src/amd/common/ac_nir_to_llvm.c
index ce7afaf96b4..ddd1265c8da 100644
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1145,61 +1145,53 @@ static void build_int_type_name(
LLVMGetVectorSize(type));
else
strcpy(buf, "i32");
}
static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
struct ac_image_args *args,
const nir_tex_instr *instr)
{
enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
- LLVMValueRef coord = args->addr;
LLVMValueRef half_texel[2];
LLVMValueRef compare_cube_wa = NULL;
LLVMValueRef result;
- int c;
- unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
//TODO Rect
{
struct ac_image_args txq_args = { 0 };
txq_args.dim = get_ac_sampler_dim(ctx, instr->sampler_dim, instr->is_array);
txq_args.opcode = ac_image_get_resinfo;
txq_args.dmask = 0xf;
- txq_args.addr = ctx->i32_0;
+ txq_args.lod = ctx->i32_0;
txq_args.resource = args->resource;
LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args);
- for (c = 0; c < 2; c++) {
+ for (unsigned c = 0; c < 2; c++) {
half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
LLVMConstInt(ctx->i32, c, false), "");
half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
LLVMConstReal(ctx->f32, -0.5), "");
}
}
- LLVMValueRef orig_coords = args->addr;
+ LLVMValueRef orig_coords[2] = { args->coords[0], args->coords[1] };
- for (c = 0; c < 2; c++) {
+ for (unsigned c = 0; c < 2; c++) {
LLVMValueRef tmp;
- LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
- tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
- tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
- tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
- tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
- coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
+ tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
+ args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
}
-
/*
* Apparantly cube has issue with integer types that the workaround doesn't solve,
* so this tests if the format is 8_8_8_8 and an integer type do an alternate
* workaround by sampling using a scaled type and converting.
* This is taken from amdgpu-pro shaders.
*/
/* NOTE this produces some ugly code compared to amdgpu-pro,
* LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select,
* and then reads them back. -pro generates two selects,
* one s_cmp for the descriptor rewriting
@@ -1229,111 +1221,101 @@ static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
LLVMConstInt(ctx->i32, 0x14000000, false), "");
/* replace the NUM FORMAT in the descriptor */
tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), "");
tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");
args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, "");
/* don't modify the coordinates for this case */
- coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
+ for (unsigned c = 0; c < 2; ++c)
+ args->coords[c] = LLVMBuildSelect(
+ ctx->builder, compare_cube_wa,
+ orig_coords[c], args->coords[c], "");
}
- args->addr = coord;
result = ac_build_image_opcode(ctx, args);
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
LLVMValueRef tmp, tmp2;
/* if the cube workaround is in place, f2i the result. */
- for (c = 0; c < 4; c++) {
+ for (unsigned c = 0; c < 4; c++) {
tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
if (stype == GLSL_TYPE_UINT)
tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
else
tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, "");
tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
}
}
return result;
}
static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
const nir_tex_instr *instr,
- bool lod_is_zero,
struct ac_image_args *args)
{
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
if (ctx->abi->gfx9_stride_size_workaround) {
return ac_build_buffer_load_format_gfx9_safe(&ctx->ac,
args->resource,
- args->addr,
+ args->coords[0],
ctx->ac.i32_0,
util_last_bit(mask),
false, true);
} else {
return ac_build_buffer_load_format(&ctx->ac,
args->resource,
- args->addr,
+ args->coords[0],
ctx->ac.i32_0,
util_last_bit(mask),
false, true);
}
}
args->opcode = ac_image_sample;
- args->compare = instr->is_shadow;
switch (instr->op) {
case nir_texop_txf:
case nir_texop_txf_ms:
case nir_texop_samples_identical:
- args->opcode = lod_is_zero ||
+ args->opcode = args->level_zero ||
instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
ac_image_load : ac_image_load_mip;
- args->compare = false;
- args->offset = false;
- break;
- case nir_texop_txb:
- args->bias = true;
- break;
- case nir_texop_txl:
- if (lod_is_zero)
- args->level_zero = true;
- else
- args->lod = true;
+ args->level_zero = false;
break;
case nir_texop_txs:
case nir_texop_query_levels:
args->opcode = ac_image_get_resinfo;
+ if (!args->lod)
+ args->lod = ctx->ac.i32_0;
+ args->level_zero = false;
break;
case nir_texop_tex:
- if (ctx->stage != MESA_SHADER_FRAGMENT)
+ if (ctx->stage != MESA_SHADER_FRAGMENT) {
+ assert(!args->lod);
args->level_zero = true;
- break;
- case nir_texop_txd:
- args->deriv = true;
+ }
break;
case nir_texop_tg4:
args->opcode = ac_image_gather4;
args->level_zero = true;
break;
case nir_texop_lod:
args->opcode = ac_image_get_lod;
- args->compare = false;
- args->offset = false;
break;
default:
break;
}
if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) {
enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
return lower_gather4_integer(&ctx->ac, args, instr);
}
@@ -2074,37 +2056,32 @@ glsl_is_array_image(const struct glsl_type *type)
*
* The sample index should be adjusted as follows:
* sample_index = (fmask >> (sample_index * 4)) & 0xF;
*/
static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
LLVMValueRef coord_x, LLVMValueRef coord_y,
LLVMValueRef coord_z,
LLVMValueRef sample_index,
LLVMValueRef fmask_desc_ptr)
{
- LLVMValueRef fmask_load_address[4];
+ struct ac_image_args args = {0};
LLVMValueRef res;
- fmask_load_address[0] = coord_x;
- fmask_load_address[1] = coord_y;
- if (coord_z) {
- fmask_load_address[2] = coord_z;
- fmask_load_address[3] = LLVMGetUndef(ctx->i32);
- }
-
- struct ac_image_args args = {0};
+ args.coords[0] = coord_x;
+ args.coords[1] = coord_y;
+ if (coord_z)
+ args.coords[2] = coord_z;
args.opcode = ac_image_load;
args.dim = coord_z ? ac_image_2darray : ac_image_2d;
args.resource = fmask_desc_ptr;
args.dmask = 0xf;
- args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2);
res = ac_build_image_opcode(ctx, &args);
res = ac_to_integer(ctx, res);
LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
res,
ctx->i32_0, "");
@@ -2435,21 +2412,21 @@ static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
const nir_variable *var = instr->variables[0]->var;
const struct glsl_type *type = glsl_without_array(var->type);
struct ac_image_args args = { 0 };
args.dim = get_ac_sampler_dim(&ctx->ac, glsl_get_sampler_dim(type),
glsl_sampler_type_is_array(type));
args.dmask = 0xf;
args.resource = get_sampler_desc(ctx, instr->variables[0],
AC_DESC_IMAGE, NULL, true, false);
args.opcode = ac_image_get_resinfo;
- args.addr = ctx->ac.i32_0;
+ args.lod = ctx->ac.i32_0;
return ac_build_image_opcode(&ctx->ac, &args);
}
static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
LLVMValueRef res;
const nir_variable *var = instr->variables[0]->var;
const struct glsl_type *type = glsl_without_array(var->type);
@@ -2459,21 +2436,21 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
get_sampler_desc(ctx, instr->variables[0],
AC_DESC_BUFFER, NULL, true, false), true);
struct ac_image_args args = { 0 };
args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type),
glsl_sampler_type_is_array(type));
args.dmask = 0xf;
args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
args.opcode = ac_image_get_resinfo;
- args.addr = ctx->ac.i32_0;
+ args.lod = ctx->ac.i32_0;
res = ac_build_image_opcode(&ctx->ac, &args);
LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
glsl_sampler_type_is_array(type)) {
LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
@@ -3193,52 +3170,20 @@ static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
}
}
return ctx->abi->load_sampler_desc(ctx->abi,
descriptor_set,
base_index,
constant_index, index,
desc_type, image, write, bindless);
}
-static void set_tex_fetch_args(struct ac_llvm_context *ctx,
- struct ac_image_args *args,
- const nir_tex_instr *instr,
- nir_texop op,
- LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
- LLVMValueRef *param, unsigned count,
- unsigned dmask)
-{
- unsigned is_rect = 0;
-
- /* Pad to power of two vector */
- while (count < util_next_power_of_two(count))
- param[count++] = LLVMGetUndef(ctx->i32);
-
- if (count > 1)
- args->addr = ac_build_gather_values(ctx, param, count);
- else
- args->addr = param[0];
-
- args->resource = res_ptr;
- args->sampler = samp_ptr;
-
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
- args->addr = param[0];
- return;
- }
-
- args->dmask = dmask;
- args->unorm = is_rect;
- args->dim = get_ac_sampler_dim(&ctx->ac, instr->sampler_dim, instr->is_array);
-}
-
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
*
* SI-CI:
* If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
* filtering manually. The driver sets img7 to a mask clearing
* MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
* s_and_b32 samp0, samp0, img7
*
* VI:
* The ANISO_OVERRIDE sampler field enables this fix in TA.
@@ -3289,87 +3234,84 @@ static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
coord = ac_to_float(ctx, coord);
coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
coord = ac_to_integer(ctx, coord);
return coord;
}
static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
{
LLVMValueRef result = NULL;
struct ac_image_args args = { 0 };
- unsigned dmask = 0xf;
- LLVMValueRef address[16];
- LLVMValueRef coords[5];
- LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
- LLVMValueRef bias = NULL, offsets = NULL;
- LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
+ LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
LLVMValueRef ddx = NULL, ddy = NULL;
- LLVMValueRef derivs[6];
- unsigned chan, count = 0;
- unsigned const_src = 0, num_deriv_comp = 0;
- bool lod_is_zero = false;
+ unsigned offset_src = 0;
- tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
+ tex_fetch_ptrs(ctx, instr, &args.resource, &args.sampler, &fmask_ptr);
for (unsigned i = 0; i < instr->num_srcs; i++) {
switch (instr->src[i].src_type) {
- case nir_tex_src_coord:
- coord = get_src(ctx, instr->src[i].src);
+ case nir_tex_src_coord: {
+ LLVMValueRef coord = get_src(ctx, instr->src[i].src);
+ for (unsigned chan = 0; chan < instr->coord_components; ++chan)
+ args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
break;
+ }
case nir_tex_src_projector:
break;
case nir_tex_src_comparator:
- comparator = get_src(ctx, instr->src[i].src);
+ if (instr->is_shadow)
+ args.compare = get_src(ctx, instr->src[i].src);
break;
case nir_tex_src_offset:
- offsets = get_src(ctx, instr->src[i].src);
- const_src = i;
+ args.offset = get_src(ctx, instr->src[i].src);
+ offset_src = i;
break;
case nir_tex_src_bias:
- bias = get_src(ctx, instr->src[i].src);
+ if (instr->op == nir_texop_txb)
+ args.bias = get_src(ctx, instr->src[i].src);
break;
case nir_tex_src_lod: {
nir_const_value *val = nir_src_as_const_value(instr->src[i].src);
if (val && val->i32[0] == 0)
- lod_is_zero = true;
- lod = get_src(ctx, instr->src[i].src);
+ args.level_zero = true;
+ else
+ args.lod = get_src(ctx, instr->src[i].src);
break;
}
case nir_tex_src_ms_index:
sample_index = get_src(ctx, instr->src[i].src);
break;
case nir_tex_src_ms_mcs:
break;
case nir_tex_src_ddx:
ddx = get_src(ctx, instr->src[i].src);
- num_deriv_comp = instr->src[i].src.ssa->num_components;
break;
case nir_tex_src_ddy:
ddy = get_src(ctx, instr->src[i].src);
break;
case nir_tex_src_texture_offset:
case nir_tex_src_sampler_offset:
case nir_tex_src_plane:
default:
break;
}
}
if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
- result = get_buffer_size(ctx, res_ptr, true);
+ result = get_buffer_size(ctx, args.resource, true);
goto write_result;
}
if (instr->op == nir_texop_texture_samples) {
LLVMValueRef res, samples, is_msaa;
- res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, "");
+ res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
samples = LLVMBuildExtractElement(ctx->ac.builder, res,
LLVMConstInt(ctx->ac.i32, 3, false), "");
is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
LLVMConstInt(ctx->ac.i32, 28, false), "");
is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa,
LLVMConstInt(ctx->ac.i32, 0xe, false), "");
is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
LLVMConstInt(ctx->ac.i32, 0xe, false), "");
samples = LLVMBuildLShr(ctx->ac.builder, samples,
@@ -3377,240 +3319,173 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
samples = LLVMBuildAnd(ctx->ac.builder, samples,
LLVMConstInt(ctx->ac.i32, 0xf, false), "");
samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
samples, "");
samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
ctx->ac.i32_1, "");
result = samples;
goto write_result;
}
- if (coord)
- for (chan = 0; chan < instr->coord_components; chan++)
- coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
-
- if (offsets && instr->op != nir_texop_txf) {
+ if (args.offset && instr->op != nir_texop_txf) {
LLVMValueRef offset[3], pack;
- for (chan = 0; chan < 3; ++chan)
+ for (unsigned chan = 0; chan < 3; ++chan)
offset[chan] = ctx->ac.i32_0;
- args.offset = true;
- for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) {
- offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan);
+ unsigned num_components = ac_get_llvm_num_components(args.offset);
+ for (unsigned chan = 0; chan < num_components; chan++) {
+ offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);
offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
if (chan)
offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
}
pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
- address[count++] = pack;
-
+ args.offset = pack;
}
- /* pack LOD bias value */
- if (instr->op == nir_texop_txb && bias) {
- address[count++] = bias;
- }
-
- /* Pack depth comparison value */
- if (instr->is_shadow && comparator) {
- LLVMValueRef z = ac_to_float(&ctx->ac,
- ac_llvm_extract_elem(&ctx->ac, comparator, 0));
-
- /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
- * so the depth comparison value isn't clamped for Z16 and
- * Z24 anymore. Do it manually here.
- *
- * It's unnecessary if the original texture format was
- * Z32_FLOAT, but we don't know that here.
- */
- if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
- z = ac_build_clamp(&ctx->ac, z);
- address[count++] = z;
- }
+ /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
+ * so the depth comparison value isn't clamped for Z16 and
+ * Z24 anymore. Do it manually here.
+ *
+ * It's unnecessary if the original texture format was
+ * Z32_FLOAT, but we don't know that here.
+ */
+ if (args.compare && ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
+ args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare));
/* pack derivatives */
if (ddx || ddy) {
int num_src_deriv_channels, num_dest_deriv_channels;
switch (instr->sampler_dim) {
case GLSL_SAMPLER_DIM_3D:
case GLSL_SAMPLER_DIM_CUBE:
- num_deriv_comp = 3;
num_src_deriv_channels = 3;
num_dest_deriv_channels = 3;
break;
case GLSL_SAMPLER_DIM_2D:
default:
num_src_deriv_channels = 2;
num_dest_deriv_channels = 2;
- num_deriv_comp = 2;
break;
case GLSL_SAMPLER_DIM_1D:
num_src_deriv_channels = 1;
if (ctx->ac.chip_class >= GFX9) {
num_dest_deriv_channels = 2;
- num_deriv_comp = 2;
} else {
num_dest_deriv_channels = 1;
- num_deriv_comp = 1;
}
break;
}
for (unsigned i = 0; i < num_src_deriv_channels; i++) {
- derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
- derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
+ args.derivs[i] = ac_to_float(&ctx->ac,
+ ac_llvm_extract_elem(&ctx->ac, ddx, i));
+ args.derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac,
+ ac_llvm_extract_elem(&ctx->ac, ddy, i));
}
for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
- derivs[i] = ctx->ac.f32_0;
- derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
+ args.derivs[i] = ctx->ac.f32_0;
+ args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
}
}
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
- for (chan = 0; chan < instr->coord_components; chan++)
- coords[chan] = ac_to_float(&ctx->ac, coords[chan]);
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
+ for (unsigned chan = 0; chan < instr->coord_components; chan++)
+ args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
if (instr->coord_components == 3)
- coords[3] = LLVMGetUndef(ctx->ac.f32);
+ args.coords[3] = LLVMGetUndef(ctx->ac.f32);
ac_prepare_cube_coords(&ctx->ac,
instr->op == nir_texop_txd, instr->is_array,
- instr->op == nir_texop_lod, coords, derivs);
- if (num_deriv_comp)
- num_deriv_comp--;
+ instr->op == nir_texop_lod, args.coords, args.derivs);
}
- if (ddx || ddy) {
- for (unsigned i = 0; i < num_deriv_comp * 2; i++)
- address[count++] = derivs[i];
- }
-
- /* Pack texture coordinates */
- if (coord) {
- address[count++] = coords[0];
- if (instr->coord_components > 1) {
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
- coords[1] = apply_round_slice(&ctx->ac, coords[1]);
- }
- address[count++] = coords[1];
- }
- if (instr->coord_components > 2) {
- if ((instr->sampler_dim == GLSL_SAMPLER_DIM_2D ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
- instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
- instr->is_array &&
- instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
- coords[2] = apply_round_slice(&ctx->ac, coords[2]);
- }
- address[count++] = coords[2];
- }
-
- if (ctx->ac.chip_class >= GFX9) {
- LLVMValueRef filler;
- if (instr->op == nir_texop_txf)
- filler = ctx->ac.i32_0;
- else
- filler = LLVMConstReal(ctx->ac.f32, 0.5);
-
- if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
- /* No nir_texop_lod, because it does not take a slice
- * even with array textures. */
- if (instr->is_array && instr->op != nir_texop_lod ) {
- address[count] = address[count - 1];
- address[count - 1] = filler;
- count++;
- } else
- address[count++] = filler;
- }
- }
+ /* Texture coordinates fixups */
+ if (instr->coord_components > 2 &&
+ (instr->sampler_dim == GLSL_SAMPLER_DIM_2D ||
+ instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
+ instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
+ instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
+ instr->is_array &&
+ instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
+ args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
}
- /* Pack LOD */
- if (lod && ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && !lod_is_zero)) {
- address[count++] = lod;
- } else if (instr->op == nir_texop_txf_ms && sample_index) {
- address[count++] = sample_index;
- } else if(instr->op == nir_texop_txs) {
- count = 0;
- if (lod)
- address[count++] = lod;
+ if (ctx->ac.chip_class >= GFX9 &&
+ instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
+ instr->op != nir_texop_lod) {
+ LLVMValueRef filler;
+ if (instr->op == nir_texop_txf)
+ filler = ctx->ac.i32_0;
else
- address[count++] = ctx->ac.i32_0;
- }
+ filler = LLVMConstReal(ctx->ac.f32, 0.5);
- for (chan = 0; chan < count; chan++) {
- address[chan] = LLVMBuildBitCast(ctx->ac.builder,
- address[chan], ctx->ac.i32, "");
+ if (instr->is_array)
+ args.coords[2] = args.coords[1];
+ args.coords[1] = filler;
}
+ /* Pack sample index */
+ if (instr->op == nir_texop_txf_ms && sample_index)
+ args.coords[instr->coord_components] = sample_index;
+
if (instr->op == nir_texop_samples_identical) {
- LLVMValueRef txf_address[4];
struct ac_image_args txf_args = { 0 };
- unsigned txf_count = count;
- memcpy(txf_address, address, sizeof(txf_address));
-
- if (!instr->is_array)
- txf_address[2] = ctx->ac.i32_0;
- txf_address[3] = ctx->ac.i32_0;
-
- set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf,
- fmask_ptr, NULL,
- txf_address, txf_count, 0xf);
+ memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));
- result = build_tex_intrinsic(ctx, instr, false, &txf_args);
+ txf_args.dmask = 0xf;
+ txf_args.resource = fmask_ptr;
+ txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;
+ result = build_tex_intrinsic(ctx, instr, &txf_args);
result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
goto write_result;
}
if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
instr->op != nir_texop_txs) {
unsigned sample_chan = instr->is_array ? 3 : 2;
- address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac,
- address[0],
- address[1],
- instr->is_array ? address[2] : NULL,
- address[sample_chan],
- fmask_ptr);
+ args.coords[sample_chan] = adjust_sample_index_using_fmask(
+ &ctx->ac, args.coords[0], args.coords[1],
+ instr->is_array ? args.coords[2] : NULL,
+ args.coords[sample_chan], fmask_ptr);
}
- if (offsets && instr->op == nir_texop_txf) {
+ if (args.offset && instr->op == nir_texop_txf) {
nir_const_value *const_offset =
- nir_src_as_const_value(instr->src[const_src].src);
- int num_offsets = instr->src[const_src].src.ssa->num_components;
+ nir_src_as_const_value(instr->src[offset_src].src);
+ int num_offsets = instr->src[offset_src].src.ssa->num_components;
assert(const_offset);
num_offsets = MIN2(num_offsets, instr->coord_components);
- if (num_offsets > 2)
- address[2] = LLVMBuildAdd(ctx->ac.builder,
- address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), "");
- if (num_offsets > 1)
- address[1] = LLVMBuildAdd(ctx->ac.builder,
- address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), "");
- address[0] = LLVMBuildAdd(ctx->ac.builder,
- address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), "");
-
+ for (unsigned i = 0; i < num_offsets; ++i) {
+ args.coords[i] = LLVMBuildAdd(
+ ctx->ac.builder, args.coords[i],
+ LLVMConstInt(ctx->ac.i32, const_offset->i32[i], false), "");
+ }
+ args.offset = NULL;
}
/* TODO TG4 support */
+ args.dmask = 0xf;
if (instr->op == nir_texop_tg4) {
if (instr->is_shadow)
- dmask = 1;
+ args.dmask = 1;
else
- dmask = 1 << instr->component;
+ args.dmask = 1 << instr->component;
}
- set_tex_fetch_args(&ctx->ac, &args, instr, instr->op,
- res_ptr, samp_ptr, address, count, dmask);
- result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args);
+ if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
+ args.dim = get_ac_sampler_dim(&ctx->ac, instr->sampler_dim, instr->is_array);
+ result = build_tex_intrinsic(ctx, instr, &args);
if (instr->op == nir_texop_query_levels)
result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
else if (instr->is_shadow && instr->is_new_style_shadow &&
instr->op != nir_texop_txs && instr->op != nir_texop_lod &&
instr->op != nir_texop_tg4)
result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
else if (instr->op == nir_texop_txs &&
instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
instr->is_array) {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
index c92517fee28..d30f9da539e 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.h
@@ -43,21 +43,21 @@ struct lp_build_emit_data {
* order of the arguments should be as follows:
* SOA: s0.x, s0.y, s0.z, s0.w, s1.x, s1.y, s1.z, s1.w, s2.x, s2.y, s2.z, s2.w
* AOS: s0.xyzw, s1.xyzw, s2.xyzw
* TEXTURE Instructions: coord.xyzw
*
* Arguments should be packed into the args array. For example an SOA
* instructions that reads s0.x and s1.x args should look like this:
* args[0] = s0.x;
* args[1] = s1.x;
*/
- LLVMValueRef args[12];
+ LLVMValueRef args[18];
/**
* Number of arguments in the args array.
*/
unsigned arg_count;
/**
* The output type of the opcode. This should be set in the
* lp_build_tgsi_action::fetch_args function.
*/
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index a54db9e8596..1c653839aea 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1001,46 +1001,30 @@ static void atomic_emit(
}
tmp = lp_build_intrinsic(
builder, intrinsic_name, ctx->i32,
emit_data->args, emit_data->arg_count, 0);
emit_data->output[emit_data->chan] = ac_to_float(&ctx->ac, tmp);
}
static void set_tex_fetch_args(struct si_shader_context *ctx,
struct lp_build_emit_data *emit_data,
- unsigned target,
- LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
- LLVMValueRef *param, unsigned count,
- unsigned dmask)
+ struct ac_image_args *args,
+ unsigned target)
{
- struct ac_image_args args = {};
-
- /* Pad to power of two vector */
- while (count < util_next_power_of_two(count))
- param[count++] = LLVMGetUndef(ctx->i32);
-
- if (count > 1)
- args.addr = lp_build_gather_values(&ctx->gallivm, param, count);
- else
- args.addr = param[0];
-
- args.dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
- args.resource = res_ptr;
- args.sampler = samp_ptr;
- args.dmask = dmask;
- args.unorm = target == TGSI_TEXTURE_RECT ||
- target == TGSI_TEXTURE_SHADOWRECT;
+ args->dim = ac_texture_dim_from_tgsi_target(ctx->screen, target);
+ args->unorm = target == TGSI_TEXTURE_RECT ||
+ target == TGSI_TEXTURE_SHADOWRECT;
/* Ugly, but we seem to have no other choice right now. */
- STATIC_ASSERT(sizeof(args) <= sizeof(emit_data->args));
- memcpy(emit_data->args, &args, sizeof(args));
+ STATIC_ASSERT(sizeof(*args) <= sizeof(emit_data->args));
+ memcpy(emit_data->args, args, sizeof(*args));
}
static LLVMValueRef fix_resinfo(struct si_shader_context *ctx,
unsigned target, LLVMValueRef out)
{
LLVMBuilderRef builder = ctx->ac.builder;
/* 1D textures are allocated and used as 2D on GFX9. */
if (ctx->screen->info.chip_class >= GFX9 &&
(target == TGSI_TEXTURE_1D_ARRAY ||
@@ -1076,33 +1060,33 @@ static void resq_fetch_args(
emit_data->dst_type = ctx->v4i32;
if (reg->Register.File == TGSI_FILE_BUFFER) {
emit_data->args[0] = shader_buffer_fetch_rsrc(ctx, reg, false);
emit_data->arg_count = 1;
} else if (inst->Memory.Texture == TGSI_TEXTURE_BUFFER) {
image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture,
&emit_data->args[0]);
emit_data->arg_count = 1;
} else {
- LLVMValueRef res_ptr;
+ struct ac_image_args args = {};
unsigned image_target;
if (inst->Memory.Texture == TGSI_TEXTURE_3D)
image_target = TGSI_TEXTURE_2D_ARRAY;
else
image_target = inst->Memory.Texture;
image_fetch_rsrc(bld_base, reg, false, inst->Memory.Texture,
- &res_ptr);
- set_tex_fetch_args(ctx, emit_data, image_target,
- res_ptr, NULL, &ctx->i32_0, 1,
- 0xf);
+ &args.resource);
+ args.lod = ctx->i32_0;
+ args.dmask = 0xf;
+ set_tex_fetch_args(ctx, emit_data, &args, image_target);
}
}
static void resq_emit(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMBuilderRef builder = ctx->ac.builder;
@@ -1255,36 +1239,35 @@ static void tex_fetch_ptrs(
}
}
static void txq_fetch_args(
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
const struct tgsi_full_instruction *inst = emit_data->inst;
unsigned target = inst->Texture.Texture;
- LLVMValueRef res_ptr;
- LLVMValueRef address;
+ struct ac_image_args args = {};
- tex_fetch_ptrs(bld_base, emit_data, &res_ptr, NULL, NULL);
+ tex_fetch_ptrs(bld_base, emit_data, &args.resource, NULL, NULL);
if (target == TGSI_TEXTURE_BUFFER) {
/* Read the size from the buffer descriptor directly. */
- emit_data->args[0] = get_buffer_size(bld_base, res_ptr);
+ emit_data->args[0] = get_buffer_size(bld_base, args.resource);
return;
}
/* Textures - set the mip level. */
- address = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
+ args.lod = lp_build_emit_fetch(bld_base, inst, 0, TGSI_CHAN_X);
+ args.dmask = 0xf;
- set_tex_fetch_args(ctx, emit_data, target, res_ptr,
- NULL, &address, 1, 0xf);
+ set_tex_fetch_args(ctx, emit_data, &args, target);
}
static void txq_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
struct ac_image_args args;
unsigned target = emit_data->inst->Texture.Texture;
@@ -1303,57 +1286,51 @@ static void txq_emit(const struct lp_build_tgsi_action *action,
}
static void tex_fetch_args(
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
const struct tgsi_full_instruction *inst = emit_data->inst;
unsigned opcode = inst->Instruction.Opcode;
unsigned target = inst->Texture.Texture;
- LLVMValueRef coords[5], derivs[6];
- LLVMValueRef address[16];
- unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
+ struct ac_image_args args = {};
int ref_pos = tgsi_util_get_shadow_ref_src_index(target);
- unsigned count = 0;
unsigned chan;
- unsigned num_deriv_channels = 0;
bool has_offset = inst->Texture.NumOffsets > 0;
- LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
- unsigned dmask = 0xf;
+ LLVMValueRef fmask_ptr = NULL;
- tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
+ tex_fetch_ptrs(bld_base, emit_data, &args.resource, &args.sampler, &fmask_ptr);
if (target == TGSI_TEXTURE_BUFFER) {
emit_data->dst_type = ctx->v4f32;
- emit_data->args[0] = res_ptr;
+ emit_data->args[0] = args.resource;
emit_data->args[1] = ctx->i32_0;
emit_data->args[2] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_X);
emit_data->arg_count = 3;
return;
}
/* Fetch and project texture coordinates */
- coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
+ args.coords[3] = lp_build_emit_fetch(bld_base, emit_data->inst, 0, TGSI_CHAN_W);
for (chan = 0; chan < 3; chan++) {
- coords[chan] = lp_build_emit_fetch(bld_base,
+ args.coords[chan] = lp_build_emit_fetch(bld_base,
emit_data->inst, 0,
chan);
if (opcode == TGSI_OPCODE_TXP)
- coords[chan] = lp_build_emit_llvm_binary(bld_base,
- TGSI_OPCODE_DIV,
- coords[chan],
- coords[3]);
+ args.coords[chan] = lp_build_emit_llvm_binary(
+ bld_base, TGSI_OPCODE_DIV,
+ args.coords[chan], args.coords[3]);
}
if (opcode == TGSI_OPCODE_TXP)
- coords[3] = ctx->ac.f32_1;
+ args.coords[3] = ctx->ac.f32_1;
/* Pack offsets. */
if (has_offset &&
opcode != TGSI_OPCODE_TXF &&
opcode != TGSI_OPCODE_TXF_LZ) {
/* The offsets are six-bit signed integers packed like this:
* X=[5:0], Y=[13:8], and Z=[21:16].
*/
LLVMValueRef offset[3], pack;
@@ -1364,237 +1341,222 @@ static void tex_fetch_args(
emit_data->inst, 0, chan);
offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
LLVMConstInt(ctx->i32, 0x3f, 0), "");
if (chan)
offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
LLVMConstInt(ctx->i32, chan*8, 0), "");
}
pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
- address[count++] = pack;
+ args.offset = pack;
}
/* Pack LOD bias value */
if (opcode == TGSI_OPCODE_TXB)
- address[count++] = coords[3];
+ args.bias = args.coords[3];
if (opcode == TGSI_OPCODE_TXB2)
- address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
+ args.bias = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
/* Pack depth comparison value */
if (tgsi_is_shadow_target(target) && opcode != TGSI_OPCODE_LODQ) {
LLVMValueRef z;
if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
z = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
} else {
assert(ref_pos >= 0);
- z = coords[ref_pos];
+ z = args.coords[ref_pos];
}
/* Section 8.23.1 (Depth Texture Comparison Mode) of the
* OpenGL 4.5 spec says:
*
* "If the texture’s internal format indicates a fixed-point
* depth texture, then D_t and D_ref are clamped to the
* range [0, 1]; otherwise no clamping is performed."
*
* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
* so the depth comparison value isn't clamped for Z16 and
* Z24 anymore. Do it manually here.
*/
if (ctx->screen->info.chip_class >= VI) {
LLVMValueRef upgraded;
LLVMValueRef clamped;
- upgraded = LLVMBuildExtractElement(ctx->ac.builder, samp_ptr,
+ upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
LLVMConstInt(ctx->i32, 3, false), "");
upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
LLVMConstInt(ctx->i32, 29, false), "");
upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->i1, "");
clamped = ac_build_clamp(&ctx->ac, z);
z = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, z, "");
}
- address[count++] = z;
+ args.compare = z;
}
/* Pack user derivatives */
if (opcode == TGSI_OPCODE_TXD) {
int param, num_src_deriv_channels, num_dst_deriv_channels;
switch (target) {
case TGSI_TEXTURE_3D:
num_src_deriv_channels = 3;
num_dst_deriv_channels = 3;
- num_deriv_channels = 3;
break;
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_RECT:
case TGSI_TEXTURE_SHADOWRECT:
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_SHADOW2D_ARRAY:
num_src_deriv_channels = 2;
num_dst_deriv_channels = 2;
- num_deriv_channels = 2;
break;
case TGSI_TEXTURE_CUBE:
case TGSI_TEXTURE_SHADOWCUBE:
case TGSI_TEXTURE_CUBE_ARRAY:
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
/* Cube derivatives will be converted to 2D. */
num_src_deriv_channels = 3;
num_dst_deriv_channels = 3;
- num_deriv_channels = 2;
break;
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_SHADOW1D:
case TGSI_TEXTURE_1D_ARRAY:
case TGSI_TEXTURE_SHADOW1D_ARRAY:
num_src_deriv_channels = 1;
/* 1D textures are allocated and used as 2D on GFX9. */
if (ctx->screen->info.chip_class >= GFX9) {
num_dst_deriv_channels = 2;
- num_deriv_channels = 2;
} else {
num_dst_deriv_channels = 1;
- num_deriv_channels = 1;
}
break;
default:
unreachable("invalid target");
}
for (param = 0; param < 2; param++) {
for (chan = 0; chan < num_src_deriv_channels; chan++)
- derivs[param * num_dst_deriv_channels + chan] =
+ args.derivs[param * num_dst_deriv_channels + chan] =
lp_build_emit_fetch(bld_base, inst, param+1, chan);
/* Fill in the rest with zeros. */
for (chan = num_src_deriv_channels;
chan < num_dst_deriv_channels; chan++)
- derivs[param * num_dst_deriv_channels + chan] =
+ args.derivs[param * num_dst_deriv_channels + chan] =
ctx->ac.f32_0;
}
}
if (target == TGSI_TEXTURE_CUBE ||
target == TGSI_TEXTURE_CUBE_ARRAY ||
target == TGSI_TEXTURE_SHADOWCUBE ||
target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
ac_prepare_cube_coords(&ctx->ac,
opcode == TGSI_OPCODE_TXD,
target == TGSI_TEXTURE_CUBE_ARRAY ||
target == TGSI_TEXTURE_SHADOWCUBE_ARRAY,
opcode == TGSI_OPCODE_LODQ,
- coords, derivs);
+ args.coords, args.derivs);
} else if (tgsi_is_array_sampler(target) &&
opcode != TGSI_OPCODE_TXF &&
opcode != TGSI_OPCODE_TXF_LZ &&
ctx->screen->info.chip_class <= VI) {
unsigned array_coord = target == TGSI_TEXTURE_1D_ARRAY ? 1 : 2;
- coords[array_coord] =
+ args.coords[array_coord] =
ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32,
- &coords[array_coord], 1, 0);
+ &args.coords[array_coord], 1, 0);
}
- if (opcode == TGSI_OPCODE_TXD)
- for (int i = 0; i < num_deriv_channels * 2; i++)
- address[count++] = derivs[i];
-
- /* Pack texture coordinates */
- address[count++] = coords[0];
- if (num_coords > 1)
- address[count++] = coords[1];
- if (num_coords > 2)
- address[count++] = coords[2];
-
/* 1D textures are allocated and used as 2D on GFX9. */
if (ctx->screen->info.chip_class >= GFX9) {
LLVMValueRef filler;
/* Use 0.5, so that we don't sample the border color. */
if (opcode == TGSI_OPCODE_TXF ||
opcode == TGSI_OPCODE_TXF_LZ)
filler = ctx->i32_0;
else
filler = LLVMConstReal(ctx->f32, 0.5);
if (target == TGSI_TEXTURE_1D ||
target == TGSI_TEXTURE_SHADOW1D) {
- address[count++] = filler;
+ args.coords[1] = filler;
} else if (target == TGSI_TEXTURE_1D_ARRAY ||
target == TGSI_TEXTURE_SHADOW1D_ARRAY) {
- address[count] = address[count - 1];
- address[count - 1] = filler;
- count++;
+ args.coords[2] = args.coords[1];
+ args.coords[1] = filler;
}
}
/* Pack LOD or sample index */
- if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXF)
- address[count++] = coords[3];
+ if (opcode == TGSI_OPCODE_TXL)
+ args.lod = args.coords[3];
else if (opcode == TGSI_OPCODE_TXL2)
- address[count++] = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
-
- if (count > 16) {
- assert(!"Cannot handle more than 16 texture address parameters");
- count = 16;
+ args.lod = lp_build_emit_fetch(bld_base, inst, 1, TGSI_CHAN_X);
+ else if (opcode == TGSI_OPCODE_TXF) {
+ if (target == TGSI_TEXTURE_2D_MSAA) {
+ /* No LOD, but move sample index into the right place. */
+ args.coords[2] = args.coords[3];
+ } else if (target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
+ args.lod = args.coords[3];
+ }
}
- for (chan = 0; chan < count; chan++)
- address[chan] = ac_to_integer(&ctx->ac, address[chan]);
-
if (target == TGSI_TEXTURE_2D_MSAA ||
target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
- ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, address,
+ ac_apply_fmask_to_sample(&ctx->ac, fmask_ptr, args.coords,
target == TGSI_TEXTURE_2D_ARRAY_MSAA);
}
if (opcode == TGSI_OPCODE_TXF ||
opcode == TGSI_OPCODE_TXF_LZ) {
/* add tex offsets */
if (inst->Texture.NumOffsets) {
struct lp_build_context *uint_bld = &bld_base->uint_bld;
const struct tgsi_texture_offset *off = inst->TexOffsets;
assert(inst->Texture.NumOffsets == 1);
switch (target) {
case TGSI_TEXTURE_3D:
- address[2] = lp_build_add(uint_bld, address[2],
+ args.coords[2] = lp_build_add(uint_bld, args.coords[2],
ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleZ]);
/* fall through */
case TGSI_TEXTURE_2D:
case TGSI_TEXTURE_SHADOW2D:
case TGSI_TEXTURE_RECT:
case TGSI_TEXTURE_SHADOWRECT:
case TGSI_TEXTURE_2D_ARRAY:
case TGSI_TEXTURE_SHADOW2D_ARRAY:
- address[1] =
- lp_build_add(uint_bld, address[1],
+ args.coords[1] =
+ lp_build_add(uint_bld, args.coords[1],
ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleY]);
/* fall through */
case TGSI_TEXTURE_1D:
case TGSI_TEXTURE_SHADOW1D:
case TGSI_TEXTURE_1D_ARRAY:
case TGSI_TEXTURE_SHADOW1D_ARRAY:
- address[0] =
- lp_build_add(uint_bld, address[0],
+ args.coords[0] =
+ lp_build_add(uint_bld, args.coords[0],
ctx->imms[off->Index * TGSI_NUM_CHANNELS + off->SwizzleX]);
break;
/* texture offsets do not apply to other texture targets */
}
}
}
+ args.dmask = 0xf;
+
if (opcode == TGSI_OPCODE_TG4) {
unsigned gather_comp = 0;
/* DMASK was repurposed for GATHER4. 4 components are always
* returned and DMASK works like a swizzle - it selects
* the component to fetch. The only valid DMASK values are
* 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
* (red,red,red,red) etc.) The ISA document doesn't mention
* this.
*/
@@ -1604,25 +1566,24 @@ static void tex_fetch_args(
LLVMValueRef comp_imm;
struct tgsi_src_register src1 = inst->Src[1].Register;
assert(src1.File == TGSI_FILE_IMMEDIATE);
comp_imm = ctx->imms[src1.Index * TGSI_NUM_CHANNELS + src1.SwizzleX];
gather_comp = LLVMConstIntGetZExtValue(comp_imm);
gather_comp = CLAMP(gather_comp, 0, 3);
}
- dmask = 1 << gather_comp;
+ args.dmask = 1 << gather_comp;
}
- set_tex_fetch_args(ctx, emit_data, target, res_ptr,
- samp_ptr, address, count, dmask);
+ set_tex_fetch_args(ctx, emit_data, &args, target);
}
/* Gather4 should follow the same rules as bilinear filtering, but the hardware
* incorrectly forces nearest filtering if the texture format is integer.
* The only effect it has on Gather4, which always returns 4 texels for
* bilinear filtering, is that the final coordinates are off by 0.5 of
* the texel size.
*
* The workaround is to subtract 0.5 from the unnormalized coordinates,
* or (0.5 / size) from the normalized coordinates.
@@ -1634,28 +1595,21 @@ static void tex_fetch_args(
* descriptor was overridden (and hence a fixup of the sampler result is needed).
*/
static LLVMValueRef
si_lower_gather4_integer(struct si_shader_context *ctx,
struct ac_image_args *args,
unsigned target,
enum tgsi_return_type return_type)
{
LLVMBuilderRef builder = ctx->ac.builder;
LLVMValueRef wa_8888 = NULL;
- LLVMValueRef coord = args->addr;
LLVMValueRef half_texel[2];
- /* Texture coordinates start after:
- * {offset, bias, z-compare, derivatives}
- * Only the offset and z-compare can occur here.
- */
- unsigned coord_vgpr_index = (int)args->offset + (int)args->compare;
- int c;
assert(return_type == TGSI_RETURN_TYPE_SINT ||
return_type == TGSI_RETURN_TYPE_UINT);
if (target == TGSI_TEXTURE_CUBE ||
target == TGSI_TEXTURE_CUBE_ARRAY) {
LLVMValueRef formats;
LLVMValueRef data_format;
LLVMValueRef wa_formats;
@@ -1684,76 +1638,73 @@ si_lower_gather4_integer(struct si_shader_context *ctx,
args->resource = LLVMBuildInsertElement(
builder, args->resource, formats, ctx->i32_1, "");
}
if (target == TGSI_TEXTURE_RECT ||
target == TGSI_TEXTURE_SHADOWRECT) {
assert(!wa_8888);
half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
} else {
struct tgsi_full_instruction txq_inst = {};
+ struct ac_image_args txq_args = {};
struct lp_build_emit_data txq_emit_data = {};
struct lp_build_if_state if_ctx;
if (wa_8888) {
/* Skip the texture size query entirely if we don't need it. */
lp_build_if(&if_ctx, &ctx->gallivm, LLVMBuildNot(builder, wa_8888, ""));
}
/* Query the texture size. */
txq_inst.Texture.Texture = target;
txq_emit_data.inst = &txq_inst;
txq_emit_data.dst_type = ctx->v4i32;
- set_tex_fetch_args(ctx, &txq_emit_data, target,
- args->resource, NULL, &ctx->i32_0,
- 1, 0xf);
+ txq_args.resource = args->resource;
+ txq_args.sampler = args->sampler;
+ txq_args.lod = ctx->ac.i32_0;
+ txq_args.dmask = 0xf;
+ set_tex_fetch_args(ctx, &txq_emit_data, &txq_args, target);
txq_emit(NULL, &ctx->bld_base, &txq_emit_data);
/* Compute -0.5 / size. */
- for (c = 0; c < 2; c++) {
+ for (unsigned c = 0; c < 2; c++) {
half_texel[c] =
LLVMBuildExtractElement(builder, txq_emit_data.output[0],
LLVMConstInt(ctx->i32, c, 0), "");
half_texel[c] = LLVMBuildUIToFP(builder, half_texel[c], ctx->f32, "");
half_texel[c] =
lp_build_emit_llvm_unary(&ctx->bld_base,
TGSI_OPCODE_RCP, half_texel[c]);
half_texel[c] = LLVMBuildFMul(builder, half_texel[c],
LLVMConstReal(ctx->f32, -0.5), "");
}
if (wa_8888) {
lp_build_endif(&if_ctx);
LLVMBasicBlockRef bb[2] = { if_ctx.true_block, if_ctx.entry_block };
- for (c = 0; c < 2; c++) {
+ for (unsigned c = 0; c < 2; c++) {
LLVMValueRef values[2] = { half_texel[c], ctx->ac.f32_0 };
half_texel[c] = ac_build_phi(&ctx->ac, ctx->f32, 2,
values, bb);
}
}
}
- for (c = 0; c < 2; c++) {
+ for (unsigned c = 0; c < 2; c++) {
LLVMValueRef tmp;
- LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
-
- tmp = LLVMBuildExtractElement(builder, coord, index, "");
- tmp = ac_to_float(&ctx->ac, tmp);
+ tmp = ac_to_float(&ctx->ac, args->coords[c]);
tmp = LLVMBuildFAdd(builder, tmp, half_texel[c], "");
- tmp = ac_to_integer(&ctx->ac, tmp);
- coord = LLVMBuildInsertElement(builder, coord, tmp, index, "");
+ args->coords[c] = ac_to_integer(&ctx->ac, tmp);
}
- args->addr = coord;
-
return wa_8888;
}
/* The second half of the cube texture 8_8_8_8 integer workaround: adjust the
* result after the gather operation.
*/
static LLVMValueRef
si_fix_gather4_integer_result(struct si_shader_context *ctx,
LLVMValueRef result,
enum tgsi_return_type return_type,
@@ -1804,58 +1755,49 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
emit_data->args[1],
num_channels, false, true);
emit_data->output[emit_data->chan] =
ac_build_expand_to_vec4(&ctx->ac, result, num_channels);
return;
}
memcpy(&args, emit_data->args, sizeof(args)); /* ugly */
args.opcode = ac_image_sample;
- args.compare = tgsi_is_shadow_target(target);
- args.offset = inst->Texture.NumOffsets > 0;
switch (opcode) {
case TGSI_OPCODE_TXF:
case TGSI_OPCODE_TXF_LZ:
args.opcode = opcode == TGSI_OPCODE_TXF_LZ ||
target == TGSI_TEXTURE_2D_MSAA ||
target == TGSI_TEXTURE_2D_ARRAY_MSAA ?
ac_image_load : ac_image_load_mip;
- args.compare = false;
- args.offset = false;
break;
case TGSI_OPCODE_LODQ:
args.opcode = ac_image_get_lod;
- args.compare = false;
- args.offset = false;
break;
case TGSI_OPCODE_TEX:
case TGSI_OPCODE_TEX2:
case TGSI_OPCODE_TXP:
if (ctx->type != PIPE_SHADER_FRAGMENT)
args.level_zero = true;
break;
case TGSI_OPCODE_TEX_LZ:
args.level_zero = true;
break;
case TGSI_OPCODE_TXB:
case TGSI_OPCODE_TXB2:
assert(ctx->type == PIPE_SHADER_FRAGMENT);
- args.bias = true;
break;
case TGSI_OPCODE_TXL:
case TGSI_OPCODE_TXL2:
- args.lod = true;
break;
case TGSI_OPCODE_TXD:
- args.deriv = true;
break;
case TGSI_OPCODE_TG4:
args.opcode = ac_image_gather4;
args.level_zero = true;
break;
default:
assert(0);
return;
}
@@ -1890,21 +1832,20 @@ static void si_llvm_emit_txqs(
const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct si_shader_context *ctx = si_shader_context(bld_base);
LLVMValueRef res, samples;
LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL;
tex_fetch_ptrs(bld_base, emit_data, &res_ptr, &samp_ptr, &fmask_ptr);
-
/* Read the samples from the descriptor directly. */
res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->v8i32, "");
samples = LLVMBuildExtractElement(ctx->ac.builder, res,
LLVMConstInt(ctx->i32, 3, 0), "");
samples = LLVMBuildLShr(ctx->ac.builder, samples,
LLVMConstInt(ctx->i32, 16, 0), "");
samples = LLVMBuildAnd(ctx->ac.builder, samples,
LLVMConstInt(ctx->i32, 0xf, 0), "");
samples = LLVMBuildShl(ctx->ac.builder, ctx->i32_1,
samples, "");
@@ -1925,50 +1866,43 @@ static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action,
*/
/* Load the image descriptor. */
STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
ptr = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
ac_array_in_const32_addr_space(ctx->v8i32), "");
image = ac_build_load_to_sgpr(&ctx->ac, ptr,
LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
- LLVMValueRef addr[4];
unsigned chan = 0;
- addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16);
+ args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 0, 16);
if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
- addr[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16);
+ args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_POS_FIXED_PT, 16, 16);
/* Get the current render target layer index. */
if (ctx->shader->key.mono.u.ps.fbfetch_layered)
- addr[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11);
+ args.coords[chan++] = si_unpack_param(ctx, SI_PARAM_ANCILLARY, 16, 11);
if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
- addr[chan++] = si_get_sample_id(ctx);
-
- while (chan < 4)
- addr[chan++] = LLVMGetUndef(ctx->i32);
+ args.coords[chan++] = si_get_sample_id(ctx);
if (ctx->shader->key.mono.u.ps.fbfetch_msaa) {
fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
- ac_apply_fmask_to_sample(&ctx->ac, fmask, addr, false);
+ ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords, false);
}
- addr_vec = ac_build_gather_values(&ctx->ac, addr, ARRAY_SIZE(addr));
-
args.opcode = ac_image_load;
args.resource = image;
- args.addr = addr_vec;
args.dmask = 0xf;
if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
ac_image_2darraymsaa : ac_image_2dmsaa;
else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
ac_image_1darray : ac_image_1d;
else
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
ac_image_2darray : ac_image_2d;
--
2.14.1
More information about the mesa-dev
mailing list