[Mesa-dev] [PATCH v2 4/5] nvc0: add support for programmable sample locations
Rhys Perry
pendingchaos02 at gmail.com
Tue May 29 16:48:48 UTC 2018
EvaluateDepthValuesARB()/ResolveDepthValuesNV() is a hint for the
driver to decompress the depth buffer if needed. This can be needed
because the decompressed result can depend on the current sample
locations.
Fiddling around with the current state of the patches, I could not
find a case where it seemed that compressed depth values depended
on the sample locations. I figured the depth values in the test were
rather compressible, but I don't know any details about Nvidia's
depth compression.
I wouldn't mind running a trace of the blob to see if it does
anything, though, if you want to be more sure.
As for the MS=1 thing, it's for the unlikely case that someone wants
to create a single-sample texture through some API other than OpenGL
(or directly through gallium) and wants to program the sample locations.
It doesn't matter much, though I think it's pretty harmless.
On Mon, May 28, 2018 at 9:05 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> ARB_sample_locations has all this stuff about a resolve of some sort
> when you switch around the locations. I don't see anything here about
> that. Thoughts?
>
> Also some more specific comments inline:
>
> On Thu, May 10, 2018 at 12:28 PM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
>> Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
>> ---
>> .../drivers/nouveau/codegen/nv50_ir_driver.h | 2 +
>> .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 7 +
>> .../nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 91 +++++++++--
>> .../nouveau/codegen/nv50_ir_lowering_nvc0.h | 2 +
>> src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 1 +
>> src/gallium/drivers/nouveau/nv50/nv50_resource.h | 1 +
>> src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 15 +-
>> src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 1 +
>> src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 3 +
>> src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 33 +++-
>> src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 17 +-
>> .../drivers/nouveau/nvc0/nvc0_state_validate.c | 174 +++++++++++++++++----
>> 12 files changed, 301 insertions(+), 46 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
>> index 3d0782f86b..7c835ceab8 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
>> @@ -73,6 +73,7 @@ struct nv50_ir_prog_symbol
>> #define NVISA_GK104_CHIPSET 0xe0
>> #define NVISA_GK20A_CHIPSET 0xea
>> #define NVISA_GM107_CHIPSET 0x110
>> +#define NVISA_GM200_CHIPSET 0x120
>>
>> struct nv50_ir_prog_info
>> {
>> @@ -145,6 +146,7 @@ struct nv50_ir_prog_info
>> bool persampleInvocation;
>> bool usesSampleMaskIn;
>> bool readsFramebuffer;
>> + bool readsSampleLocations;
>> } fp;
>> struct {
>> uint32_t inputOffset; /* base address for user args */
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> index 3c5bad05fe..d7844d7381 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> @@ -1520,6 +1520,10 @@ void Source::scanInstructionSrc(const Instruction& insn,
>> info->out[src.getIndex(0)].oread = 1;
>> }
>> }
>> + if (src.getFile() == TGSI_FILE_SYSTEM_VALUE) {
>> + if (info->sv[src.getIndex(0)].sn == TGSI_SEMANTIC_SAMPLEPOS)
>> + info->prop.fp.readsSampleLocations = true;
>> + }
>> if (src.getFile() != TGSI_FILE_INPUT)
>> return;
>>
>> @@ -1560,6 +1564,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
>> if (insn.getOpcode() == TGSI_OPCODE_FBFETCH)
>> info->prop.fp.readsFramebuffer = true;
>>
>> + if (insn.getOpcode() == TGSI_OPCODE_INTERP_SAMPLE)
>> + info->prop.fp.readsSampleLocations = true;
>> +
>> if (insn.dstCount()) {
>> Instruction::DstRegister dst = insn.getDst(0);
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> index 29f674b451..5f5298777e 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> @@ -2662,17 +2662,33 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
>> ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
>> break;
>> case SV_SAMPLE_POS: {
>> - Value *off = new_LValue(func, FILE_GPR);
>> - ld = bld.mkOp1(OP_PIXLD, TYPE_U32, i->getDef(0), bld.mkImm(0));
>> + Value *sampleID = bld.getScratch();
>> + ld = bld.mkOp1(OP_PIXLD, TYPE_U32, sampleID, bld.mkImm(0));
>> ld->subOp = NV50_IR_SUBOP_PIXLD_SAMPLEID;
>> - bld.mkOp2(OP_SHL, TYPE_U32, off, i->getDef(0), bld.mkImm(3));
>> - bld.mkLoad(TYPE_F32,
>> - i->getDef(0),
>> - bld.mkSymbol(
>> - FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
>> - TYPE_U32, prog->driver->io.sampleInfoBase +
>> - 4 * sym->reg.data.sv.index),
>> - off);
>> + Value *offset = calculateSampleOffset(sampleID);
>> +
>> + assert(prog->driver->prop.fp.readsSampleLocations);
>> +
>> + if (targ->getChipset() >= NVISA_GM200_CHIPSET) {
>> + bld.mkLoad(TYPE_F32,
>> + i->getDef(0),
>> + bld.mkSymbol(
>> + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
>> + TYPE_U32, prog->driver->io.sampleInfoBase),
>> + offset);
>> + bld.mkOp2(OP_EXTBF, TYPE_U32, i->getDef(0), i->getDef(0),
>> + bld.mkImm(0x040c + sym->reg.data.sv.index * 16));
>> + bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), TYPE_U32, i->getDef(0));
>> + bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), bld.mkImm(1.0f / 16.0f));
>> + } else {
>> + bld.mkLoad(TYPE_F32,
>> + i->getDef(0),
>> + bld.mkSymbol(
>> + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
>> + TYPE_U32, prog->driver->io.sampleInfoBase +
>> + 4 * sym->reg.data.sv.index),
>> + offset);
>> + }
>> break;
>> }
>> case SV_SAMPLE_MASK: {
>> @@ -2832,6 +2848,58 @@ NVC0LoweringPass::handleOUT(Instruction *i)
>> return true;
>> }
>>
>> +Value *
>> +NVC0LoweringPass::calculateSampleOffset(Value *sampleID)
>> +{
>> + Value *offset = bld.getScratch();
>> + if (targ->getChipset() >= NVISA_GM200_CHIPSET) {
>
> This all needs a lot more comments. I think I sorta get what this is
> doing (i.e. figuring where it is in the grid), but all the bit
> encoding is confusing. Having some comments here about what's in the
> data, and for the forgetful, what each INSBF will do, will be helpful.
>
>> + // Add sample ID
>> + bld.mkOp3(OP_INSBF, TYPE_U32, offset, sampleID, bld.mkImm(0x0302), bld.mkImm(0x0));
>> +
>> + Symbol *xSym = bld.mkSysVal(SV_POSITION, 0);
>> + Symbol *ySym = bld.mkSysVal(SV_POSITION, 1);
>> + Value *coord = bld.getScratch();
>> +
>> + // Add X coordinate
>> + bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
>> + targ->getSVAddress(FILE_SHADER_INPUT, xSym), NULL);
>> + bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
>> + ->rnd = ROUND_ZI;
>> + bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0105), offset);
>> +
>> + // Add Y coordinate
>> + bld.mkInterp(NV50_IR_INTERP_LINEAR, coord,
>> + targ->getSVAddress(FILE_SHADER_INPUT, ySym), NULL);
>> + bld.mkCvt(OP_CVT, TYPE_U32, coord, TYPE_F32, coord)
>> + ->rnd = ROUND_ZI;
>> + bld.mkOp3(OP_INSBF, TYPE_U32, offset, coord, bld.mkImm(0x0206), offset);
>> + } else {
>> + bld.mkOp2(OP_SHL, TYPE_U32, offset, sampleID, bld.mkImm(3));
>> + }
>> + return offset;
>> +}
>> +
>> +// Handle programmable sample locations for GM20x+
>> +void
>> +NVC0LoweringPass::handlePIXLD(Instruction *i)
>> +{
>> + if (i->subOp != NV50_IR_SUBOP_PIXLD_OFFSET)
>> + return;
>> + if (targ->getChipset() < NVISA_GM200_CHIPSET)
>> + return;
>> +
>> + assert(prog->driver->prop.fp.readsSampleLocations);
>> +
>> + bld.mkLoad(TYPE_F32,
>> + i->getDef(0),
>> + bld.mkSymbol(
>> + FILE_MEMORY_CONST, prog->driver->io.auxCBSlot,
>> + TYPE_U32, prog->driver->io.sampleInfoBase),
>> + calculateSampleOffset(i->getSrc(0)));
>> +
>> + bld.getBB()->remove(i);
>> +}
>> +
>> // Generate a binary predicate if an instruction is predicated by
>> // e.g. an f32 value.
>> void
>> @@ -2931,6 +2999,9 @@ NVC0LoweringPass::visit(Instruction *i)
>> case OP_BUFQ:
>> handleBUFQ(i);
>> break;
>> + case OP_PIXLD:
>> + handlePIXLD(i);
>> + break;
>> default:
>> break;
>> }
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
>> index 1b2b36d3cc..91771fbf7e 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
>> @@ -116,6 +116,7 @@ protected:
>> void handleSharedATOMNVE4(Instruction *);
>> void handleLDST(Instruction *);
>> bool handleBUFQ(Instruction *);
>> + void handlePIXLD(Instruction *);
>>
>> void checkPredicate(Instruction *);
>>
>> @@ -142,6 +143,7 @@ private:
>> void processSurfaceCoordsNVE4(TexInstruction *);
>> void processSurfaceCoordsNVC0(TexInstruction *);
>> void convertSurfaceFormat(TexInstruction *);
>> + Value *calculateSampleOffset(Value *sampleID);
>>
>> protected:
>> Value *loadTexHandle(Value *ptr, unsigned int slot);
>> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
>> index f2e304fde6..ac76a9446b 100644
>> --- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
>> +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
>> @@ -209,6 +209,7 @@ const struct u_resource_vtbl nv50_miptree_vtbl =
>> static inline bool
>> nv50_miptree_init_ms_mode(struct nv50_miptree *mt)
>> {
>> + mt->multisampling = mt->base.base.nr_samples > 0;
>> switch (mt->base.base.nr_samples) {
>> case 8:
>> mt->ms_mode = NV50_3D_MULTISAMPLE_MODE_MS8;
>> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_resource.h b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
>> index 5d03925b0d..ba4fc0c64c 100644
>> --- a/src/gallium/drivers/nouveau/nv50/nv50_resource.h
>> +++ b/src/gallium/drivers/nouveau/nv50/nv50_resource.h
>> @@ -54,6 +54,7 @@ struct nv50_miptree {
>> uint8_t ms_x; /* log2 of number of samples in x/y dimension */
>> uint8_t ms_y;
>> uint8_t ms_mode;
>> + bool multisampling; /* true if nr_samples > 0 on creation */
>
> Isn't ms_mode != 0 sufficient? I think we've killed MS=1 stuff at the
> st/mesa level.
>
>> };
>>
>> static inline struct nv50_miptree *
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> index 0729c88dff..77237a3c0a 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> @@ -62,6 +62,8 @@
>> #define NVC0_NEW_3D_DRIVERCONST (1 << 27)
>> #define NVC0_NEW_3D_WINDOW_RECTS (1 << 28)
>>
>> +#define NVC0_NEW_3D_SAMPLE_LOCATIONS (1 << 29)
>> +
>> #define NVC0_NEW_CP_PROGRAM (1 << 0)
>> #define NVC0_NEW_CP_SURFACES (1 << 1)
>> #define NVC0_NEW_CP_TEXTURES (1 << 2)
>> @@ -134,20 +136,21 @@
>> #define NVC0_CB_AUX_UBO_SIZE ((NVC0_MAX_PIPE_CONSTBUFS - 1) * 4 * 4)
>> /* 8 sets of 32-bits integer pairs sample offsets */
>> #define NVC0_CB_AUX_SAMPLE_INFO 0x1a0 /* FP */
>> -#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 4 * 2)
>> +/* 256 bytes, though only 64 bytes used before GM200 */
>> +#define NVC0_CB_AUX_SAMPLE_SIZE (8 * 2 * 4 * 4)
>> /* draw parameters (index bais, base instance, drawid) */
>> #define NVC0_CB_AUX_DRAW_INFO 0x1a0 /* VP */
>> /* 32 user buffers, at 4 32-bits integers each */
>> -#define NVC0_CB_AUX_BUF_INFO(i) 0x220 + (i) * 4 * 4
>> +#define NVC0_CB_AUX_BUF_INFO(i) 0x2a0 + (i) * 4 * 4
>> #define NVC0_CB_AUX_BUF_SIZE (NVC0_MAX_BUFFERS * 4 * 4)
>> /* 8 surfaces, at 16 32-bits integers each */
>> -#define NVC0_CB_AUX_SU_INFO(i) 0x420 + (i) * 16 * 4
>> +#define NVC0_CB_AUX_SU_INFO(i) 0x4a0 + (i) * 16 * 4
>> #define NVC0_CB_AUX_SU_SIZE (NVC0_MAX_IMAGES * 16 * 4)
>> /* 1 64-bits address and 1 32-bits sequence */
>> -#define NVC0_CB_AUX_MP_INFO 0x620
>> +#define NVC0_CB_AUX_MP_INFO 0x6a0
>> #define NVC0_CB_AUX_MP_SIZE 3 * 4
>> /* 512 64-byte blocks for bindless image handles */
>> -#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x630 + (i) * 16 * 4
>> +#define NVC0_CB_AUX_BINDLESS_INFO(i) 0x6b0 + (i) * 16 * 4
>> #define NVC0_CB_AUX_BINDLESS_SIZE (NVE4_IMG_MAX_HANDLES * 16 * 4)
>> /* 4 32-bits floats for the vertex runout, put at the end */
>> #define NVC0_CB_AUX_RUNOUT_INFO NVC0_CB_USR_SIZE + (NVC0_CB_AUX_SIZE * 6)
>> @@ -229,6 +232,8 @@ struct nvc0_context {
>> struct list_head img_head;
>>
>> struct pipe_framebuffer_state framebuffer;
>> + bool sample_locations_enabled;
>> + uint8_t sample_locations[2 * 4 * 8];
>> struct pipe_blend_color blend_colour;
>> struct pipe_stencil_ref stencil_ref;
>> struct pipe_poly_stipple stipple;
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
>> index 7983c40308..4607d53576 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
>> @@ -139,6 +139,7 @@ nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
>> static inline bool
>> nvc0_miptree_init_ms_mode(struct nv50_miptree *mt)
>> {
>> + mt->multisampling = mt->base.base.nr_samples > 0;
>> switch (mt->base.base.nr_samples) {
>> case 8:
>> mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS8;
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>> index 9520d984bb..57d98753f4 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
>> @@ -481,6 +481,9 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
>> }
>> }
>> }
>> + /* GM20x+ needs TGSI_SEMANTIC_POSITION to access sample locations */
>> + if (info->prop.fp.readsSampleLocations && info->target >= NVISA_GM200_CHIPSET)
>> + fp->hdr[5] |= 0x30000000;
>>
>> for (i = 0; i < info->numOutputs; ++i) {
>> if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>> index 45ac7dd626..53108cf44c 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
>> @@ -269,6 +269,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
>> case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES:
>> case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES:
>> case PIPE_CAP_CONSERVATIVE_RASTER_POST_DEPTH_COVERAGE:
>> + case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
>> return class_3d >= GM200_3D_CLASS;
>> case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES:
>> return class_3d >= GP100_3D_CLASS;
>> @@ -317,7 +318,6 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
>> case PIPE_CAP_CONSTBUF0_FLAGS:
>> case PIPE_CAP_PACKED_UNIFORMS:
>> case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_POINTS_LINES:
>> - case PIPE_CAP_PROGRAMMABLE_SAMPLE_LOCATIONS:
>> return 0;
>>
>> case PIPE_CAP_VENDOR_ID:
>> @@ -543,6 +543,36 @@ nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
>> #undef RET
>> }
>>
>> +static void
>> +nvc0_screen_get_sample_pixel_grid(struct pipe_screen *pscreen,
>> + unsigned sample_count,
>> + unsigned *width, unsigned *height)
>> +{
>> + switch (sample_count) {
>> + case 0:
>> + case 1:
>> + /* this could be 4x4, but the GL state tracker makes it difficult to
>> + * create a 1x MSAA texture and smaller grids save CB space */
>> + *width = 2;
>> + *height = 4;
>> + break;
>> + case 2:
>> + *width = 2;
>> + *height = 4;
>> + break;
>> + case 4:
>> + *width = 2;
>> + *height = 2;
>> + break;
>> + case 8:
>> + *width = 1;
>> + *height = 2;
>> + break;
>> + default:
>> + assert(0);
>> + }
>> +}
>> +
>> static void
>> nvc0_screen_destroy(struct pipe_screen *pscreen)
>> {
>> @@ -869,6 +899,7 @@ nvc0_screen_create(struct nouveau_device *dev)
>> pscreen->get_param = nvc0_screen_get_param;
>> pscreen->get_shader_param = nvc0_screen_get_shader_param;
>> pscreen->get_paramf = nvc0_screen_get_paramf;
>> + pscreen->get_sample_pixel_grid = nvc0_screen_get_sample_pixel_grid;
>> pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info;
>> pscreen->get_driver_query_group_info = nvc0_screen_get_driver_query_group_info;
>>
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>> index 37dbbe66c7..d9ee62523b 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
>> @@ -854,7 +854,21 @@ nvc0_set_framebuffer_state(struct pipe_context *pipe,
>>
>> util_copy_framebuffer_state(&nvc0->framebuffer, fb);
>>
>> - nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER;
>> + nvc0->dirty_3d |= NVC0_NEW_3D_FRAMEBUFFER | NVC0_NEW_3D_SAMPLE_LOCATIONS;
>> +}
>> +
>> +static void
>> +nvc0_set_sample_locations(struct pipe_context *pipe,
>> + size_t size, const uint8_t *locations)
>> +{
>> + struct nvc0_context *nvc0 = nvc0_context(pipe);
>> +
>> + nvc0->sample_locations_enabled = size && locations;
>> + if (size > sizeof(nvc0->sample_locations))
>> + size = sizeof(nvc0->sample_locations);
>> + memcpy(nvc0->sample_locations, locations, size);
>> +
>> + nvc0->dirty_3d |= NVC0_NEW_3D_SAMPLE_LOCATIONS;
>> }
>>
>> static void
>> @@ -1407,6 +1421,7 @@ nvc0_init_state_functions(struct nvc0_context *nvc0)
>> pipe->set_min_samples = nvc0_set_min_samples;
>> pipe->set_constant_buffer = nvc0_set_constant_buffer;
>> pipe->set_framebuffer_state = nvc0_set_framebuffer_state;
>> + pipe->set_sample_locations = nvc0_set_sample_locations;
>> pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
>> pipe->set_scissor_states = nvc0_set_scissor_states;
>> pipe->set_viewport_states = nvc0_set_viewport_states;
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
>> index 8e2192d3de..ccfe814658 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
>> @@ -71,13 +71,154 @@ nvc0_fb_set_null_rt(struct nouveau_pushbuf *push, unsigned i, unsigned layers)
>> PUSH_DATA (push, 0); // base layer
>> }
>>
>> +static uint32_t
>> +nv120_encode_cb_sample_location(uint8_t x, uint8_t y)
>
> gm200_... (and same below)
>
>> +{
>> + static const uint8_t lut[] = {
>> + 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
>> + 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7};
>> + uint32_t result = 0;
>> + /* S0.12 representation for TGSI_OPCODE_INTERP_SAMPLE */
>> + result |= lut[x] << 8 | lut[y] << 24;
>> + /* fill in gaps with data in a representation for SV_SAMPLE_POS */
>> + result |= x << 12 | y << 28;
>> + return result;
>> +}
>> +
>> +static void
>> +nv120_validate_sample_locations(struct nvc0_context *nvc0,
>> + unsigned ms, bool multisampling)
>> +{
>> + struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>> + struct nvc0_screen *screen = nvc0->screen;
>> + unsigned grid_width, grid_height, hw_grid_width;
>> + uint8_t sample_locations[16][2];
>> + unsigned cb[64];
>> + unsigned i, pixel, pixel_y, pixel_x, sample;
>> +
>> + screen->base.base.get_sample_pixel_grid(
>> + &screen->base.base, ms, &grid_width, &grid_height);
>> +
>> + hw_grid_width = grid_width;
>> + if (ms == 1) /* get_sample_pixel_grid() exposes 2x4 for 1x msaa */
>> + hw_grid_width = 4;
>> +
>> + if (!multisampling) {
>> + memset(sample_locations, 8, sizeof(sample_locations));
>> + } else if (nvc0->sample_locations_enabled) {
>> + uint8_t locations[2 * 4 * 8];
>> + memcpy(locations, nvc0->sample_locations, sizeof(locations));
>> + util_sample_locations_flip_y(
>> + &screen->base.base, nvc0->framebuffer.height, ms, locations);
>> +
>> + for (pixel = 0; pixel < hw_grid_width*grid_height; pixel++) {
>> + for (sample = 0; sample < ms; sample++) {
>> + unsigned pixel_x = pixel % hw_grid_width;
>> + unsigned pixel_y = pixel / hw_grid_width;
>> + unsigned wi = pixel * ms + sample;
>> + unsigned ri = (pixel_y * grid_width + pixel_x % grid_width);
>> + ri = ri * ms + sample;
>> + sample_locations[wi][0] = locations[ri] & 0xf;
>> + sample_locations[wi][1] = 16 - (locations[ri] >> 4);
>> + }
>> + }
>> + } else {
>> + const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
>> + for (i = 0; i < 16; i++) {
>> + sample_locations[i][0] = ptr[i % ms][0];
>> + sample_locations[i][1] = ptr[i % ms][1];
>> + }
>> + }
>> +
>> + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
>> + PUSH_DATA (push, NVC0_CB_AUX_SIZE);
>> + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
>> + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
>> + BEGIN_1IC0(push, NVC0_3D(CB_POS), 65);
>
> 1 + 64
>
>> + PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
>> + for (pixel_y = 0; pixel_y < 4; pixel_y++) {
>> + for (pixel_x = 0; pixel_x < 2; pixel_x++) {
>> + for (sample = 0; sample < ms; sample++) {
>> + unsigned write_index = (pixel_y * 2 + pixel_x) * 8 + sample;
>> + unsigned read_index = pixel_y % grid_height * hw_grid_width;
>> + read_index += pixel_x % grid_width;
>> + read_index = read_index * ms + sample;
>> + uint8_t x = sample_locations[read_index][0];
>> + uint8_t y = sample_locations[read_index][1];
>> + cb[write_index] = nv120_encode_cb_sample_location(x, y);
>> + }
>> + }
>> + }
>> + PUSH_DATAp(push, cb, 64);
>> +
>> + if (screen->base.class_3d >= GM200_3D_CLASS) {
>> + uint32_t val[4] = {};
>> +
>> + for (i = 0; i < 16; i++) {
>> + val[i / 4] |= sample_locations[i][0] << ((i % 4) * 8);
>> + val[i / 4] |= sample_locations[i][1] << ((i % 4) * 8 + 4);
>> + }
>> +
>> + BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
>> + PUSH_DATAp(push, val, 4);
>> + }
>> +}
>> +
>> +static void
>> +nvc0_validate_sample_locations(struct nvc0_context *nvc0, unsigned ms)
>> +{
>> + struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>> + struct nvc0_screen *screen = nvc0->screen;
>> + unsigned i;
>> +
>> + BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
>> + PUSH_DATA (push, NVC0_CB_AUX_SIZE);
>> + PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
>> + PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
>> + BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
>> + PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
>> + for (i = 0; i < ms; i++) {
>> + float xy[2];
>> + nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
>> + PUSH_DATAf(push, xy[0]);
>> + PUSH_DATAf(push, xy[1]);
>> + }
>> +}
>> +
>> +static void
>> +validate_sample_locations(struct nvc0_context *nvc0)
>> +{
>> + struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
>> + unsigned ms = util_framebuffer_get_num_samples(fb);
>> + bool multisampling = false;
>> +
>> + if (fb->zsbuf)
>> + multisampling = nv50_miptree(fb->zsbuf->texture)->multisampling;
>> + else
>> + if (fb->nr_cbufs) {
>> + unsigned i;
>> + for (i = 0; i < fb->nr_cbufs; ++i) {
>> + struct nv50_surface *sf = nv50_surface(fb->cbufs[i]);
>> +
>> + if (sf && sf->base.texture->target != PIPE_BUFFER)
>> + multisampling = nv50_miptree(sf->base.texture)->multisampling;
>> + }
>> + } else
>> + if (fb->samples > 1)
>> + multisampling = true;
>> +
>> + if (nvc0->screen->base.class_3d >= GM200_3D_CLASS)
>> + nv120_validate_sample_locations(nvc0, ms, multisampling);
>> + else
>> + nvc0_validate_sample_locations(nvc0, ms);
>> +}
>> +
>> static void
>> nvc0_validate_fb(struct nvc0_context *nvc0)
>> {
>> struct nouveau_pushbuf *push = nvc0->base.pushbuf;
>> struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
>> - struct nvc0_screen *screen = nvc0->screen;
>> - unsigned i, ms;
>> + unsigned i;
>> unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
>> unsigned nr_cbufs = fb->nr_cbufs;
>> bool serialize = false;
>> @@ -197,33 +338,6 @@ nvc0_validate_fb(struct nvc0_context *nvc0)
>> PUSH_DATA (push, (076543210 << 4) | nr_cbufs);
>> IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);
>>
>> - ms = 1 << ms_mode;
>> - BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
>> - PUSH_DATA (push, NVC0_CB_AUX_SIZE);
>> - PUSH_DATAh(push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
>> - PUSH_DATA (push, screen->uniform_bo->offset + NVC0_CB_AUX_INFO(4));
>> - BEGIN_1IC0(push, NVC0_3D(CB_POS), 1 + 2 * ms);
>> - PUSH_DATA (push, NVC0_CB_AUX_SAMPLE_INFO);
>> - for (i = 0; i < ms; i++) {
>> - float xy[2];
>> - nvc0->base.pipe.get_sample_position(&nvc0->base.pipe, ms, i, xy);
>> - PUSH_DATAf(push, xy[0]);
>> - PUSH_DATAf(push, xy[1]);
>> - }
>> -
>> - if (screen->base.class_3d >= GM200_3D_CLASS) {
>> - const uint8_t (*ptr)[2] = nvc0_get_sample_locations(ms);
>> - uint32_t val[4] = {};
>> -
>> - for (i = 0; i < 16; i++) {
>> - val[i / 4] |= ptr[i % ms][0] << (((i % 4) * 8) + 0);
>> - val[i / 4] |= ptr[i % ms][1] << (((i % 4) * 8) + 4);
>> - }
>> -
>> - BEGIN_NVC0(push, SUBC_3D(0x11e0), 4);
>> - PUSH_DATAp(push, val, 4);
>> - }
>> -
>> if (serialize)
>> IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
>>
>> @@ -879,6 +993,8 @@ validate_list_3d[] = {
>> NVC0_NEW_3D_TEVLPROG |
>> NVC0_NEW_3D_GMTYPROG },
>> { nvc0_validate_driverconst, NVC0_NEW_3D_DRIVERCONST },
>> + { validate_sample_locations, NVC0_NEW_3D_SAMPLE_LOCATIONS |
>> + NVC0_NEW_3D_FRAMEBUFFER},
>> };
>>
>> bool
>> --
>> 2.14.3
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list