[Mesa-dev] [PATCH v4 6/6] i965: gl_BaseVertex must be zero for non-indexed draw calls
Antia Puentes
apuentes at igalia.com
Tue Apr 10 08:28:09 UTC 2018
On 07/04/18 08:21, Jason Ekstrand wrote:
> On Fri, Apr 6, 2018 at 2:53 PM, Ian Romanick <idr at freedesktop.org
> <mailto:idr at freedesktop.org>> wrote:
>
> From: Antia Puentes <apuentes at igalia.com <mailto:apuentes at igalia.com>>
>
> We keep 'firstvertex' as it is and move gl_BaseVertex to the drawID
> vertex element. The previous Vertex Elements order was:
>
> * VE 1: <BaseVertex/firstvertex, BaseInstance, VertexID, InstanceID>
> * VE 2: <Draw ID, 0, 0, 0>
>
> and now it is:
>
> * VE 1: <firstvertex, BaseInstance, VertexID, InstanceID>
> * VE 2: <Draw ID, BaseVertex, 0, 0>
>
> To move the BaseVertex keeping VE1 as it is, allows to keep
> pointing the
> vertex buffer associated to VE 1 to the indirect buffer for indirect
> draw calls.
>
> From the OpenGL 4.6 (11.1.3.9 Shader Inputs) specification:
>
> "gl_BaseVertex holds the integer value passed to the baseVertex
> parameter to the command that resulted in the current shader
> invocation. In the case where the command has no baseVertex
> parameter,
> the value of gl_BaseVertex is zero."
>
> Fixes CTS tests:
>
> * KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysParameters
> *
> KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysInstancedParameters
> *
> KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysParameters
> *
> KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysIndirectParameters
> *
> KHR-GL45.shader_draw_parameters_tests.MultiDrawArraysIndirectCountParameters
>
> v2 (idr): Make changes to brw_prepare_shader_draw_parameters matching
> those in genX(emit_vertices). Reformat commit message to 72 columns.
>
> Signed-off-by: Ian Romanick <ian.d.romanick at intel.com
> <mailto:ian.d.romanick at intel.com>>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102678
> <https://bugs.freedesktop.org/show_bug.cgi?id=102678>
> ---
> src/intel/compiler/brw_nir.c | 14 +++++----
> src/intel/compiler/brw_vec4.cpp | 14 +++++----
> src/mesa/drivers/dri/i965/brw_context.h | 32
> ++++++++++++++-----
> src/mesa/drivers/dri/i965/brw_draw.c | 45
> ++++++++++++++++++---------
> src/mesa/drivers/dri/i965/brw_draw_upload.c | 14 ++++-----
> src/mesa/drivers/dri/i965/genX_state_upload.c | 38
> +++++++++++-----------
> 6 files changed, 97 insertions(+), 60 deletions(-)
>
> diff --git a/src/intel/compiler/brw_nir.c
> b/src/intel/compiler/brw_nir.c
> index 16b0d86814f..16ab529737b 100644
> --- a/src/intel/compiler/brw_nir.c
> +++ b/src/intel/compiler/brw_nir.c
> @@ -238,8 +238,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
> */
> const bool has_sgvs =
> nir->info.system_values_read &
> - (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
> - BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
> + (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
> BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
> BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
> BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));
> @@ -279,7 +278,6 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>
> nir_intrinsic_set_base(load, num_inputs);
> switch (intrin->intrinsic) {
> - case nir_intrinsic_load_base_vertex:
> case nir_intrinsic_load_first_vertex:
> nir_intrinsic_set_component(load, 0);
> break;
> @@ -293,11 +291,15 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
> nir_intrinsic_set_component(load, 3);
> break;
> case nir_intrinsic_load_draw_id:
> - /* gl_DrawID is stored right after gl_VertexID
> and friends
> - * if any of them exist.
> + case nir_intrinsic_load_base_vertex:
> + /* gl_DrawID and gl_BaseVertex are stored right
> after
> + gl_VertexID and friends if any of them exist.
> */
> nir_intrinsic_set_base(load, num_inputs +
> has_sgvs);
> - nir_intrinsic_set_component(load, 0);
> + if (intrin->intrinsic ==
> nir_intrinsic_load_draw_id)
> + nir_intrinsic_set_component(load, 0);
> + else
> + nir_intrinsic_set_component(load, 1);
> break;
> default:
> unreachable("Invalid system value intrinsic");
> diff --git a/src/intel/compiler/brw_vec4.cpp
> b/src/intel/compiler/brw_vec4.cpp
> index 1e384f5bf4d..d33caefdea9 100644
> --- a/src/intel/compiler/brw_vec4.cpp
> +++ b/src/intel/compiler/brw_vec4.cpp
> @@ -2825,14 +2825,19 @@ brw_compile_vs(const struct brw_compiler
> *compiler, void *log_data,
> * incoming vertex attribute. So, add an extra slot.
> */
> if (shader->info.system_values_read &
> - (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
> - BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
> + (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
> BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
> BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
> BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
> nr_attribute_slots++;
> }
>
> + if (shader->info.system_values_read &
> + (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
> + BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))) {
> + nr_attribute_slots++;
> + }
> +
> if (shader->info.system_values_read &
> BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX))
> prog_data->uses_basevertex = true;
> @@ -2853,12 +2858,9 @@ brw_compile_vs(const struct brw_compiler
> *compiler, void *log_data,
> BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))
> prog_data->uses_instanceid = true;
>
> - /* gl_DrawID has its very own vec4 */
> if (shader->info.system_values_read &
> - BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
> + BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))
> prog_data->uses_drawid = true;
> - nr_attribute_slots++;
> - }
>
> /* The 3DSTATE_VS documentation lists the lower bound on
> "Vertex URB Entry
> * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode.
> Empirically, in
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
> b/src/mesa/drivers/dri/i965/brw_context.h
> index c65a22c38bb..6758b34f47e 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -898,20 +898,36 @@ struct brw_context
> } params;
>
> /**
> - * Buffer and offset used for GL_ARB_shader_draw_parameters
> - * (for now, only gl_BaseVertex).
> + * Buffer and offset used for GL_ARB_shader_draw_parameters
> which will
> + * point to the indirect buffer for indirect draw calls.
> */
> struct brw_bo *draw_params_bo;
> uint32_t draw_params_offset;
>
> + struct {
> + /**
> + * The value of gl_DrawID for the current _mesa_prim.
> This always comes
> + * in from it's own vertex buffer since it's not part of
> the indirect
> + * draw parameters.
> + */
> + int gl_drawid;
> +
> + /**
> + * The value of gl_BaseVertex for the current
> _mesa_prim. Although
> + * gl_BaseVertex is part of the indirect buffer for
> indexed draw calls,
> + * that is not longer the case for non-indexed draw
> calls, where it must
> + * be zero, so we store it in a different buffer.
> + */
> + int gl_basevertex;
> + } derived_params;
> +
> /**
> - * The value of gl_DrawID for the current _mesa_prim. This
> always comes
> - * in from it's own vertex buffer since it's not part of
> the indirect
> - * draw parameters.
> + * Buffer and offset used for GL_ARB_shader_draw_parameters
> which contains
> + * parameters that are not present in the indirect buffer.
> They will go in
> + * their own vertex element.
> */
> - int gl_drawid;
> - struct brw_bo *draw_id_bo;
> - uint32_t draw_id_offset;
> + struct brw_bo *derived_draw_params_bo;
> + uint32_t derived_draw_params_offset;
>
> /**
> * Pointer to the the buffer storing the indirect draw
> parameters. It
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c
> b/src/mesa/drivers/dri/i965/brw_draw.c
> index f51f083178e..e9ec8d585d2 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -241,6 +241,14 @@ brw_emit_prim(struct brw_context *brw,
> prim->indirect_offset + 12);
> brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
> prim->indirect_offset + 16);
> +
> + /* Store the gl_BaseVertex value in its vertex buffer */
> + if (brw->draw.derived_draw_params_bo != NULL) {
> + brw_store_register_mem32(brw,
> brw->draw.derived_draw_params_bo,
> + GEN7_3DPRIM_BASE_VERTEX,
> + brw->draw.derived_draw_params_offset + 4);
> + brw_emit_mi_flush(brw);
> + }
>
>
> Oh, boy, this is tricky... First of all, it's a bit of a bummer that
> we can't just load the indirect buffer again for this. Not too much
> to do about it, I guess.
>
> Second, there be very scary dragons here. It turns out that, at least
> on Haswell (and possibly other platforms), reading from state
> registers while rendering is in-flight can lead to GPU hangs. Yes, I
> said "reading". We found this out the hard way while working on
> Vulkan indirect clear colors. The better thing to do here would be to
> use GPRs when available (I think they're safe but I'm not sure) or to
> do a MI_COPY_MEM_MEM which, of course, is only available on gen8+. On
> Ivy Bridge (and haswell if we're going to do a store_register_mem from
> a state register), we need to do a mi_flush *before* the store as well.
I see that this is complicated, I have thought in a different way to
implement this.
Instead of moving gl_BaseVertex to a VE2 and reading its value from
state registers:
- VE1 remains as: <firstvertex, BaseInstance, VertexID, InstanceID>
-> Patches 1-5 are still valid (I think) and we can still calculate the
VertexID as FirstVertex + VertexIDZeroBased.
- VE2 contains: <Draw ID, IsIndexedDraw, 0, 0>,
->when asked for glBaseVertex (nir_instrinsic_load_base_vertex), we
would return the value stored in FirstVertex is the draw call is
indexed, zero if it is not.
How does it sound?.
> } else {
> brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
> prim->indirect_offset + 12);
> @@ -815,7 +823,7 @@ brw_draw_single_prim(struct gl_context *ctx,
> }
>
> /* Determine if we need to flag BRW_NEW_VERTICES for updating the
> - * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect
> draw, we
> + * firstvertex or gl_BaseInstanceARB values. For indirect draw, we
> * always flag if the shader uses one of the values. For
> direct draws,
> * we only flag if the values change.
> */
> @@ -825,16 +833,12 @@ brw_draw_single_prim(struct gl_context *ctx,
> const struct brw_vs_prog_data *vs_prog_data =
> brw_vs_prog_data(brw->vs.base.prog_data);
> if (prim_id > 0) {
> - const bool uses_firstvertex =
> - vs_prog_data->uses_basevertex ||
> - vs_prog_data->uses_firstvertex;
> -
> const bool uses_draw_parameters =
> - uses_firstvertex ||
> + vs_prog_data->uses_firstvertex ||
> vs_prog_data->uses_baseinstance;
>
> if ((uses_draw_parameters && prim->is_indirect) ||
> - (uses_firstvertex &&
> + (vs_prog_data->uses_firstvertex &&
> brw->draw.params.firstvertex != new_firstvertex) ||
> (vs_prog_data->uses_baseinstance &&
> brw->draw.params.gl_baseinstance != new_baseinstance))
> @@ -861,17 +865,28 @@ brw_draw_single_prim(struct gl_context *ctx,
> }
>
> /* gl_DrawID always needs its own vertex buffer since it's not
> part of
> - * the indirect parameter buffer. If the program uses
> gl_DrawID we need
> - * to flag BRW_NEW_VERTICES. For the first iteration, we don't
> have
> - * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
> - * the loop.
> + * the indirect parameter buffer. This is the same for
> gl_BaseVertex, which
> + * is not part of the indirect parameter buffer for
> non-indexed draw calls.
> + * If the program uses gl_DrawID or, uses gl_BaseVertex and it
> is an indirect
> + * draw call or the value has changed, we need to flag
> BRW_NEW_VERTICES.
> + * For the first iteration, we don't have valid vs_prog_data,
> but we always
> + * flag BRW_NEW_VERTICES before the loop.
> */
> - brw->draw.gl_drawid = prim->draw_id;
> - brw_bo_unreference(brw->draw.draw_id_bo);
> - brw->draw.draw_id_bo = NULL;
> - if (prim_id > 0 && vs_prog_data->uses_drawid)
> + const int new_basevertex = prim->indexed ? prim->basevertex : 0;
> + if (prim_id > 0 &&
> + (vs_prog_data->uses_drawid ||
> + (vs_prog_data->uses_basevertex &&
> + (prim->is_indirect ||
> + brw->draw.derived_params.gl_basevertex !=
> new_basevertex))))
> brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
>
> + brw->draw.derived_params.gl_drawid = prim->draw_id;
> + brw->draw.derived_params.gl_basevertex = new_basevertex;
> +
> + brw_bo_unreference(brw->draw.derived_draw_params_bo);
> + brw->draw.derived_draw_params_bo = NULL;
> + brw->draw.derived_draw_params_offset = 0;
> +
> if (devinfo->gen < 6)
> brw_set_prim(brw, prim);
> else
> diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> index 7573f780f23..54ba951d6fd 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
> @@ -704,11 +704,11 @@ brw_prepare_shader_draw_parameters(struct
> brw_context *brw)
> const struct brw_vs_prog_data *vs_prog_data =
> brw_vs_prog_data(brw->vs.base.prog_data);
>
> - const bool uses_firstvertex =
> - vs_prog_data->uses_basevertex ||
> vs_prog_data->uses_firstvertex;
> + const bool uses_derived_draw_params =
> + vs_prog_data->uses_basevertex || vs_prog_data->uses_drawid;
>
> /* For non-indirect draws, upload the shader draw parameters */
> - if ((uses_firstvertex || vs_prog_data->uses_baseinstance) &&
> + if ((vs_prog_data->uses_firstvertex ||
> vs_prog_data->uses_baseinstance) &&
> brw->draw.draw_params_bo == NULL) {
> brw_upload_data(&brw->upload,
> &brw->draw.params, sizeof(brw->draw.params), 4,
> @@ -716,11 +716,11 @@ brw_prepare_shader_draw_parameters(struct
> brw_context *brw)
> &brw->draw.draw_params_offset);
> }
>
> - if (vs_prog_data->uses_drawid) {
> + if (uses_derived_draw_params) {
> brw_upload_data(&brw->upload,
> - &brw->draw.gl_drawid,
> sizeof(brw->draw.gl_drawid), 4,
> - &brw->draw.draw_id_bo,
> - &brw->draw.draw_id_offset);
> + &brw->draw.derived_params,
> sizeof(brw->draw.derived_params), 4,
> + &brw->draw.derived_draw_params_bo,
> + &brw->draw.derived_draw_params_offset);
> }
> }
>
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c
> b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index 1a32c60ae34..8323446b4ac 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -539,16 +539,14 @@ genX(emit_vertices)(struct brw_context *brw)
> }
> #endif
>
> - const bool uses_firstvertex =
> - vs_prog_data->uses_basevertex ||
> vs_prog_data->uses_firstvertex;
> -
> - const bool needs_sgvs_element = (uses_firstvertex ||
> + const bool needs_sgvs_element = (vs_prog_data->uses_firstvertex ||
> vs_prog_data->uses_baseinstance ||
> vs_prog_data->uses_instanceid ||
> vs_prog_data->uses_vertexid);
>
> unsigned nr_elements =
> - brw->vb.nr_enabled + needs_sgvs_element +
> vs_prog_data->uses_drawid;
> + brw->vb.nr_enabled + needs_sgvs_element +
> + (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex);
>
> #if GEN_GEN < 8
> /* If any of the formats of vb.enabled needs more that one
> upload, we need
> @@ -589,10 +587,15 @@ genX(emit_vertices)(struct brw_context *brw)
>
> /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS
> packets. */
> const bool uses_draw_params =
> - uses_firstvertex ||
> + vs_prog_data->uses_firstvertex ||
> vs_prog_data->uses_baseinstance;
> +
> + const bool uses_derived_draw_params =
> + vs_prog_data->uses_drawid ||
> + vs_prog_data->uses_basevertex;
> +
> const unsigned nr_buffers = brw->vb.nr_buffers +
> - uses_draw_params + vs_prog_data->uses_drawid;
> + uses_draw_params + uses_derived_draw_params;
>
> if (nr_buffers) {
> assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
> @@ -626,11 +629,11 @@ genX(emit_vertices)(struct brw_context *brw)
> 0 /* step rate */);
> }
>
> - if (vs_prog_data->uses_drawid) {
> + if (uses_derived_draw_params) {
> dw = genX(emit_vertex_buffer_state)(brw, dw,
> brw->vb.nr_buffers + 1,
> - brw->draw.draw_id_bo,
> - brw->draw.draw_id_offset,
> - brw->draw.draw_id_bo->size,
> + brw->draw.derived_draw_params_bo,
> + brw->draw.derived_draw_params_offset,
> + brw->draw.derived_draw_params_bo->size,
> 0 /* stride */,
> 0 /* step rate */);
> }
> @@ -772,7 +775,7 @@ genX(emit_vertices)(struct brw_context *brw)
> };
>
> #if GEN_GEN >= 8
> - if (uses_firstvertex ||
> + if (vs_prog_data->uses_firstvertex ||
> vs_prog_data->uses_baseinstance) {
> elem_state.VertexBufferIndex = brw->vb.nr_buffers;
> elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
> @@ -782,11 +785,10 @@ genX(emit_vertices)(struct brw_context *brw)
> #else
> elem_state.VertexBufferIndex = brw->vb.nr_buffers;
> elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
> - if (uses_firstvertex)
> + if (vs_prog_data->uses_firstvertex ||
> vs_prog_data->uses_baseinstance) {
> elem_state.Component0Control = VFCOMP_STORE_SRC;
> -
> - if (vs_prog_data->uses_baseinstance)
> elem_state.Component1Control = VFCOMP_STORE_SRC;
> + }
>
> if (vs_prog_data->uses_vertexid)
> elem_state.Component2Control = VFCOMP_STORE_VID;
> @@ -799,13 +801,13 @@ genX(emit_vertices)(struct brw_context *brw)
> dw += GENX(VERTEX_ELEMENT_STATE_length);
> }
>
> - if (vs_prog_data->uses_drawid) {
> + if (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex) {
> struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
> .Valid = true,
> .VertexBufferIndex = brw->vb.nr_buffers + 1,
> - .SourceElementFormat = ISL_FORMAT_R32_UINT,
> + .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
> .Component0Control = VFCOMP_STORE_SRC,
> - .Component1Control = VFCOMP_STORE_0,
> + .Component1Control = VFCOMP_STORE_SRC,
> .Component2Control = VFCOMP_STORE_0,
> .Component3Control = VFCOMP_STORE_0,
> #if GEN_GEN < 5
> --
> 2.14.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org <mailto:mesa-dev at lists.freedesktop.org>
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> <https://lists.freedesktop.org/mailman/listinfo/mesa-dev>
>
>
>
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180410/15944254/attachment-0001.html>
More information about the mesa-dev
mailing list