[Mesa-dev] [PATCH v4 6/6] i965: gl_BaseVertex must be zero for non-indexed draw calls

Antia Puentes apuentes at igalia.com
Tue Apr 10 08:28:09 UTC 2018


On 07/04/18 08:21, Jason Ekstrand wrote:

> On Fri, Apr 6, 2018 at 2:53 PM, Ian Romanick <idr at freedesktop.org 
> <mailto:idr at freedesktop.org>> wrote:
>
>     From: Antia Puentes <apuentes at igalia.com <mailto:apuentes at igalia.com>>
>
>     We keep 'firstvertex' as it is and move gl_BaseVertex to the drawID
>     vertex element. The previous Vertex Elements order was:
>
>       * VE 1: <BaseVertex/firstvertex, BaseInstance, VertexID, InstanceID>
>       * VE 2: <Draw ID, 0, 0, 0>
>
>     and now it is:
>
>       * VE 1: <firstvertex, BaseInstance, VertexID, InstanceID>
>       * VE 2: <Draw ID, BaseVertex, 0, 0>
>
>     To move the BaseVertex keeping VE1 as it is, allows to keep
>     pointing the
>     vertex buffer associated to VE 1 to the indirect buffer for indirect
>     draw calls.
>
>     From the OpenGL 4.6 (11.1.3.9 Shader Inputs) specification:
>
>       "gl_BaseVertex holds the integer value passed to the baseVertex
>       parameter to the command that resulted in the current shader
>       invocation. In the case where the command has no baseVertex
>     parameter,
>       the value of gl_BaseVertex is zero."
>
>     Fixes CTS tests:
>
>       * KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysParameters
>       *
>     KHR-GL45.shader_draw_parameters_tests.ShaderDrawArraysInstancedParameters
>       *
>     KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysParameters
>       *
>     KHR-GL45.shader_draw_parameters_tests.ShaderMultiDrawArraysIndirectParameters
>       *
>     KHR-GL45.shader_draw_parameters_tests.MultiDrawArraysIndirectCountParameters
>
>     v2 (idr): Make changes to brw_prepare_shader_draw_parameters matching
>     those in genX(emit_vertices).  Reformat commit message to 72 columns.
>
>     Signed-off-by: Ian Romanick <ian.d.romanick at intel.com
>     <mailto:ian.d.romanick at intel.com>>
>     Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=102678
>     <https://bugs.freedesktop.org/show_bug.cgi?id=102678>
>     ---
>      src/intel/compiler/brw_nir.c                  | 14 +++++----
>      src/intel/compiler/brw_vec4.cpp               | 14 +++++----
>      src/mesa/drivers/dri/i965/brw_context.h       | 32
>     ++++++++++++++-----
>      src/mesa/drivers/dri/i965/brw_draw.c          | 45
>     ++++++++++++++++++---------
>      src/mesa/drivers/dri/i965/brw_draw_upload.c   | 14 ++++-----
>      src/mesa/drivers/dri/i965/genX_state_upload.c | 38
>     +++++++++++-----------
>      6 files changed, 97 insertions(+), 60 deletions(-)
>
>     diff --git a/src/intel/compiler/brw_nir.c
>     b/src/intel/compiler/brw_nir.c
>     index 16b0d86814f..16ab529737b 100644
>     --- a/src/intel/compiler/brw_nir.c
>     +++ b/src/intel/compiler/brw_nir.c
>     @@ -238,8 +238,7 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>          */
>         const bool has_sgvs =
>            nir->info.system_values_read &
>     -      (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
>     -       BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>     +      (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>             BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>             BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>             BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));
>     @@ -279,7 +278,6 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>
>                     nir_intrinsic_set_base(load, num_inputs);
>                     switch (intrin->intrinsic) {
>     -               case nir_intrinsic_load_base_vertex:
>                     case nir_intrinsic_load_first_vertex:
>                        nir_intrinsic_set_component(load, 0);
>                        break;
>     @@ -293,11 +291,15 @@ brw_nir_lower_vs_inputs(nir_shader *nir,
>                        nir_intrinsic_set_component(load, 3);
>                        break;
>                     case nir_intrinsic_load_draw_id:
>     -                  /* gl_DrawID is stored right after gl_VertexID
>     and friends
>     -                   * if any of them exist.
>     +               case nir_intrinsic_load_base_vertex:
>     +                  /* gl_DrawID and gl_BaseVertex are stored right
>     after
>     +                     gl_VertexID and friends if any of them exist.
>                         */
>                        nir_intrinsic_set_base(load, num_inputs +
>     has_sgvs);
>     -                  nir_intrinsic_set_component(load, 0);
>     +                  if (intrin->intrinsic ==
>     nir_intrinsic_load_draw_id)
>     +                     nir_intrinsic_set_component(load, 0);
>     +                  else
>     +                     nir_intrinsic_set_component(load, 1);
>                        break;
>                     default:
>                        unreachable("Invalid system value intrinsic");
>     diff --git a/src/intel/compiler/brw_vec4.cpp
>     b/src/intel/compiler/brw_vec4.cpp
>     index 1e384f5bf4d..d33caefdea9 100644
>     --- a/src/intel/compiler/brw_vec4.cpp
>     +++ b/src/intel/compiler/brw_vec4.cpp
>     @@ -2825,14 +2825,19 @@ brw_compile_vs(const struct brw_compiler
>     *compiler, void *log_data,
>          * incoming vertex attribute.  So, add an extra slot.
>          */
>         if (shader->info.system_values_read &
>     -       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
>     -        BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>     +       (BITFIELD64_BIT(SYSTEM_VALUE_FIRST_VERTEX) |
>              BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
>              BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
>              BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))) {
>            nr_attribute_slots++;
>         }
>
>     +   if (shader->info.system_values_read &
>     +       (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
>     +        BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))) {
>     +      nr_attribute_slots++;
>     +   }
>     +
>         if (shader->info.system_values_read &
>             BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX))
>            prog_data->uses_basevertex = true;
>     @@ -2853,12 +2858,9 @@ brw_compile_vs(const struct brw_compiler
>     *compiler, void *log_data,
>             BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID))
>            prog_data->uses_instanceid = true;
>
>     -   /* gl_DrawID has its very own vec4 */
>         if (shader->info.system_values_read &
>     -       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID)) {
>     +       BITFIELD64_BIT(SYSTEM_VALUE_DRAW_ID))
>            prog_data->uses_drawid = true;
>     -      nr_attribute_slots++;
>     -   }
>
>         /* The 3DSTATE_VS documentation lists the lower bound on
>     "Vertex URB Entry
>          * Read Length" as 1 in vec4 mode, and 0 in SIMD8 mode. 
>     Empirically, in
>     diff --git a/src/mesa/drivers/dri/i965/brw_context.h
>     b/src/mesa/drivers/dri/i965/brw_context.h
>     index c65a22c38bb..6758b34f47e 100644
>     --- a/src/mesa/drivers/dri/i965/brw_context.h
>     +++ b/src/mesa/drivers/dri/i965/brw_context.h
>     @@ -898,20 +898,36 @@ struct brw_context
>            } params;
>
>            /**
>     -       * Buffer and offset used for GL_ARB_shader_draw_parameters
>     -       * (for now, only gl_BaseVertex).
>     +       * Buffer and offset used for GL_ARB_shader_draw_parameters
>     which will
>     +       * point to the indirect buffer for indirect draw calls.
>             */
>            struct brw_bo *draw_params_bo;
>            uint32_t draw_params_offset;
>
>     +      struct {
>     +         /**
>     +          * The value of gl_DrawID for the current _mesa_prim.
>     This always comes
>     +          * in from it's own vertex buffer since it's not part of
>     the indirect
>     +          * draw parameters.
>     +          */
>     +         int gl_drawid;
>     +
>     +         /**
>     +          * The value of gl_BaseVertex for the current
>     _mesa_prim. Although
>     +          * gl_BaseVertex is part of the indirect buffer for
>     indexed draw calls,
>     +          * that is not longer the case for non-indexed draw
>     calls, where it must
>     +          * be zero, so we store it in a different buffer.
>     +          */
>     +         int gl_basevertex;
>     +      } derived_params;
>     +
>            /**
>     -       * The value of gl_DrawID for the current _mesa_prim. This
>     always comes
>     -       * in from it's own vertex buffer since it's not part of
>     the indirect
>     -       * draw parameters.
>     +       * Buffer and offset used for GL_ARB_shader_draw_parameters
>     which contains
>     +       * parameters that are not present in the indirect buffer.
>     They will go in
>     +       * their own vertex element.
>             */
>     -      int gl_drawid;
>     -      struct brw_bo *draw_id_bo;
>     -      uint32_t draw_id_offset;
>     +      struct brw_bo *derived_draw_params_bo;
>     +      uint32_t derived_draw_params_offset;
>
>            /**
>             * Pointer to the the buffer storing the indirect draw
>     parameters. It
>     diff --git a/src/mesa/drivers/dri/i965/brw_draw.c
>     b/src/mesa/drivers/dri/i965/brw_draw.c
>     index f51f083178e..e9ec8d585d2 100644
>     --- a/src/mesa/drivers/dri/i965/brw_draw.c
>     +++ b/src/mesa/drivers/dri/i965/brw_draw.c
>     @@ -241,6 +241,14 @@ brw_emit_prim(struct brw_context *brw,
>                                     prim->indirect_offset + 12);
>               brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
>                                     prim->indirect_offset + 16);
>     +
>     +         /* Store the gl_BaseVertex value in its vertex buffer */
>     +         if (brw->draw.derived_draw_params_bo != NULL) {
>     +            brw_store_register_mem32(brw,
>     brw->draw.derived_draw_params_bo,
>     +  GEN7_3DPRIM_BASE_VERTEX,
>     +  brw->draw.derived_draw_params_offset + 4);
>     +            brw_emit_mi_flush(brw);
>     +         }
>
>
> Oh, boy, this is tricky... First of all, it's a bit of a bummer that 
> we can't just load the indirect buffer again for this.  Not too much 
> to do about it, I guess.
>
> Second, there be very scary dragons here.  It turns out that, at least 
> on Haswell (and possibly other platforms), reading from state 
> registers while rendering is in-flight can lead to GPU hangs.  Yes, I 
> said "reading".  We found this out the hard way while working on 
> Vulkan indirect clear colors.  The better thing to do here would be to 
> use GPRs when available (I think they're safe but I'm not sure) or to 
> do a MI_COPY_MEM_MEM which, of course, is only available on gen8+.  On 
> Ivy Bridge (and haswell if we're going to do a store_register_mem from 
> a state register), we need to do a mi_flush *before* the store as well.

I see that this is complicated, I have thought in a different way to 
implement this.
Instead of moving gl_BaseVertex to a VE2 and reading its value from 
state registers:

- VE1 remains as: <firstvertex, BaseInstance, VertexID, InstanceID>
-> Patches 1-5 are still valid (I think) and we can still calculate the 
VertexID as FirstVertex + VertexIDZeroBased.

- VE2 contains: <Draw ID, IsIndexedDraw, 0, 0>,
->when asked for glBaseVertex (nir_instrinsic_load_base_vertex), we 
would return the value stored in FirstVertex is the draw call is 
indexed, zero if it is not.

How does it sound?.

>            } else {
>               brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE, bo,
>                                     prim->indirect_offset + 12);
>     @@ -815,7 +823,7 @@ brw_draw_single_prim(struct gl_context *ctx,
>         }
>
>         /* Determine if we need to flag BRW_NEW_VERTICES for updating the
>     -    * gl_BaseVertexARB or gl_BaseInstanceARB values. For indirect
>     draw, we
>     +    * firstvertex or gl_BaseInstanceARB values. For indirect draw, we
>          * always flag if the shader uses one of the values. For
>     direct draws,
>          * we only flag if the values change.
>          */
>     @@ -825,16 +833,12 @@ brw_draw_single_prim(struct gl_context *ctx,
>         const struct brw_vs_prog_data *vs_prog_data =
>            brw_vs_prog_data(brw->vs.base.prog_data);
>         if (prim_id > 0) {
>     -      const bool uses_firstvertex =
>     -         vs_prog_data->uses_basevertex ||
>     -         vs_prog_data->uses_firstvertex;
>     -
>            const bool uses_draw_parameters =
>     -         uses_firstvertex ||
>     +         vs_prog_data->uses_firstvertex ||
>               vs_prog_data->uses_baseinstance;
>
>            if ((uses_draw_parameters && prim->is_indirect) ||
>     -          (uses_firstvertex &&
>     +          (vs_prog_data->uses_firstvertex &&
>                 brw->draw.params.firstvertex != new_firstvertex) ||
>                (vs_prog_data->uses_baseinstance &&
>                 brw->draw.params.gl_baseinstance != new_baseinstance))
>     @@ -861,17 +865,28 @@ brw_draw_single_prim(struct gl_context *ctx,
>         }
>
>         /* gl_DrawID always needs its own vertex buffer since it's not
>     part of
>     -    * the indirect parameter buffer. If the program uses
>     gl_DrawID we need
>     -    * to flag BRW_NEW_VERTICES. For the first iteration, we don't
>     have
>     -    * valid vs_prog_data, but we always flag BRW_NEW_VERTICES before
>     -    * the loop.
>     +    * the indirect parameter buffer. This is the same for
>     gl_BaseVertex, which
>     +    * is not part of the indirect parameter buffer for
>     non-indexed draw calls.
>     +    * If the program uses gl_DrawID or, uses gl_BaseVertex and it
>     is an indirect
>     +    * draw call or the value has changed, we need to flag
>     BRW_NEW_VERTICES.
>     +    * For the first iteration, we don't have valid vs_prog_data,
>     but we always
>     +    * flag BRW_NEW_VERTICES before the loop.
>          */
>     -   brw->draw.gl_drawid = prim->draw_id;
>     -   brw_bo_unreference(brw->draw.draw_id_bo);
>     -   brw->draw.draw_id_bo = NULL;
>     -   if (prim_id > 0 && vs_prog_data->uses_drawid)
>     +   const int new_basevertex = prim->indexed ? prim->basevertex : 0;
>     +   if (prim_id > 0 &&
>     +       (vs_prog_data->uses_drawid ||
>     +        (vs_prog_data->uses_basevertex &&
>     +         (prim->is_indirect ||
>     +          brw->draw.derived_params.gl_basevertex !=
>     new_basevertex))))
>            brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
>
>     +   brw->draw.derived_params.gl_drawid = prim->draw_id;
>     +   brw->draw.derived_params.gl_basevertex = new_basevertex;
>     +
>     +   brw_bo_unreference(brw->draw.derived_draw_params_bo);
>     +   brw->draw.derived_draw_params_bo = NULL;
>     +   brw->draw.derived_draw_params_offset = 0;
>     +
>         if (devinfo->gen < 6)
>            brw_set_prim(brw, prim);
>         else
>     diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c
>     b/src/mesa/drivers/dri/i965/brw_draw_upload.c
>     index 7573f780f23..54ba951d6fd 100644
>     --- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
>     +++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
>     @@ -704,11 +704,11 @@ brw_prepare_shader_draw_parameters(struct
>     brw_context *brw)
>         const struct brw_vs_prog_data *vs_prog_data =
>            brw_vs_prog_data(brw->vs.base.prog_data);
>
>     -   const bool uses_firstvertex =
>     -      vs_prog_data->uses_basevertex ||
>     vs_prog_data->uses_firstvertex;
>     +   const bool uses_derived_draw_params =
>     +      vs_prog_data->uses_basevertex || vs_prog_data->uses_drawid;
>
>         /* For non-indirect draws, upload the shader draw parameters */
>     -   if ((uses_firstvertex || vs_prog_data->uses_baseinstance) &&
>     +   if ((vs_prog_data->uses_firstvertex ||
>     vs_prog_data->uses_baseinstance) &&
>             brw->draw.draw_params_bo == NULL) {
>            brw_upload_data(&brw->upload,
>                            &brw->draw.params, sizeof(brw->draw.params), 4,
>     @@ -716,11 +716,11 @@ brw_prepare_shader_draw_parameters(struct
>     brw_context *brw)
>      &brw->draw.draw_params_offset);
>         }
>
>     -   if (vs_prog_data->uses_drawid) {
>     +   if (uses_derived_draw_params) {
>            brw_upload_data(&brw->upload,
>     -                      &brw->draw.gl_drawid,
>     sizeof(brw->draw.gl_drawid), 4,
>     -                      &brw->draw.draw_id_bo,
>     -                      &brw->draw.draw_id_offset);
>     +                      &brw->draw.derived_params,
>     sizeof(brw->draw.derived_params), 4,
>     +                      &brw->draw.derived_draw_params_bo,
>     +                      &brw->draw.derived_draw_params_offset);
>         }
>      }
>
>     diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c
>     b/src/mesa/drivers/dri/i965/genX_state_upload.c
>     index 1a32c60ae34..8323446b4ac 100644
>     --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
>     +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
>     @@ -539,16 +539,14 @@ genX(emit_vertices)(struct brw_context *brw)
>         }
>      #endif
>
>     -   const bool uses_firstvertex =
>     -      vs_prog_data->uses_basevertex ||
>     vs_prog_data->uses_firstvertex;
>     -
>     -   const bool needs_sgvs_element = (uses_firstvertex ||
>     +   const bool needs_sgvs_element = (vs_prog_data->uses_firstvertex ||
>      vs_prog_data->uses_baseinstance ||
>      vs_prog_data->uses_instanceid ||
>      vs_prog_data->uses_vertexid);
>
>         unsigned nr_elements =
>     -      brw->vb.nr_enabled + needs_sgvs_element +
>     vs_prog_data->uses_drawid;
>     +      brw->vb.nr_enabled + needs_sgvs_element +
>     +      (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex);
>
>      #if GEN_GEN < 8
>         /* If any of the formats of vb.enabled needs more that one
>     upload, we need
>     @@ -589,10 +587,15 @@ genX(emit_vertices)(struct brw_context *brw)
>
>         /* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS
>     packets. */
>         const bool uses_draw_params =
>     -      uses_firstvertex ||
>     +      vs_prog_data->uses_firstvertex ||
>            vs_prog_data->uses_baseinstance;
>     +
>     +   const bool uses_derived_draw_params =
>     +      vs_prog_data->uses_drawid ||
>     +      vs_prog_data->uses_basevertex;
>     +
>         const unsigned nr_buffers = brw->vb.nr_buffers +
>     -      uses_draw_params + vs_prog_data->uses_drawid;
>     +      uses_draw_params + uses_derived_draw_params;
>
>         if (nr_buffers) {
>            assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
>     @@ -626,11 +629,11 @@ genX(emit_vertices)(struct brw_context *brw)
>                                                   0 /* step rate */);
>            }
>
>     -      if (vs_prog_data->uses_drawid) {
>     +      if (uses_derived_draw_params) {
>               dw = genX(emit_vertex_buffer_state)(brw, dw,
>     brw->vb.nr_buffers + 1,
>     -  brw->draw.draw_id_bo,
>     -  brw->draw.draw_id_offset,
>     -  brw->draw.draw_id_bo->size,
>     +  brw->draw.derived_draw_params_bo,
>     +  brw->draw.derived_draw_params_offset,
>     +  brw->draw.derived_draw_params_bo->size,
>                                                   0 /* stride */,
>                                                   0 /* step rate */);
>            }
>     @@ -772,7 +775,7 @@ genX(emit_vertices)(struct brw_context *brw)
>            };
>
>      #if GEN_GEN >= 8
>     -      if (uses_firstvertex ||
>     +      if (vs_prog_data->uses_firstvertex ||
>                vs_prog_data->uses_baseinstance) {
>               elem_state.VertexBufferIndex = brw->vb.nr_buffers;
>               elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
>     @@ -782,11 +785,10 @@ genX(emit_vertices)(struct brw_context *brw)
>      #else
>            elem_state.VertexBufferIndex = brw->vb.nr_buffers;
>            elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
>     -      if (uses_firstvertex)
>     +      if (vs_prog_data->uses_firstvertex ||
>     vs_prog_data->uses_baseinstance) {
>               elem_state.Component0Control = VFCOMP_STORE_SRC;
>     -
>     -      if (vs_prog_data->uses_baseinstance)
>               elem_state.Component1Control = VFCOMP_STORE_SRC;
>     +      }
>
>            if (vs_prog_data->uses_vertexid)
>               elem_state.Component2Control = VFCOMP_STORE_VID;
>     @@ -799,13 +801,13 @@ genX(emit_vertices)(struct brw_context *brw)
>            dw += GENX(VERTEX_ELEMENT_STATE_length);
>         }
>
>     -   if (vs_prog_data->uses_drawid) {
>     +   if (vs_prog_data->uses_drawid || vs_prog_data->uses_basevertex) {
>            struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
>               .Valid = true,
>               .VertexBufferIndex = brw->vb.nr_buffers + 1,
>     -         .SourceElementFormat = ISL_FORMAT_R32_UINT,
>     +         .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
>               .Component0Control = VFCOMP_STORE_SRC,
>     -         .Component1Control = VFCOMP_STORE_0,
>     +         .Component1Control = VFCOMP_STORE_SRC,
>               .Component2Control = VFCOMP_STORE_0,
>               .Component3Control = VFCOMP_STORE_0,
>      #if GEN_GEN < 5
>     --
>     2.14.3
>
>     _______________________________________________
>     mesa-dev mailing list
>     mesa-dev at lists.freedesktop.org <mailto:mesa-dev at lists.freedesktop.org>
>     https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>     <https://lists.freedesktop.org/mailman/listinfo/mesa-dev>
>
>
>
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180410/15944254/attachment-0001.html>


More information about the mesa-dev mailing list