[Mesa-dev] [PATCH 8/8] st/mesa: add double input support including lowering (v3)

Wed Apr 29 20:23:59 PDT 2015

On Wed, Apr 29, 2015 at 9:14 PM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This takes a different approach to previously, we cannot index into the
> inputMapping with anything but the mesa attribute index, so we can't use
> the just add one to index trick, we need more info to add one to it
> after we've mapped the input.

Almost certainly a failing on my part, but the above makes little sense to me.

>
> (Fixed copy propgation and cleaned up a little)
>
> v2: drop float64 format check, just attr->Doubles.
> merge enable patch.
> v3: cleanup code a bit.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/mesa/state_tracker/st_atom_array.c     | 171 ++++++++++++++++++++++-------
>  src/mesa/state_tracker/st_extensions.c     |   4 +-
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  23 +++-
>  src/mesa/state_tracker/st_program.c        |   5 +
>  src/mesa/state_tracker/st_program.h        |   1 +
>  5 files changed, 159 insertions(+), 45 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
> index d4fb8b8..0f2720b 100644
> --- a/src/mesa/state_tracker/st_atom_array.c
> +++ b/src/mesa/state_tracker/st_atom_array.c
> @@ -44,7 +44,6 @@
>
>  #include "cso_cache/cso_context.h"
>  #include "util/u_math.h"
> -
>  #include "main/bufferobj.h"
>  #include "main/glformats.h"
>
> @@ -311,6 +310,17 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
>     return PIPE_FORMAT_NONE; /* silence compiler warning */
>  }
>
> +static const struct gl_client_array *
> +get_client_array(const struct st_vertex_program *vp,
> +                 const struct gl_client_array **arrays,
> +                 int attr)
> +{
> +   const GLuint mesaAttr = vp->index_to_input[attr];
> +   /* st_program uses 0xffffffff to denote a double placeholder attribute */
> +   if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
> +      return NULL;
> +   return arrays[mesaAttr];
> +}

newline

>  /**
>   * Examine the active arrays to determine if we have interleaved
>   * vertex arrays all living in one VBO, or all living in user space.
> @@ -327,11 +337,16 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
>     GLboolean userSpaceBuffer = GL_FALSE;
>
>     for (attr = 0; attr < vpv->num_inputs; attr++) {
> -      const GLuint mesaAttr = vp->index_to_input[attr];
> -      const struct gl_client_array *array = arrays[mesaAttr];
> -      const struct gl_buffer_object *bufObj = array->BufferObj;
> -      const GLsizei stride = array->StrideB; /* in bytes */
> +      const struct gl_client_array *array;
> +      const struct gl_buffer_object *bufObj;
> +      GLsizei stride;
>
> +      array = get_client_array(vp, arrays, attr);
> +      if (!array)
> +        continue;
> +
> +      stride = array->StrideB; /* in bytes */
> +      bufObj = array->BufferObj;
>        if (attr == 0) {
>           /* save info about the first array */
>           firstStride = stride;
> @@ -358,6 +373,55 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
>     return GL_TRUE;
>  }
>
> +static void init_velement(struct pipe_vertex_element *velement,
> +                          int src_offset, int format,
> +                          int instance_divisor, int vbo_index)
> +{
> +   velement->src_offset = src_offset;
> +   velement->src_format = format;
> +   velement->instance_divisor = instance_divisor;
> +   velement->vertex_buffer_index = vbo_index;
> +   assert(velement->src_format);
> +}
> +
> +static void init_velement_lowered(struct st_context *st,
> +                                  struct pipe_vertex_element *velements,
> +                                  int src_offset, int format,
> +                                  int instance_divisor, int vbo_index,
> +                                  int nr_components, GLboolean doubles,
> +                                  GLuint *attr_idx)
> +{
> +   int idx = *attr_idx;
> +   if (doubles) {
> +      int lower_format;
> +
> +      if (nr_components == 1)
> +         lower_format = PIPE_FORMAT_R32G32_UINT;
> +      else if (nr_components >= 2)
> +         lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
> +
> +      init_velement(&velements[idx], src_offset,
> +                    lower_format, instance_divisor, vbo_index);
> +      idx++;
> +
> +      if (nr_components > 2) {
> +         if (nr_components == 3)
> +            lower_format = PIPE_FORMAT_R32G32_UINT;
> +         else if (nr_components >= 4)
> +            lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
> +
> +         init_velement(&velements[idx], src_offset + 4 * sizeof(float),
> +                       lower_format, instance_divisor, vbo_index);
> +         idx++;
> +      }
> +   } else {
> +      init_velement(&velements[idx], src_offset,
> +                    format, instance_divisor, vbo_index);
> +      idx++;
> +   }
> +   *attr_idx = idx;
> +}
> +
>  /**
>   * Set up for drawing interleaved arrays that all live in one VBO
>   * or all live in user space.
> @@ -365,13 +429,15 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
>   * \param velements  returns vertex element info
>   */
>  static boolean
> -setup_interleaved_attribs(const struct st_vertex_program *vp,
> +setup_interleaved_attribs(struct st_context *st,
> +                          const struct st_vertex_program *vp,
>                            const struct st_vp_variant *vpv,
>                            const struct gl_client_array **arrays,
>                            struct pipe_vertex_buffer *vbuffer,
> -                          struct pipe_vertex_element velements[])
> +                          struct pipe_vertex_element velements[],
> +                          unsigned *num_velements)
>  {
> -   GLuint attr;
> +   GLuint attr, attr_idx;
>     const GLubyte *low_addr = NULL;
>     GLboolean usingVBO;      /* all arrays in a VBO? */
>     struct gl_buffer_object *bufobj;
> @@ -381,8 +447,10 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
>      * Init bufobj and stride.
>      */
>     if (vpv->num_inputs) {
> -      const GLuint mesaAttr0 = vp->index_to_input[0];
> -      const struct gl_client_array *array = arrays[mesaAttr0];
> +      const struct gl_client_array *array;
> +
> +      array = get_client_array(vp, arrays, 0);
> +      assert(array);
>
>        /* Since we're doing interleaved arrays, we know there'll be at most
>         * one buffer object and the stride will be the same for all arrays.
> @@ -394,7 +462,11 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
>        low_addr = arrays[vp->index_to_input[0]]->Ptr;
>
>        for (attr = 1; attr < vpv->num_inputs; attr++) {
> -         const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
> +         const GLubyte *start;
> +         array = get_client_array(vp, arrays, attr);
> +         if (!array)
> +            continue;
> +         start = array->Ptr;
>           low_addr = MIN2(low_addr, start);
>        }
>     }
> @@ -408,25 +480,33 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
>     /* are the arrays in user space? */
>     usingVBO = _mesa_is_bufferobj(bufobj);
>
> +   attr_idx = 0;
>     for (attr = 0; attr < vpv->num_inputs; attr++) {
> -      const GLuint mesaAttr = vp->index_to_input[attr];
> -      const struct gl_client_array *array = arrays[mesaAttr];
> -      unsigned src_offset = (unsigned) (array->Ptr - low_addr);
> +      const struct gl_client_array *array;
> +      unsigned src_offset;
> +      unsigned src_format;
> +
> +      array = get_client_array(vp, arrays, attr);
> +      if (!array)
> +         continue;

Is this the reason you can't store attr+1 in there?

>
> +      src_offset = (unsigned) (array->Ptr - low_addr);
>        assert(array->_ElementSize ==
>               _mesa_bytes_per_vertex_attrib(array->Size, array->Type));
>
> -      velements[attr].src_offset = src_offset;
> -      velements[attr].instance_divisor = array->InstanceDivisor;
> -      velements[attr].vertex_buffer_index = 0;
> -      velements[attr].src_format = st_pipe_vertex_format(array->Type,
> -                                                         array->Size,
> -                                                         array->Format,
> -                                                         array->Normalized,
> -                                                         array->Integer);
> -      assert(velements[attr].src_format);
> +      src_format = st_pipe_vertex_format(array->Type,
> +                                         array->Size,
> +                                         array->Format,
> +                                         array->Normalized,
> +                                         array->Integer);
> +
> +      init_velement_lowered(st, velements, src_offset, src_format,
> +                            array->InstanceDivisor, 0,
> +                            array->Size, array->Doubles, &attr_idx);
>     }
>
> +   *num_velements = attr_idx;
> +
>     /*
>      * Return the vbuffer info and setup user-space attrib info, if needed.
>      */
> @@ -472,17 +552,25 @@ setup_non_interleaved_attribs(struct st_context *st,
>                                const struct st_vp_variant *vpv,
>                                const struct gl_client_array **arrays,
>                                struct pipe_vertex_buffer vbuffer[],
> -                              struct pipe_vertex_element velements[])
> +                              struct pipe_vertex_element velements[],
> +                              unsigned *num_velements)
>  {
>     struct gl_context *ctx = st->ctx;
> -   GLuint attr;
> +   GLuint attr, attr_idx = 0;
>
>     for (attr = 0; attr < vpv->num_inputs; attr++) {
>        const GLuint mesaAttr = vp->index_to_input[attr];
> -      const struct gl_client_array *array = arrays[mesaAttr];
> -      struct gl_buffer_object *bufobj = array->BufferObj;
> -      GLsizei stride = array->StrideB;
> +      const struct gl_client_array *array;
> +      struct gl_buffer_object *bufobj;
> +      GLsizei stride;
> +      unsigned src_format;
>
> +      array = get_client_array(vp, arrays, attr);
> +      if (!array)
> +         continue;
> +
> +      stride = array->StrideB;
> +      bufobj = array->BufferObj;
>        assert(array->_ElementSize ==
>               _mesa_bytes_per_vertex_attrib(array->Size, array->Type));
>
> @@ -524,16 +612,19 @@ setup_non_interleaved_attribs(struct st_context *st,
>        /* common-case setup */
>        vbuffer[attr].stride = stride; /* in bytes */
>
> -      velements[attr].src_offset = 0;
> -      velements[attr].instance_divisor = array->InstanceDivisor;
> -      velements[attr].vertex_buffer_index = attr;
> -      velements[attr].src_format = st_pipe_vertex_format(array->Type,
> -                                                         array->Size,
> -                                                         array->Format,
> -                                                         array->Normalized,
> -                                                         array->Integer);
> -      assert(velements[attr].src_format);
> +      src_format = st_pipe_vertex_format(array->Type,
> +                                         array->Size,
> +                                         array->Format,
> +                                         array->Normalized,
> +                                         array->Integer);
> +
> +      init_velement_lowered(st, velements, 0, src_format,
> +                            array->InstanceDivisor, attr,
> +                            array->Size, array->Doubles, &attr_idx);
> +
>     }
> +
> +   *num_velements = attr_idx;
>     return TRUE;
>  }
>
> @@ -563,25 +654,23 @@ static void update_array(struct st_context *st)
>      * Setup the vbuffer[] and velements[] arrays.
>      */
>     if (is_interleaved_arrays(vp, vpv, arrays)) {
> -      if (!setup_interleaved_attribs(vp, vpv, arrays, vbuffer, velements)) {
> +      if (!setup_interleaved_attribs(st, vp, vpv, arrays, vbuffer, velements, &num_velements)) {
>           st->vertex_array_out_of_memory = TRUE;
>           return;
>        }
>
>        num_vbuffers = 1;
> -      num_velements = vpv->num_inputs;
>        if (num_velements == 0)
>           num_vbuffers = 0;
>     }
>     else {
>        if (!setup_non_interleaved_attribs(st, vp, vpv, arrays, vbuffer,
> -                                         velements)) {
> +                                         velements, &num_velements)) {
>           st->vertex_array_out_of_memory = TRUE;
>           return;
>        }
>
>        num_vbuffers = vpv->num_inputs;
> -      num_velements = vpv->num_inputs;
>     }
>
>     cso_set_vertex_buffers(st->cso_context, 0, num_vbuffers, vbuffer);
> diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
> index 82e4a30..b1057f3 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -909,6 +909,8 @@ void st_init_extensions(struct pipe_screen *screen,
>     if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
>                                  PIPE_SHADER_CAP_DOUBLES) &&
>         screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
> -                                PIPE_SHADER_CAP_DOUBLES))
> +                                PIPE_SHADER_CAP_DOUBLES)) {
>        extensions->ARB_gpu_shader_fp64 = GL_TRUE;
> +      extensions->ARB_vertex_attrib_64bit = GL_TRUE;
> +   }
>  }
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 08957dc..70fbfb1 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -88,6 +88,7 @@ public:
>        this->reladdr = NULL;
>        this->reladdr2 = NULL;
>        this->has_index2 = false;
> +      this->double_reg2 = false;
>     }
>
>     st_src_reg(gl_register_file file, int index, int type)
> @@ -101,6 +102,7 @@ public:
>        this->reladdr = NULL;
>        this->reladdr2 = NULL;
>        this->has_index2 = false;
> +      this->double_reg2 = false;
>     }
>
>     st_src_reg(gl_register_file file, int index, int type, int index2D)
> @@ -114,6 +116,7 @@ public:
>        this->reladdr = NULL;
>        this->reladdr2 = NULL;
>        this->has_index2 = false;
> +      this->double_reg2 = false;
>     }
>
>     st_src_reg()
> @@ -127,6 +130,7 @@ public:
>        this->reladdr = NULL;
>        this->reladdr2 = NULL;
>        this->has_index2 = false;
> +      this->double_reg2 = false;
>     }
>
>     explicit st_src_reg(st_dst_reg reg);
> @@ -141,6 +145,7 @@ public:
>     st_src_reg *reladdr;
>     st_src_reg *reladdr2;
>     bool has_index2;
> +   bool double_reg2;

Perhaps a word about what this is? Unlike all the other entries, it's
not self-explanatory. I guess it's to indicate a source that's the
"second" piece of a double-wide value?

>  };
>
>  class st_dst_reg {
> @@ -197,6 +202,7 @@ st_src_reg::st_src_reg(st_dst_reg reg)
>     this->index2D = 0;
>     this->reladdr2 = NULL;
>     this->has_index2 = false;
> +   this->double_reg2 = false;
>  }
>
>  st_dst_reg::st_dst_reg(st_src_reg reg)
> @@ -677,8 +683,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
>
>              if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
>                 dinst->src[j].index = initial_src_idx[j];
> -               if (swz > 1)
> +               if (swz > 1) {
> +                  dinst->src[j].double_reg2 = true;
>                    dinst->src[j].index++;
> +              }
>
>                 if (swz & 1)
>                    dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
> @@ -3705,6 +3713,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>              } else {
>                 if (first->src[0].file != copy_chan->src[0].file ||
>                     first->src[0].index != copy_chan->src[0].index ||
> +                   first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
>                     first->src[0].index2D != copy_chan->src[0].index2D) {
>                    good = false;
>                    break;
> @@ -3720,6 +3729,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
>              inst->src[r].index = first->src[0].index;
>              inst->src[r].index2D = first->src[0].index2D;
>              inst->src[r].has_index2 = first->src[0].has_index2;
> +            inst->src[r].double_reg2 = first->src[0].double_reg2;
>
>              int swizzle = 0;
>              for (int i = 0; i < 4; i++) {
> @@ -4552,6 +4562,9 @@ dst_register(struct st_translate *t,
>  static struct ureg_src
>  src_register(struct st_translate *t, const st_src_reg *reg)
>  {
> +   int index = reg->index;
> +   int double_reg2 = reg->double_reg2 ? 1 : 0;
> +
>     switch(reg->file) {
>     case PROGRAM_UNDEFINED:
>        return ureg_imm4f(t->ureg, 0, 0, 0, 0);
> @@ -4577,8 +4590,12 @@ src_register(struct st_translate *t, const st_src_reg *reg)
>        return t->immediates[reg->index];
>
>     case PROGRAM_INPUT:
> -      assert(t->inputMapping[reg->index] < ARRAY_SIZE(t->inputs));
> -      return t->inputs[t->inputMapping[reg->index]];
> +      /* GLSL inputs are 64-bit containers, so we have to
> +       * map back to the original index and add the offset after
> +       * mapping. */
> +      index -= double_reg2;
> +      assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
> +      return t->inputs[t->inputMapping[index] + double_reg2];

Just a thought -- why not make t->inputMapping[index+1] contain
t->inputMapping[index]+1? That'd make these gymnastics less necessary,
right? Perhaps it's complicated though...

>
>     case PROGRAM_OUTPUT:
>        assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs));
> diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
> index d93b3c7..a9110d3 100644
> --- a/src/mesa/state_tracker/st_program.c
> +++ b/src/mesa/state_tracker/st_program.c
> @@ -194,6 +194,11 @@ st_prepare_vertex_program(struct gl_context *ctx,
>           stvp->input_to_index[attr] = stvp->num_inputs;
>           stvp->index_to_input[stvp->num_inputs] = attr;
>           stvp->num_inputs++;
> +         if ((stvp->Base.Base.DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
> +            /* add placeholder for second part of a double attribute */
> +            stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
> +            stvp->num_inputs++;

Why not just set this to the right thing? Or is there no right thing?
Seems like you end up using it as though it were attr + 1...

> +         }
>        }
>     }
>     /* bit of a hack, presetup potentially unused edgeflag input */
> diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
> index b2c86fa..a2c5606 100644
> --- a/src/mesa/state_tracker/st_program.h
> +++ b/src/mesa/state_tracker/st_program.h
> @@ -45,6 +45,7 @@
>  extern "C" {
>  #endif
>
> +#define ST_DOUBLE_ATTRIB_PLACEHOLDER 0xffffffff
>
>  /** Fragment program variant key */
>  struct st_fp_variant_key
> --
> 2.1.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev