[Mesa-dev] [PATCH 8/8] st/mesa: add double input support including lowering (v3)
Ilia Mirkin
imirkin at alum.mit.edu
Wed Apr 29 20:23:59 PDT 2015
On Wed, Apr 29, 2015 at 9:14 PM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This takes a different approach to previously, we cannot index into the
> inputMapping with anything but the mesa attribute index, so we can't use
> the just add one to index trick, we need more info to add one to it
> after we've mapped the input.
Almost certainly a failing on my part, but the above makes little sense to me.
>
> (Fixed copy propgation and cleaned up a little)
>
> v2: drop float64 format check, just attr->Doubles.
> merge enable patch.
> v3: cleanup code a bit.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/mesa/state_tracker/st_atom_array.c | 171 ++++++++++++++++++++++-------
> src/mesa/state_tracker/st_extensions.c | 4 +-
> src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 23 +++-
> src/mesa/state_tracker/st_program.c | 5 +
> src/mesa/state_tracker/st_program.h | 1 +
> 5 files changed, 159 insertions(+), 45 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c
> index d4fb8b8..0f2720b 100644
> --- a/src/mesa/state_tracker/st_atom_array.c
> +++ b/src/mesa/state_tracker/st_atom_array.c
> @@ -44,7 +44,6 @@
>
> #include "cso_cache/cso_context.h"
> #include "util/u_math.h"
> -
> #include "main/bufferobj.h"
> #include "main/glformats.h"
>
> @@ -311,6 +310,17 @@ st_pipe_vertex_format(GLenum type, GLuint size, GLenum format,
> return PIPE_FORMAT_NONE; /* silence compiler warning */
> }
>
> +static const struct gl_client_array *
> +get_client_array(const struct st_vertex_program *vp,
> + const struct gl_client_array **arrays,
> + int attr)
> +{
> + const GLuint mesaAttr = vp->index_to_input[attr];
> + /* st_program uses 0xffffffff to denote a double placeholder attribute */
> + if (mesaAttr == ST_DOUBLE_ATTRIB_PLACEHOLDER)
> + return NULL;
> + return arrays[mesaAttr];
> +}
newline
> /**
> * Examine the active arrays to determine if we have interleaved
> * vertex arrays all living in one VBO, or all living in user space.
> @@ -327,11 +337,16 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
> GLboolean userSpaceBuffer = GL_FALSE;
>
> for (attr = 0; attr < vpv->num_inputs; attr++) {
> - const GLuint mesaAttr = vp->index_to_input[attr];
> - const struct gl_client_array *array = arrays[mesaAttr];
> - const struct gl_buffer_object *bufObj = array->BufferObj;
> - const GLsizei stride = array->StrideB; /* in bytes */
> + const struct gl_client_array *array;
> + const struct gl_buffer_object *bufObj;
> + GLsizei stride;
>
> + array = get_client_array(vp, arrays, attr);
> + if (!array)
> + continue;
> +
> + stride = array->StrideB; /* in bytes */
> + bufObj = array->BufferObj;
> if (attr == 0) {
> /* save info about the first array */
> firstStride = stride;
> @@ -358,6 +373,55 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
> return GL_TRUE;
> }
>
> +static void init_velement(struct pipe_vertex_element *velement,
> + int src_offset, int format,
> + int instance_divisor, int vbo_index)
> +{
> + velement->src_offset = src_offset;
> + velement->src_format = format;
> + velement->instance_divisor = instance_divisor;
> + velement->vertex_buffer_index = vbo_index;
> + assert(velement->src_format);
> +}
> +
> +static void init_velement_lowered(struct st_context *st,
> + struct pipe_vertex_element *velements,
> + int src_offset, int format,
> + int instance_divisor, int vbo_index,
> + int nr_components, GLboolean doubles,
> + GLuint *attr_idx)
> +{
> + int idx = *attr_idx;
> + if (doubles) {
> + int lower_format;
> +
> + if (nr_components == 1)
> + lower_format = PIPE_FORMAT_R32G32_UINT;
> + else if (nr_components >= 2)
> + lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
> +
> + init_velement(&velements[idx], src_offset,
> + lower_format, instance_divisor, vbo_index);
> + idx++;
> +
> + if (nr_components > 2) {
> + if (nr_components == 3)
> + lower_format = PIPE_FORMAT_R32G32_UINT;
> + else if (nr_components >= 4)
> + lower_format = PIPE_FORMAT_R32G32B32A32_UINT;
> +
> + init_velement(&velements[idx], src_offset + 4 * sizeof(float),
> + lower_format, instance_divisor, vbo_index);
> + idx++;
> + }
> + } else {
> + init_velement(&velements[idx], src_offset,
> + format, instance_divisor, vbo_index);
> + idx++;
> + }
> + *attr_idx = idx;
> +}
> +
> /**
> * Set up for drawing interleaved arrays that all live in one VBO
> * or all live in user space.
> @@ -365,13 +429,15 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
> * \param velements returns vertex element info
> */
> static boolean
> -setup_interleaved_attribs(const struct st_vertex_program *vp,
> +setup_interleaved_attribs(struct st_context *st,
> + const struct st_vertex_program *vp,
> const struct st_vp_variant *vpv,
> const struct gl_client_array **arrays,
> struct pipe_vertex_buffer *vbuffer,
> - struct pipe_vertex_element velements[])
> + struct pipe_vertex_element velements[],
> + unsigned *num_velements)
> {
> - GLuint attr;
> + GLuint attr, attr_idx;
> const GLubyte *low_addr = NULL;
> GLboolean usingVBO; /* all arrays in a VBO? */
> struct gl_buffer_object *bufobj;
> @@ -381,8 +447,10 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
> * Init bufobj and stride.
> */
> if (vpv->num_inputs) {
> - const GLuint mesaAttr0 = vp->index_to_input[0];
> - const struct gl_client_array *array = arrays[mesaAttr0];
> + const struct gl_client_array *array;
> +
> + array = get_client_array(vp, arrays, 0);
> + assert(array);
>
> /* Since we're doing interleaved arrays, we know there'll be at most
> * one buffer object and the stride will be the same for all arrays.
> @@ -394,7 +462,11 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
> low_addr = arrays[vp->index_to_input[0]]->Ptr;
>
> for (attr = 1; attr < vpv->num_inputs; attr++) {
> - const GLubyte *start = arrays[vp->index_to_input[attr]]->Ptr;
> + const GLubyte *start;
> + array = get_client_array(vp, arrays, attr);
> + if (!array)
> + continue;
> + start = array->Ptr;
> low_addr = MIN2(low_addr, start);
> }
> }
> @@ -408,25 +480,33 @@ setup_interleaved_attribs(const struct st_vertex_program *vp,
> /* are the arrays in user space? */
> usingVBO = _mesa_is_bufferobj(bufobj);
>
> + attr_idx = 0;
> for (attr = 0; attr < vpv->num_inputs; attr++) {
> - const GLuint mesaAttr = vp->index_to_input[attr];
> - const struct gl_client_array *array = arrays[mesaAttr];
> - unsigned src_offset = (unsigned) (array->Ptr - low_addr);
> + const struct gl_client_array *array;
> + unsigned src_offset;
> + unsigned src_format;
> +
> + array = get_client_array(vp, arrays, attr);
> + if (!array)
> + continue;
Is this the reason you can't store attr+1 in there?
>
> + src_offset = (unsigned) (array->Ptr - low_addr);
> assert(array->_ElementSize ==
> _mesa_bytes_per_vertex_attrib(array->Size, array->Type));
>
> - velements[attr].src_offset = src_offset;
> - velements[attr].instance_divisor = array->InstanceDivisor;
> - velements[attr].vertex_buffer_index = 0;
> - velements[attr].src_format = st_pipe_vertex_format(array->Type,
> - array->Size,
> - array->Format,
> - array->Normalized,
> - array->Integer);
> - assert(velements[attr].src_format);
> + src_format = st_pipe_vertex_format(array->Type,
> + array->Size,
> + array->Format,
> + array->Normalized,
> + array->Integer);
> +
> + init_velement_lowered(st, velements, src_offset, src_format,
> + array->InstanceDivisor, 0,
> + array->Size, array->Doubles, &attr_idx);
> }
>
> + *num_velements = attr_idx;
> +
> /*
> * Return the vbuffer info and setup user-space attrib info, if needed.
> */
> @@ -472,17 +552,25 @@ setup_non_interleaved_attribs(struct st_context *st,
> const struct st_vp_variant *vpv,
> const struct gl_client_array **arrays,
> struct pipe_vertex_buffer vbuffer[],
> - struct pipe_vertex_element velements[])
> + struct pipe_vertex_element velements[],
> + unsigned *num_velements)
> {
> struct gl_context *ctx = st->ctx;
> - GLuint attr;
> + GLuint attr, attr_idx = 0;
>
> for (attr = 0; attr < vpv->num_inputs; attr++) {
> const GLuint mesaAttr = vp->index_to_input[attr];
> - const struct gl_client_array *array = arrays[mesaAttr];
> - struct gl_buffer_object *bufobj = array->BufferObj;
> - GLsizei stride = array->StrideB;
> + const struct gl_client_array *array;
> + struct gl_buffer_object *bufobj;
> + GLsizei stride;
> + unsigned src_format;
>
> + array = get_client_array(vp, arrays, attr);
> + if (!array)
> + continue;
> +
> + stride = array->StrideB;
> + bufobj = array->BufferObj;
> assert(array->_ElementSize ==
> _mesa_bytes_per_vertex_attrib(array->Size, array->Type));
>
> @@ -524,16 +612,19 @@ setup_non_interleaved_attribs(struct st_context *st,
> /* common-case setup */
> vbuffer[attr].stride = stride; /* in bytes */
>
> - velements[attr].src_offset = 0;
> - velements[attr].instance_divisor = array->InstanceDivisor;
> - velements[attr].vertex_buffer_index = attr;
> - velements[attr].src_format = st_pipe_vertex_format(array->Type,
> - array->Size,
> - array->Format,
> - array->Normalized,
> - array->Integer);
> - assert(velements[attr].src_format);
> + src_format = st_pipe_vertex_format(array->Type,
> + array->Size,
> + array->Format,
> + array->Normalized,
> + array->Integer);
> +
> + init_velement_lowered(st, velements, 0, src_format,
> + array->InstanceDivisor, attr,
> + array->Size, array->Doubles, &attr_idx);
> +
> }
> +
> + *num_velements = attr_idx;
> return TRUE;
> }
>
> @@ -563,25 +654,23 @@ static void update_array(struct st_context *st)
> * Setup the vbuffer[] and velements[] arrays.
> */
> if (is_interleaved_arrays(vp, vpv, arrays)) {
> - if (!setup_interleaved_attribs(vp, vpv, arrays, vbuffer, velements)) {
> + if (!setup_interleaved_attribs(st, vp, vpv, arrays, vbuffer, velements, &num_velements)) {
> st->vertex_array_out_of_memory = TRUE;
> return;
> }
>
> num_vbuffers = 1;
> - num_velements = vpv->num_inputs;
> if (num_velements == 0)
> num_vbuffers = 0;
> }
> else {
> if (!setup_non_interleaved_attribs(st, vp, vpv, arrays, vbuffer,
> - velements)) {
> + velements, &num_velements)) {
> st->vertex_array_out_of_memory = TRUE;
> return;
> }
>
> num_vbuffers = vpv->num_inputs;
> - num_velements = vpv->num_inputs;
> }
>
> cso_set_vertex_buffers(st->cso_context, 0, num_vbuffers, vbuffer);
> diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c
> index 82e4a30..b1057f3 100644
> --- a/src/mesa/state_tracker/st_extensions.c
> +++ b/src/mesa/state_tracker/st_extensions.c
> @@ -909,6 +909,8 @@ void st_init_extensions(struct pipe_screen *screen,
> if (screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
> PIPE_SHADER_CAP_DOUBLES) &&
> screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
> - PIPE_SHADER_CAP_DOUBLES))
> + PIPE_SHADER_CAP_DOUBLES)) {
> extensions->ARB_gpu_shader_fp64 = GL_TRUE;
> + extensions->ARB_vertex_attrib_64bit = GL_TRUE;
> + }
> }
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 08957dc..70fbfb1 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -88,6 +88,7 @@ public:
> this->reladdr = NULL;
> this->reladdr2 = NULL;
> this->has_index2 = false;
> + this->double_reg2 = false;
> }
>
> st_src_reg(gl_register_file file, int index, int type)
> @@ -101,6 +102,7 @@ public:
> this->reladdr = NULL;
> this->reladdr2 = NULL;
> this->has_index2 = false;
> + this->double_reg2 = false;
> }
>
> st_src_reg(gl_register_file file, int index, int type, int index2D)
> @@ -114,6 +116,7 @@ public:
> this->reladdr = NULL;
> this->reladdr2 = NULL;
> this->has_index2 = false;
> + this->double_reg2 = false;
> }
>
> st_src_reg()
> @@ -127,6 +130,7 @@ public:
> this->reladdr = NULL;
> this->reladdr2 = NULL;
> this->has_index2 = false;
> + this->double_reg2 = false;
> }
>
> explicit st_src_reg(st_dst_reg reg);
> @@ -141,6 +145,7 @@ public:
> st_src_reg *reladdr;
> st_src_reg *reladdr2;
> bool has_index2;
> + bool double_reg2;
Perhaps a word about what this is? Unlike all the other entries, it's
not self-explanatory. I guess it's to indicate a source that's the
"second" piece of a double-wide value?
> };
>
> class st_dst_reg {
> @@ -197,6 +202,7 @@ st_src_reg::st_src_reg(st_dst_reg reg)
> this->index2D = 0;
> this->reladdr2 = NULL;
> this->has_index2 = false;
> + this->double_reg2 = false;
> }
>
> st_dst_reg::st_dst_reg(st_src_reg reg)
> @@ -677,8 +683,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
>
> if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
> dinst->src[j].index = initial_src_idx[j];
> - if (swz > 1)
> + if (swz > 1) {
> + dinst->src[j].double_reg2 = true;
> dinst->src[j].index++;
> + }
>
> if (swz & 1)
> dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
> @@ -3705,6 +3713,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> } else {
> if (first->src[0].file != copy_chan->src[0].file ||
> first->src[0].index != copy_chan->src[0].index ||
> + first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
> first->src[0].index2D != copy_chan->src[0].index2D) {
> good = false;
> break;
> @@ -3720,6 +3729,7 @@ glsl_to_tgsi_visitor::copy_propagate(void)
> inst->src[r].index = first->src[0].index;
> inst->src[r].index2D = first->src[0].index2D;
> inst->src[r].has_index2 = first->src[0].has_index2;
> + inst->src[r].double_reg2 = first->src[0].double_reg2;
>
> int swizzle = 0;
> for (int i = 0; i < 4; i++) {
> @@ -4552,6 +4562,9 @@ dst_register(struct st_translate *t,
> static struct ureg_src
> src_register(struct st_translate *t, const st_src_reg *reg)
> {
> + int index = reg->index;
> + int double_reg2 = reg->double_reg2 ? 1 : 0;
> +
> switch(reg->file) {
> case PROGRAM_UNDEFINED:
> return ureg_imm4f(t->ureg, 0, 0, 0, 0);
> @@ -4577,8 +4590,12 @@ src_register(struct st_translate *t, const st_src_reg *reg)
> return t->immediates[reg->index];
>
> case PROGRAM_INPUT:
> - assert(t->inputMapping[reg->index] < ARRAY_SIZE(t->inputs));
> - return t->inputs[t->inputMapping[reg->index]];
> + /* GLSL inputs are 64-bit containers, so we have to
> + * map back to the original index and add the offset after
> + * mapping. */
> + index -= double_reg2;
> + assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
> + return t->inputs[t->inputMapping[index] + double_reg2];
Just a thought -- why not make t->inputMapping[index+1] contain
t->inputMapping[index]+1? That'd make these gymnastics less necessary,
right? Perhaps it's complicated though...
>
> case PROGRAM_OUTPUT:
> assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs));
> diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c
> index d93b3c7..a9110d3 100644
> --- a/src/mesa/state_tracker/st_program.c
> +++ b/src/mesa/state_tracker/st_program.c
> @@ -194,6 +194,11 @@ st_prepare_vertex_program(struct gl_context *ctx,
> stvp->input_to_index[attr] = stvp->num_inputs;
> stvp->index_to_input[stvp->num_inputs] = attr;
> stvp->num_inputs++;
> + if ((stvp->Base.Base.DoubleInputsRead & BITFIELD64_BIT(attr)) != 0) {
> + /* add placeholder for second part of a double attribute */
> + stvp->index_to_input[stvp->num_inputs] = ST_DOUBLE_ATTRIB_PLACEHOLDER;
> + stvp->num_inputs++;
Why not just set this to the right thing? Or is there no right thing?
Seems like you end up using it as though it were attr + 1...
> + }
> }
> }
> /* bit of a hack, presetup potentially unused edgeflag input */
> diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h
> index b2c86fa..a2c5606 100644
> --- a/src/mesa/state_tracker/st_program.h
> +++ b/src/mesa/state_tracker/st_program.h
> @@ -45,6 +45,7 @@
> extern "C" {
> #endif
>
> +#define ST_DOUBLE_ATTRIB_PLACEHOLDER 0xffffffff
>
> /** Fragment program variant key */
> struct st_fp_variant_key
> --
> 2.1.0
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list