[PATCH 10/20] glamor: Add glamor_program based fill/set/get spans

Markus Wick markus at selfnet.de
Wed Mar 19 05:55:43 PDT 2014


Am 2014-03-19 06:09, schrieb Keith Packard:
> This accelerates spans operations using GPU-based geometry computation
> 
> Signed-off-by: Keith Packard <keithp at keithp.com>
> ---
>  glamor/Makefile.am    |   1 +
>  glamor/glamor.c       |   2 +-
>  glamor/glamor_core.c  |   4 +-
>  glamor/glamor_priv.h  |  18 +++
>  glamor/glamor_spans.c | 299 
> ++++++++++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 321 insertions(+), 3 deletions(-)
>  create mode 100644 glamor/glamor_spans.c
> 
> diff --git a/glamor/Makefile.am b/glamor/Makefile.am
> index 361c0d6..d598f95 100644
> --- a/glamor/Makefile.am
> +++ b/glamor/Makefile.am
> @@ -25,6 +25,7 @@ libglamor_la_SOURCES = \
>  	glamor_prepare.c \
>  	glamor_prepare.h \
>  	glamor_program.c \
> +	glamor_spans.c \
>  	glamor_transfer.c \
>  	glamor_transfer.h \
>  	glamor_transform.c \
> diff --git a/glamor/glamor.c b/glamor/glamor.c
> index 79914bf..abf2d47 100644
> --- a/glamor/glamor.c
> +++ b/glamor/glamor.c
> @@ -426,7 +426,7 @@ glamor_init(ScreenPtr screen, unsigned int flags)
>          screen->DestroyPixmap = glamor_destroy_pixmap;
> 
>          glamor_priv->saved_procs.get_spans = screen->GetSpans;
> -        screen->GetSpans = glamor_get_spans;
> +        screen->GetSpans = glamor_getspans;
> 
>          glamor_priv->saved_procs.get_image = screen->GetImage;
>          screen->GetImage = glamor_get_image;
> diff --git a/glamor/glamor_core.c b/glamor/glamor_core.c
> index 6c0b3c8..7983bd9 100644
> --- a/glamor/glamor_core.c
> +++ b/glamor/glamor_core.c
> @@ -413,8 +413,8 @@ glamor_stipple(PixmapPtr pixmap, PixmapPtr stipple,
>  }
> 
>  GCOps glamor_gc_ops = {
> -    .FillSpans = glamor_fill_spans,
> -    .SetSpans = glamor_set_spans,
> +    .FillSpans = glamor_fillspans,
> +    .SetSpans = glamor_setspans,
>      .PutImage = glamor_put_image,
>      .CopyArea = glamor_copy_area,
>      .CopyPlane = glamor_copy_plane,
> diff --git a/glamor/glamor_priv.h b/glamor/glamor_priv.h
> index e4be4f3..b3b1ac7 100644
> --- a/glamor/glamor_priv.h
> +++ b/glamor/glamor_priv.h
> @@ -222,6 +222,9 @@ typedef struct glamor_screen_private {
>      /* glamor point shader */
>      glamor_program point_prog;
> 
> +    /* glamor spans shaders */
> +    glamor_program_fill fill_spans_program;
> +
>      /* vertext/elment_index buffer object for render */
>      GLuint vbo, ebo;
>      /** Next offset within the VBO that glamor_get_vbo_space() will 
> use. */
> @@ -974,6 +977,21 @@ RegionPtr glamor_copy_plane(DrawablePtr pSrc,
> DrawablePtr pDst, GCPtr pGC,
>                              int dstx, int dsty,
>                              unsigned long bitPlane);
> 
> +/* glamor_spans.c */
> +void
> +glamor_fillspans(DrawablePtr drawable,
> +                 GCPtr gc,
> +                 int n, DDXPointPtr points, int *widths, int sorted);
> +
> +void
> +glamor_getspans(DrawablePtr drawable, int wmax,
> +                DDXPointPtr points, int *widths, int count, char 
> *dst);
> +
> +void
> +glamor_setspans(DrawablePtr drawable, GCPtr gc, char *src,
> +                DDXPointPtr points, int *widths, int numPoints, int 
> sorted);
> +
> +/* glamor_glyphblt.c */
>  void glamor_image_glyph_blt(DrawablePtr pDrawable, GCPtr pGC,
>                              int x, int y, unsigned int nglyph,
>                              CharInfoPtr *ppci, void *pglyphBase);
> diff --git a/glamor/glamor_spans.c b/glamor/glamor_spans.c
> new file mode 100644
> index 0000000..6cb635b
> --- /dev/null
> +++ b/glamor/glamor_spans.c
> @@ -0,0 +1,299 @@
> +/*
> + * Copyright © 2014 Keith Packard
> + *
> + * Permission to use, copy, modify, distribute, and sell this software 
> and its
> + * documentation for any purpose is hereby granted without fee, 
> provided that
> + * the above copyright notice appear in all copies and that both that 
> copyright
> + * notice and this permission notice appear in supporting 
> documentation, and
> + * that the name of the copyright holders not be used in advertising 
> or
> + * publicity pertaining to distribution of the software without 
> specific,
> + * written prior permission.  The copyright holders make no 
> representations
> + * about the suitability of this software for any purpose.  It is 
> provided "as
> + * is" without express or implied warranty.
> + *
> + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS 
> SOFTWARE,
> + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN 
> NO
> + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, 
> INDIRECT OR
> + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS 
> OF USE,
> + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 
> OTHER
> + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 
> PERFORMANCE
> + * OF THIS SOFTWARE.
> + */
> +
> +#include "glamor_priv.h"
> +#include "glamor_transform.h"
> +#include "glamor_transfer.h"
> +#include "glamor_prepare.h"
> +
> +glamor_program  fill_spans_progs[4];
> +
> +static const glamor_facet glamor_facet_fillspans = {
> +    .name = "fill_spans",
> +    .version = 130,
> +    .vs_vars =  "attribute vec3 primitive;\n",
> +    .vs_exec = ("       vec2 pos = vec2(primitive.z,1) *
> vec2(gl_VertexID&1, (gl_VertexID&2)>>1);\n"
> +                GLAMOR_POS(gl_Position, (primitive.xy + pos))),
> +};
> +
> +void
> +glamor_fillspans(DrawablePtr drawable,
> +                 GCPtr gc,
> +                 int n, DDXPointPtr points, int *widths, int sorted)
> +{
> +    ScreenPtr screen = drawable->pScreen;
> +    glamor_screen_private *glamor_priv = 
> glamor_get_screen_private(screen);
> +    PixmapPtr pixmap = glamor_get_drawable_pixmap(drawable);
> +    glamor_pixmap_private *pixmap_priv;
> +    glamor_program *prog;
> +    int off_x, off_y;
> +    GLshort *v;
> +    char *vbo_offset;
> +    int c;
> +    int box_x, box_y;
> +
> +    pixmap_priv = glamor_get_pixmap_private(pixmap);
> +    if (!GLAMOR_PIXMAP_PRIV_HAS_FBO(pixmap_priv))
> +        goto bail;
> +
> +    glamor_get_context(glamor_priv);
> +
> +    prog = glamor_use_program_fill(pixmap, gc,
> &glamor_priv->fill_spans_program,
> +                                   &glamor_facet_fillspans);
> +
> +    if (!prog)
> +        goto bail_ctx;
> +
> +    /* Set up the vertex buffers for the points */
> +
> +    v = glamor_get_vbo_space(drawable->pScreen, n * (3 * sizeof
> (GLshort)), &vbo_offset);
> +
> +    glEnableVertexAttribArray(GLAMOR_VERTEX_POS);
> +    glVertexAttribDivisor(GLAMOR_VERTEX_POS, 1);

This will crash when GL_ARB_instanced_arrays isn't available.

> +    glVertexAttribPointer(GLAMOR_VERTEX_POS, 3, GL_SHORT, GL_FALSE,
> +                          3 * sizeof (GLshort), vbo_offset);
> +
> +    for (c = 0; c < n; c++) {
> +        v[0] = points->x;
> +        v[1] = points->y;
> +        v[2] = *widths++;
> +        points++;
> +        v += 3;
> +    }
> +
> +    glamor_put_vbo_space(screen);
> +
> +    glEnable(GL_SCISSOR_TEST);
> +
> +    glamor_pixmap_loop(pixmap_priv, box_x, box_y) {
> +        int nbox = RegionNumRects(gc->pCompositeClip);
> +        BoxPtr box = RegionRects(gc->pCompositeClip);
> +
> +        glamor_set_destination_drawable(drawable, box_x, box_y,
> FALSE, FALSE, prog->matrix_uniform, &off_x, &off_y);
> +
> +        while (nbox--) {
> +            glScissor(box->x1 + off_x,
> +                      box->y1 + off_y,
> +                      box->x2 - box->x1,
> +                      box->y2 - box->y1);
> +            box++;
> +            glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, n);
> +        }

What are common values for RegionNumRects(gc->pCompositeClip)?
For lots (3+) of boxes, it's likely faster to pass the box as an attribute 
(with a much higher divisor) and discard in the pixel shader.

> +    }
> +
> +    glDisable(GL_SCISSOR_TEST);
> +    glDisable(GL_COLOR_LOGIC_OP);
> +    glVertexAttribDivisor(GLAMOR_VERTEX_POS, 0);
> +    glDisableVertexAttribArray(GLAMOR_VERTEX_POS);
> +
> +    glamor_put_context(glamor_priv);
> +    return;
> +bail_ctx:
> +    glDisable(GL_COLOR_LOGIC_OP);
> +    glamor_put_context(glamor_priv);
> +bail:
> +    if (glamor_prep_drawable(drawable, TRUE) &&
> +        glamor_prep_gc(gc)) {
> +        fbFillSpans(drawable, gc, n, points, widths, sorted);
> +    }
> +    glamor_fini_gc(gc);
> +    glamor_fini_drawable(drawable);
> +}
> +
> +void
> +glamor_getspans(DrawablePtr drawable, int wmax,
> +                DDXPointPtr points, int *widths, int count, char *dst)
> +{
> +    ScreenPtr screen = drawable->pScreen;
> +    glamor_screen_private *glamor_priv = 
> glamor_get_screen_private(screen);
> +    PixmapPtr pixmap = glamor_get_drawable_pixmap(drawable);
> +    glamor_pixmap_private *pixmap_priv;
> +    int box_x, box_y;
> +    int n;
> +    char *d;
> +    GLenum type;
> +    GLenum format;
> +    int off_x, off_y;
> +
> +    pixmap_priv = glamor_get_pixmap_private(pixmap);
> +    if (!GLAMOR_PIXMAP_PRIV_HAS_FBO(pixmap_priv))
> +        goto bail;
> +
> +    glamor_get_drawable_deltas(drawable, pixmap, &off_x, &off_y);
> +
> +    glamor_format_for_pixmap(pixmap, &format, &type);
> +
> +    glamor_get_context(glamor_priv);
> +
> +    glamor_pixmap_loop(pixmap_priv, box_x, box_y) {
> +        BoxPtr                  box =
> glamor_pixmap_box_at(pixmap_priv, box_x, box_y);
> +        glamor_pixmap_fbo       *fbo =
> glamor_pixmap_fbo_at(pixmap_priv, box_x, box_y);
> +
> +        glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo->fb);
> +        glPixelStorei(GL_PACK_ALIGNMENT, 4);
> +
> +        d = dst;
> +        for (n = 0; n < count; n++) {
> +            int x1 = points[n].x + off_x;
> +            int y = points[n].y + off_y;
> +            int w = widths[n];
> +            int x2 = x1 + w;
> +            char *l;
> +
> +            l = d;
> +            d += PixmapBytePad(w, drawable->depth);
> +
> +            /* clip */
> +            if (x1 < box->x1) {
> +                l += (box->x1 - x1) * (drawable->bitsPerPixel >> 3);

Does bitsPerPixel have to be a multiple of 8? Isn't there a shared 
function to calculate this pitch?

> +                x1 = box->x1;
> +            }
> +            if (x2 > box->x2)
> +                x2 = box->x2;
> +
> +            if (x1 >= x2)
> +                continue;
> +            if (y < box->y1)
> +                continue;
> +            if (y >= box->y2)
> +                continue;
> +
> +            glReadPixels(x1 - box->x1, y - box->y1, x2 - x1, 1,
> format, type, l);

To be honest, without a PBO this is worse than the fb fallback, as it's 
one stall per span.
Even with a PBO, it's still a huge overhead per span. I guess for lots of 
spans (maybe 20+) it's faster to always fall back to fb.
But I don't see another non-hacky way to implement this.

> +        }
> +    }
> +
> +    glamor_put_context(glamor_priv);
> +    return;
> +
> +bail:
> +    if (glamor_prep_drawable(drawable, FALSE))
> +        fbGetSpans(drawable, wmax, points, widths, count, dst);
> +    glamor_fini_drawable(drawable);
> +}
> +
> +void
> +glamor_setspans(DrawablePtr drawable, GCPtr gc, char *src,
> +                DDXPointPtr points, int *widths, int numPoints, int 
> sorted)
> +{
> +    ScreenPtr screen = drawable->pScreen;
> +    glamor_screen_private *glamor_priv = 
> glamor_get_screen_private(screen);
> +    PixmapPtr pixmap = glamor_get_drawable_pixmap(drawable);
> +    glamor_pixmap_private *pixmap_priv;
> +    int box_x, box_y;
> +    int n;
> +    char *s;
> +    GLenum type;
> +    GLenum format;
> +    int off_x, off_y;
> +
> +    pixmap_priv = glamor_get_pixmap_private(pixmap);
> +    if (!GLAMOR_PIXMAP_PRIV_HAS_FBO(pixmap_priv))
> +        goto bail;
> +
> +    if (gc->alu != GXcopy)
> +        goto bail;
> +
> +    if (!glamor_pm_is_solid(&pixmap->drawable, gc->planemask))
> +        goto bail;
> +
> +    glamor_get_drawable_deltas(drawable, pixmap, &off_x, &off_y);
> +    glamor_format_for_pixmap(pixmap, &format, &type);
> +
> +    glamor_get_context(glamor_priv);
> +
> +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
> +    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
> +
> +    glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
> +
> +    glamor_pixmap_loop(pixmap_priv, box_x, box_y) {
> +        BoxPtr                  box =
> glamor_pixmap_box_at(pixmap_priv, box_x, box_y);
> +        glamor_pixmap_fbo       *fbo =
> glamor_pixmap_fbo_at(pixmap_priv, box_x, box_y);
> +
> +        glActiveTexture(GL_TEXTURE0);
> +        glBindTexture(GL_TEXTURE_2D, fbo->tex);
> +
> +        s = src;
> +        for (n = 0; n < numPoints; n++) {
> +
> +            BoxPtr      clip_box = RegionRects(gc->pCompositeClip);
> +            int         nclip_box = 
> RegionNumRects(gc->pCompositeClip);
> +            int         w = widths[n];
> +            int         y = points[n].y;
> +            int         x = points[n].x;
> +
> +            while (nclip_box--) {
> +                int x1 = x;
> +                int x2 = x + w;
> +                int y1 = y;
> +                char *l = s;
> +
> +                /* clip to composite clip */
> +                if (x1 < clip_box->x1) {
> +                    l += (clip_box->x1 - x1) * (drawable->bitsPerPixel 
> >> 3);
> +                    x1 = clip_box->x1;
> +                }
> +                if (x2 > clip_box->x2)
> +                    x2 = clip_box->x2;
> +
> +                if (y < clip_box->y1)
> +                    continue;
> +                if (y >= clip_box->y2)
> +                    continue;
> +
> +                /* adjust to pixmap coordinates */
> +                x1 += off_x;
> +                x2 += off_x;
> +                y1 += off_y;
> +
> +                if (x1 < box->x1) {
> +                    l += (box->x1 - x1) * (drawable->bitsPerPixel >> 
> 3);
> +                    x1 = box->x1;
> +                }
> +                if (x2 > box->x2)
> +                    x2 = box->x2;
> +
> +                if (x1 >= x2)
> +                    continue;
> +                if (y1 < box->y1)
> +                    continue;
> +                if (y1 >= box->y2)
> +                    continue;
> +
> +                glTexSubImage2D(GL_TEXTURE_2D, 0,
> +                                x1 - box->x1, y1 - box->y1, x2 - x1, 
> 1,
> +                                format, type,
> +                                l);

We do have to find a way to upload this texture at once and to emit one 
draw call (per fbo + box?),
e.g. via a temporary 1D texture or, better, a texture buffer object.
Anything is better than calling one GL function per span, even the fb 
fallback.

> +            }
> +            s += PixmapBytePad(w, drawable->depth);
> +        }
> +    }
> +
> +    glamor_put_context(glamor_priv);
> +    return;
> +
> +bail:
> +    if (glamor_prep_drawable(drawable, TRUE) && glamor_prep_gc(gc))
> +        fbSetSpans(drawable, gc, src, points, widths, numPoints, 
> sorted);
> +    glamor_fini_gc(gc);
> +    glamor_fini_drawable(drawable);
> +}


More information about the xorg-devel mailing list