[PATCH 10/20] glamor: Add glamor_program based fill/set/get spans
Markus Wick
markus at selfnet.de
Wed Mar 19 05:55:43 PDT 2014
Am 2014-03-19 06:09, schrieb Keith Packard:
> This accelerates spans operations using GPU-based geometry computation
>
> Signed-off-by: Keith Packard <keithp at keithp.com>
> ---
> glamor/Makefile.am | 1 +
> glamor/glamor.c | 2 +-
> glamor/glamor_core.c | 4 +-
> glamor/glamor_priv.h | 18 +++
> glamor/glamor_spans.c | 299
> ++++++++++++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 321 insertions(+), 3 deletions(-)
> create mode 100644 glamor/glamor_spans.c
>
> diff --git a/glamor/Makefile.am b/glamor/Makefile.am
> index 361c0d6..d598f95 100644
> --- a/glamor/Makefile.am
> +++ b/glamor/Makefile.am
> @@ -25,6 +25,7 @@ libglamor_la_SOURCES = \
> glamor_prepare.c \
> glamor_prepare.h \
> glamor_program.c \
> + glamor_spans.c \
> glamor_transfer.c \
> glamor_transfer.h \
> glamor_transform.c \
> diff --git a/glamor/glamor.c b/glamor/glamor.c
> index 79914bf..abf2d47 100644
> --- a/glamor/glamor.c
> +++ b/glamor/glamor.c
> @@ -426,7 +426,7 @@ glamor_init(ScreenPtr screen, unsigned int flags)
> screen->DestroyPixmap = glamor_destroy_pixmap;
>
> glamor_priv->saved_procs.get_spans = screen->GetSpans;
> - screen->GetSpans = glamor_get_spans;
> + screen->GetSpans = glamor_getspans;
>
> glamor_priv->saved_procs.get_image = screen->GetImage;
> screen->GetImage = glamor_get_image;
> diff --git a/glamor/glamor_core.c b/glamor/glamor_core.c
> index 6c0b3c8..7983bd9 100644
> --- a/glamor/glamor_core.c
> +++ b/glamor/glamor_core.c
> @@ -413,8 +413,8 @@ glamor_stipple(PixmapPtr pixmap, PixmapPtr stipple,
> }
>
> GCOps glamor_gc_ops = {
> - .FillSpans = glamor_fill_spans,
> - .SetSpans = glamor_set_spans,
> + .FillSpans = glamor_fillspans,
> + .SetSpans = glamor_setspans,
> .PutImage = glamor_put_image,
> .CopyArea = glamor_copy_area,
> .CopyPlane = glamor_copy_plane,
> diff --git a/glamor/glamor_priv.h b/glamor/glamor_priv.h
> index e4be4f3..b3b1ac7 100644
> --- a/glamor/glamor_priv.h
> +++ b/glamor/glamor_priv.h
> @@ -222,6 +222,9 @@ typedef struct glamor_screen_private {
> /* glamor point shader */
> glamor_program point_prog;
>
> + /* glamor spans shaders */
> + glamor_program_fill fill_spans_program;
> +
> /* vertext/elment_index buffer object for render */
> GLuint vbo, ebo;
> /** Next offset within the VBO that glamor_get_vbo_space() will
> use. */
> @@ -974,6 +977,21 @@ RegionPtr glamor_copy_plane(DrawablePtr pSrc,
> DrawablePtr pDst, GCPtr pGC,
> int dstx, int dsty,
> unsigned long bitPlane);
>
> +/* glamor_spans.c */
> +void
> +glamor_fillspans(DrawablePtr drawable,
> + GCPtr gc,
> + int n, DDXPointPtr points, int *widths, int sorted);
> +
> +void
> +glamor_getspans(DrawablePtr drawable, int wmax,
> + DDXPointPtr points, int *widths, int count, char
> *dst);
> +
> +void
> +glamor_setspans(DrawablePtr drawable, GCPtr gc, char *src,
> + DDXPointPtr points, int *widths, int numPoints, int
> sorted);
> +
> +/* glamor_glyphblt.c */
> void glamor_image_glyph_blt(DrawablePtr pDrawable, GCPtr pGC,
> int x, int y, unsigned int nglyph,
> CharInfoPtr *ppci, void *pglyphBase);
> diff --git a/glamor/glamor_spans.c b/glamor/glamor_spans.c
> new file mode 100644
> index 0000000..6cb635b
> --- /dev/null
> +++ b/glamor/glamor_spans.c
> @@ -0,0 +1,299 @@
> +/*
> + * Copyright © 2014 Keith Packard
> + *
> + * Permission to use, copy, modify, distribute, and sell this software
> and its
> + * documentation for any purpose is hereby granted without fee,
> provided that
> + * the above copyright notice appear in all copies and that both that
> copyright
> + * notice and this permission notice appear in supporting
> documentation, and
> + * that the name of the copyright holders not be used in advertising
> or
> + * publicity pertaining to distribution of the software without
> specific,
> + * written prior permission. The copyright holders make no
> representations
> + * about the suitability of this software for any purpose. It is
> provided "as
> + * is" without express or implied warranty.
> + *
> + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
> SOFTWARE,
> + * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN
> NO
> + * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL,
> INDIRECT OR
> + * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
> OF USE,
> + * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
> OTHER
> + * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
> PERFORMANCE
> + * OF THIS SOFTWARE.
> + */
> +
> +#include "glamor_priv.h"
> +#include "glamor_transform.h"
> +#include "glamor_transfer.h"
> +#include "glamor_prepare.h"
> +
> +glamor_program fill_spans_progs[4];
> +
> +static const glamor_facet glamor_facet_fillspans = {
> + .name = "fill_spans",
> + .version = 130,
> + .vs_vars = "attribute vec3 primitive;\n",
> + .vs_exec = (" vec2 pos = vec2(primitive.z,1) *
> vec2(gl_VertexID&1, (gl_VertexID&2)>>1);\n"
> + GLAMOR_POS(gl_Position, (primitive.xy + pos))),
> +};
> +
> +void
> +glamor_fillspans(DrawablePtr drawable,
> + GCPtr gc,
> + int n, DDXPointPtr points, int *widths, int sorted)
> +{
> + ScreenPtr screen = drawable->pScreen;
> + glamor_screen_private *glamor_priv =
> glamor_get_screen_private(screen);
> + PixmapPtr pixmap = glamor_get_drawable_pixmap(drawable);
> + glamor_pixmap_private *pixmap_priv;
> + glamor_program *prog;
> + int off_x, off_y;
> + GLshort *v;
> + char *vbo_offset;
> + int c;
> + int box_x, box_y;
> +
> + pixmap_priv = glamor_get_pixmap_private(pixmap);
> + if (!GLAMOR_PIXMAP_PRIV_HAS_FBO(pixmap_priv))
> + goto bail;
> +
> + glamor_get_context(glamor_priv);
> +
> + prog = glamor_use_program_fill(pixmap, gc,
> &glamor_priv->fill_spans_program,
> + &glamor_facet_fillspans);
> +
> + if (!prog)
> + goto bail_ctx;
> +
> + /* Set up the vertex buffers for the points */
> +
> + v = glamor_get_vbo_space(drawable->pScreen, n * (3 * sizeof
> (GLshort)), &vbo_offset);
> +
> + glEnableVertexAttribArray(GLAMOR_VERTEX_POS);
> + glVertexAttribDivisor(GLAMOR_VERTEX_POS, 1);
This will crash when GL_ARB_instanced_arrays isn't available.
> + glVertexAttribPointer(GLAMOR_VERTEX_POS, 3, GL_SHORT, GL_FALSE,
> + 3 * sizeof (GLshort), vbo_offset);
> +
> + for (c = 0; c < n; c++) {
> + v[0] = points->x;
> + v[1] = points->y;
> + v[2] = *widths++;
> + points++;
> + v += 3;
> + }
> +
> + glamor_put_vbo_space(screen);
> +
> + glEnable(GL_SCISSOR_TEST);
> +
> + glamor_pixmap_loop(pixmap_priv, box_x, box_y) {
> + int nbox = RegionNumRects(gc->pCompositeClip);
> + BoxPtr box = RegionRects(gc->pCompositeClip);
> +
> + glamor_set_destination_drawable(drawable, box_x, box_y,
> FALSE, FALSE, prog->matrix_uniform, &off_x, &off_y);
> +
> + while (nbox--) {
> + glScissor(box->x1 + off_x,
> + box->y1 + off_y,
> + box->x2 - box->x1,
> + box->y2 - box->y1);
> + box++;
> + glDrawArraysInstanced(GL_TRIANGLE_STRIP, 0, 4, n);
> + }
What are common values for RegionNumRects(gc->pCompositeClip)?
For many (3+) boxes, it's likely faster to pass the box as an attribute
(with a much higher divisor) and discard in the pixel shader.
> + }
> +
> + glDisable(GL_SCISSOR_TEST);
> + glDisable(GL_COLOR_LOGIC_OP);
> + glVertexAttribDivisor(GLAMOR_VERTEX_POS, 0);
> + glDisableVertexAttribArray(GLAMOR_VERTEX_POS);
> +
> + glamor_put_context(glamor_priv);
> + return;
> +bail_ctx:
> + glDisable(GL_COLOR_LOGIC_OP);
> + glamor_put_context(glamor_priv);
> +bail:
> + if (glamor_prep_drawable(drawable, TRUE) &&
> + glamor_prep_gc(gc)) {
> + fbFillSpans(drawable, gc, n, points, widths, sorted);
> + }
> + glamor_fini_gc(gc);
> + glamor_fini_drawable(drawable);
> +}
> +
> +void
> +glamor_getspans(DrawablePtr drawable, int wmax,
> + DDXPointPtr points, int *widths, int count, char *dst)
> +{
> + ScreenPtr screen = drawable->pScreen;
> + glamor_screen_private *glamor_priv =
> glamor_get_screen_private(screen);
> + PixmapPtr pixmap = glamor_get_drawable_pixmap(drawable);
> + glamor_pixmap_private *pixmap_priv;
> + int box_x, box_y;
> + int n;
> + char *d;
> + GLenum type;
> + GLenum format;
> + int off_x, off_y;
> +
> + pixmap_priv = glamor_get_pixmap_private(pixmap);
> + if (!GLAMOR_PIXMAP_PRIV_HAS_FBO(pixmap_priv))
> + goto bail;
> +
> + glamor_get_drawable_deltas(drawable, pixmap, &off_x, &off_y);
> +
> + glamor_format_for_pixmap(pixmap, &format, &type);
> +
> + glamor_get_context(glamor_priv);
> +
> + glamor_pixmap_loop(pixmap_priv, box_x, box_y) {
> + BoxPtr box =
> glamor_pixmap_box_at(pixmap_priv, box_x, box_y);
> + glamor_pixmap_fbo *fbo =
> glamor_pixmap_fbo_at(pixmap_priv, box_x, box_y);
> +
> + glBindFramebuffer(GL_READ_FRAMEBUFFER, fbo->fb);
> + glPixelStorei(GL_PACK_ALIGNMENT, 4);
> +
> + d = dst;
> + for (n = 0; n < count; n++) {
> + int x1 = points[n].x + off_x;
> + int y = points[n].y + off_y;
> + int w = widths[n];
> + int x2 = x1 + w;
> + char *l;
> +
> + l = d;
> + d += PixmapBytePad(w, drawable->depth);
> +
> + /* clip */
> + if (x1 < box->x1) {
> + l += (box->x1 - x1) * (drawable->bitsPerPixel >> 3);
Does bitsPerPixel have to be a multiple of 8? Isn't there a shared
function to calculate this pitch?
> + x1 = box->x1;
> + }
> + if (x2 > box->x2)
> + x2 = box->x2;
> +
> + if (x1 >= x2)
> + continue;
> + if (y < box->y1)
> + continue;
> + if (y >= box->y2)
> + continue;
> +
> + glReadPixels(x1 - box->x1, y - box->y1, x2 - x1, 1,
> format, type, l);
To be honest, without a PBO this is worse than the fb fallback, as it's
one stall per span.
Even with a PBO, it's still a huge overhead per span. I guess for lots
of spans (maybe 20+) it's faster to always fall back to fb.
But I don't see another non-hacky way to implement this.
> + }
> + }
> +
> + glamor_put_context(glamor_priv);
> + return;
> +
> +bail:
> + if (glamor_prep_drawable(drawable, FALSE))
> + fbGetSpans(drawable, wmax, points, widths, count, dst);
> + glamor_fini_drawable(drawable);
> +}
> +
> +void
> +glamor_setspans(DrawablePtr drawable, GCPtr gc, char *src,
> + DDXPointPtr points, int *widths, int numPoints, int
> sorted)
> +{
> + ScreenPtr screen = drawable->pScreen;
> + glamor_screen_private *glamor_priv =
> glamor_get_screen_private(screen);
> + PixmapPtr pixmap = glamor_get_drawable_pixmap(drawable);
> + glamor_pixmap_private *pixmap_priv;
> + int box_x, box_y;
> + int n;
> + char *s;
> + GLenum type;
> + GLenum format;
> + int off_x, off_y;
> +
> + pixmap_priv = glamor_get_pixmap_private(pixmap);
> + if (!GLAMOR_PIXMAP_PRIV_HAS_FBO(pixmap_priv))
> + goto bail;
> +
> + if (gc->alu != GXcopy)
> + goto bail;
> +
> + if (!glamor_pm_is_solid(&pixmap->drawable, gc->planemask))
> + goto bail;
> +
> + glamor_get_drawable_deltas(drawable, pixmap, &off_x, &off_y);
> + glamor_format_for_pixmap(pixmap, &format, &type);
> +
> + glamor_get_context(glamor_priv);
> +
> + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
> + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
> +
> + glPixelStorei(GL_UNPACK_ALIGNMENT, 4);
> +
> + glamor_pixmap_loop(pixmap_priv, box_x, box_y) {
> + BoxPtr box =
> glamor_pixmap_box_at(pixmap_priv, box_x, box_y);
> + glamor_pixmap_fbo *fbo =
> glamor_pixmap_fbo_at(pixmap_priv, box_x, box_y);
> +
> + glActiveTexture(GL_TEXTURE0);
> + glBindTexture(GL_TEXTURE_2D, fbo->tex);
> +
> + s = src;
> + for (n = 0; n < numPoints; n++) {
> +
> + BoxPtr clip_box = RegionRects(gc->pCompositeClip);
> + int nclip_box =
> RegionNumRects(gc->pCompositeClip);
> + int w = widths[n];
> + int y = points[n].y;
> + int x = points[n].x;
> +
> + while (nclip_box--) {
> + int x1 = x;
> + int x2 = x + w;
> + int y1 = y;
> + char *l = s;
> +
> + /* clip to composite clip */
> + if (x1 < clip_box->x1) {
> + l += (clip_box->x1 - x1) * (drawable->bitsPerPixel
> >> 3);
> + x1 = clip_box->x1;
> + }
> + if (x2 > clip_box->x2)
> + x2 = clip_box->x2;
> +
> + if (y < clip_box->y1)
> + continue;
> + if (y >= clip_box->y2)
> + continue;
> +
> + /* adjust to pixmap coordinates */
> + x1 += off_x;
> + x2 += off_x;
> + y1 += off_y;
> +
> + if (x1 < box->x1) {
> + l += (box->x1 - x1) * (drawable->bitsPerPixel >>
> 3);
> + x1 = box->x1;
> + }
> + if (x2 > box->x2)
> + x2 = box->x2;
> +
> + if (x1 >= x2)
> + continue;
> + if (y1 < box->y1)
> + continue;
> + if (y1 >= box->y2)
> + continue;
> +
> + glTexSubImage2D(GL_TEXTURE_2D, 0,
> + x1 - box->x1, y1 - box->y1, x2 - x1,
> 1,
> + format, type,
> + l);
We have to find a way to upload this texture data all at once and to
emit one draw call (per fbo + box?),
e.g. via a temporary 1D texture or, better, a texture buffer object.
Anything is better than calling one GL function per span, even the fb
fallback.
> + }
> + s += PixmapBytePad(w, drawable->depth);
> + }
> + }
> +
> + glamor_put_context(glamor_priv);
> + return;
> +
> +bail:
> + if (glamor_prep_drawable(drawable, TRUE) && glamor_prep_gc(gc))
> + fbSetSpans(drawable, gc, src, points, widths, numPoints,
> sorted);
> + glamor_fini_gc(gc);
> + glamor_fini_drawable(drawable);
> +}
More information about the xorg-devel
mailing list