xf86-video-intel: 7 commits - src/i830.h src/i830_memory.c src/i830_uxa.c uxa/uxa.c uxa/uxa-glyphs.c uxa/uxa-priv.h uxa/uxa-render.c

Chris Wilson ickle at kemper.freedesktop.org
Wed May 12 05:04:59 PDT 2010


 src/i830.h        |    1 
 src/i830_memory.c |   63 +++++-----
 src/i830_uxa.c    |   15 --
 uxa/uxa-glyphs.c  |  119 +++++++++++++++++++
 uxa/uxa-priv.h    |    8 +
 uxa/uxa-render.c  |  322 ++++++++++++++++++++++++++++++++++++++++--------------
 uxa/uxa.c         |    4 
 7 files changed, 414 insertions(+), 118 deletions(-)

New commits:
commit 6c27f6e4f76b97df71094acf25083b2922966b42
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed May 12 12:01:46 2010 +0100

    uxa: Avoid glyph ping-pong with !offscreen destination
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/uxa/uxa-glyphs.c b/uxa/uxa-glyphs.c
index be89f79..f8bb7f5 100644
--- a/uxa/uxa-glyphs.c
+++ b/uxa/uxa-glyphs.c
@@ -735,6 +735,120 @@ uxa_glyphs_intersect(int nlist, GlyphListPtr list, GlyphPtr * glyphs)
 	return FALSE;
 }
 
+static void
+uxa_check_glyphs(CARD8 op,
+		 PicturePtr src,
+		 PicturePtr dst,
+		 PictFormatPtr maskFormat,
+		 INT16 xSrc,
+		 INT16 ySrc, int nlist, GlyphListPtr list, GlyphPtr * glyphs)
+{
+	int screen = dst->pDrawable->pScreen->myNum;
+	pixman_image_t *image;
+	PixmapPtr scratch;
+	PicturePtr mask;
+	int width = 0, height = 0;
+	int x, y, n;
+	int xDst = list->xOff, yDst = list->yOff;
+	BoxRec extents = { 0, 0, 0, 0 };
+
+	if (maskFormat) {
+		pixman_format_code_t format;
+		CARD32 component_alpha;
+		int error;
+
+		uxa_glyph_extents(nlist, list, glyphs, &extents);
+		if (extents.x2 <= extents.x1 || extents.y2 <= extents.y1)
+			return;
+
+		width = extents.x2 - extents.x1;
+		height = extents.y2 - extents.y1;
+
+		format = maskFormat->format |
+			(BitsPerPixel(maskFormat->depth) << 24);
+		image =
+			pixman_image_create_bits(format, width, height, NULL, 0);
+		if (!image)
+			return;
+
+		scratch = GetScratchPixmapHeader(dst->pDrawable->pScreen, width, height,
+						 PIXMAN_FORMAT_DEPTH(format),
+						 PIXMAN_FORMAT_BPP(format),
+						 pixman_image_get_stride(image),
+						 pixman_image_get_data(image));
+
+		if (!scratch) {
+			pixman_image_unref(image);
+			return;
+		}
+
+		component_alpha = NeedsComponent(maskFormat->format);
+		mask = CreatePicture(0, &scratch->drawable,
+				     maskFormat, CPComponentAlpha,
+				     &component_alpha, serverClient, &error);
+		if (!mask) {
+			FreeScratchPixmapHeader(scratch);
+			pixman_image_unref(image);
+			return;
+		}
+
+		x = -extents.x1;
+		y = -extents.y1;
+	} else {
+		mask = dst;
+		x = 0;
+		y = 0;
+	}
+
+	while (nlist--) {
+		x += list->xOff;
+		y += list->yOff;
+		n = list->len;
+		while (n--) {
+			GlyphPtr glyph = *glyphs++;
+			PicturePtr g = GlyphPicture(glyph)[screen];
+			if (g) {
+				if (maskFormat) {
+					CompositePicture(PictOpAdd, g, NULL, mask,
+							 0, 0,
+							 0, 0,
+							 x - glyph->info.x,
+							 y - glyph->info.y,
+							 glyph->info.width,
+							 glyph->info.height);
+				} else {
+					CompositePicture(op, src, g, dst,
+							 xSrc + (x - glyph->info.x) - xDst,
+							 ySrc + (y - glyph->info.y) - yDst,
+							 0, 0,
+							 x - glyph->info.x,
+							 y - glyph->info.y,
+							 glyph->info.width,
+							 glyph->info.height);
+				}
+			}
+
+			x += glyph->info.xOff;
+			y += glyph->info.yOff;
+		}
+		list++;
+	}
+
+	if (maskFormat) {
+		x = extents.x1;
+		y = extents.y1;
+		CompositePicture(op, src, mask, dst,
+				 xSrc + x - xDst,
+				 ySrc + y - yDst,
+				 0, 0,
+				 x, y,
+				 width, height);
+		FreePicture(mask, 0);
+		FreeScratchPixmapHeader(scratch);
+		pixman_image_unref(image);
+	}
+}
+
 void
 uxa_glyphs(CARD8 op,
 	   PicturePtr pSrc,
@@ -756,6 +870,11 @@ uxa_glyphs(CARD8 op,
 	CARD32 component_alpha;
 	uxa_glyph_buffer_t buffer;
 
+	if (!uxa_drawable_is_offscreen(pDst->pDrawable)) {
+	    uxa_check_glyphs(op, pSrc, pDst, maskFormat, xSrc, ySrc, nlist, list, glyphs);
+	    return;
+	}
+
 	/* If we don't have a mask format but all the glyphs have the same format
 	 * and don't intersect, use the glyph format as mask format for the full
 	 * benefits of the glyph cache.
commit d5383c2073e181b9ec352461c05d6202ad37f9d6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed May 12 10:53:14 2010 +0100

    uxa: Avoid ping-pong with !offscreen destination and traps
    
    If we are destined to target an !offscreen drawable, then uploading the
    trapezoid mask to a bo is the last thing we actually want to do...
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/uxa/uxa-render.c b/uxa/uxa-render.c
index 16657aa..4958dd8 100644
--- a/uxa/uxa-render.c
+++ b/uxa/uxa-render.c
@@ -1553,15 +1553,15 @@ uxa_create_alpha_picture(ScreenPtr pScreen,
  * uxa_check_poly_fill_rect to initialize the contents.
  */
 void
-uxa_trapezoids(CARD8 op, PicturePtr pSrc, PicturePtr pDst,
+uxa_trapezoids(CARD8 op, PicturePtr src, PicturePtr dst,
 	       PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc,
 	       int ntrap, xTrapezoid * traps)
 {
-	ScreenPtr pScreen = pDst->pDrawable->pScreen;
-	PictureScreenPtr ps = GetPictureScreen(pScreen);
+	ScreenPtr screen = dst->pDrawable->pScreen;
 	BoxRec bounds;
-	Bool direct = op == PictOpAdd && miIsSolidAlpha(pSrc);
+	Bool direct;
 
+	direct = op == PictOpAdd && miIsSolidAlpha(src);
 	if (maskFormat || direct) {
 		miTrapezoidBounds(ntrap, traps, &bounds);
 
@@ -1573,7 +1573,7 @@ uxa_trapezoids(CARD8 op, PicturePtr pSrc, PicturePtr pDst,
 	 * Check for solid alpha add
 	 */
 	if (direct) {
-		DrawablePtr pDraw = pDst->pDrawable;
+		DrawablePtr pDraw = dst->pDrawable;
 		PixmapPtr pixmap = uxa_get_drawable_pixmap(pDraw);
 		int xoff, yoff;
 
@@ -1583,12 +1583,15 @@ uxa_trapezoids(CARD8 op, PicturePtr pSrc, PicturePtr pDst,
 		yoff += pDraw->y;
 
 		if (uxa_prepare_access(pDraw, UXA_ACCESS_RW)) {
+			PictureScreenPtr ps = GetPictureScreen(screen);
+
 			for (; ntrap; ntrap--, traps++)
-				(*ps->RasterizeTrapezoid) (pDst, traps, 0, 0);
+				(*ps->RasterizeTrapezoid) (dst, traps, 0, 0);
 			uxa_finish_access(pDraw);
 		}
 	} else if (maskFormat) {
-		PicturePtr pPicture;
+		PixmapPtr scratch = NULL;
+		PicturePtr mask;
 		INT16 xDst, yDst;
 		INT16 xRel, yRel;
 		int width, height;
@@ -1612,28 +1615,48 @@ uxa_trapezoids(CARD8 op, PicturePtr pSrc, PicturePtr pDst,
 			pixman_rasterize_trapezoid(image,
 						   (pixman_trapezoid_t *) traps,
 						   -bounds.x1, -bounds.y1);
-
-		pPicture =
-		    uxa_picture_from_pixman_image(pScreen, image, format);
-		pixman_image_unref(image);
-		if (!pPicture)
+		if (uxa_drawable_is_offscreen(dst->pDrawable)) {
+			mask = uxa_picture_from_pixman_image(screen, image, format);
+		} else {
+			int error;
+
+			scratch = GetScratchPixmapHeader(screen, width, height,
+							PIXMAN_FORMAT_DEPTH(format),
+							PIXMAN_FORMAT_BPP(format),
+							pixman_image_get_stride(image),
+							pixman_image_get_data(image));
+			mask = CreatePicture(0, &scratch->drawable,
+					     PictureMatchFormat(screen,
+								PIXMAN_FORMAT_DEPTH(format),
+								format),
+					     0, 0, serverClient, &error);
+		}
+		if (!mask) {
+			if (scratch)
+				FreeScratchPixmapHeader(scratch);
+			pixman_image_unref(image);
 			return;
+		}
 
 		xRel = bounds.x1 + xSrc - xDst;
 		yRel = bounds.y1 + ySrc - yDst;
-		CompositePicture(op, pSrc, pPicture, pDst,
+		CompositePicture(op, src, mask, dst,
 				 xRel, yRel,
 				 0, 0,
 				 bounds.x1, bounds.y1,
 				 width, height);
-		FreePicture(pPicture, 0);
+		FreePicture(mask, 0);
+
+		if (scratch)
+			FreeScratchPixmapHeader(scratch);
+		pixman_image_unref(image);
 	} else {
-		if (pDst->polyEdge == PolyEdgeSharp)
-			maskFormat = PictureMatchFormat(pScreen, 1, PICT_a1);
+		if (dst->polyEdge == PolyEdgeSharp)
+			maskFormat = PictureMatchFormat(screen, 1, PICT_a1);
 		else
-			maskFormat = PictureMatchFormat(pScreen, 8, PICT_a8);
+			maskFormat = PictureMatchFormat(screen, 8, PICT_a8);
 		for (; ntrap; ntrap--, traps++)
-			uxa_trapezoids(op, pSrc, pDst, maskFormat, xSrc, ySrc,
+			uxa_trapezoids(op, src, dst, maskFormat, xSrc, ySrc,
 				       1, traps);
 	}
 }
commit 00664b8f9d3da8d0d6aa53471ed3a8a8f6391660
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed May 12 12:41:51 2010 +0100

    uxa: Fallback when compositing to a !offscreen destination
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/uxa/uxa-render.c b/uxa/uxa-render.c
index ee91b5c..16657aa 100644
--- a/uxa/uxa-render.c
+++ b/uxa/uxa-render.c
@@ -1324,6 +1324,9 @@ uxa_composite(CARD8 op,
 	if (uxa_screen->swappedOut)
 		goto fallback;
 
+	if (!uxa_drawable_is_offscreen(pDst->pDrawable))
+	    goto fallback;
+
 	/* Remove repeat in source if useless */
 	if (pSrc->pDrawable && pSrc->repeat &&
 	    transform_is_integer_translation(pSrc->transform, &tx, &ty) &&
commit 0c6372a77fd8c051534c1b420a02a8737aa1dd01
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed May 12 09:57:27 2010 +0100

    i830: Prevent allocation of bo larger than half the aperture
    
    We need to prevent overcommitting the aperture, and in particular if we
    allocate a buffer larger than available space we will fail to mmap it in
    and rendering will fail. Trying to allocate multiple large buffers in
    the aperture, often the case when falling back, causes thrashing and
    eviction of useful buffers. So from the outset simply do not allocate a
    bo if the required size is more than half the available aperture
    space.
    
    Fixes allocation failure in ocitymap.trace for instance.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i830.h b/src/i830.h
index aa7fedc..fdaa47e 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -296,6 +296,7 @@ typedef struct intel_screen_private {
 	int accel_pixmap_offset_alignment;
 	int accel_max_x;
 	int accel_max_y;
+	int max_bo_size;
 	int max_gtt_map_size;
 	int max_tiling_size;
 
diff --git a/src/i830_memory.c b/src/i830_memory.c
index 6a73bf6..611b548 100644
--- a/src/i830_memory.c
+++ b/src/i830_memory.c
@@ -274,50 +274,55 @@ drm_intel_bo *i830_allocate_framebuffer(ScrnInfoPtr scrn)
 	return front_buffer;
 }
 
-static void i830_set_max_gtt_map_size(ScrnInfoPtr scrn)
+static void i830_set_max_bo_size(intel_screen_private *intel,
+				 const struct drm_i915_gem_get_aperture *aperture)
 {
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-	struct drm_i915_gem_get_aperture aperture;
-	int ret;
-
-	/* Default low value in case it gets used during server init. */
-	intel->max_gtt_map_size = 16 * 1024 * 1024;
+	if (aperture->aper_available_size)
+		/* Large BOs will tend to hit SW fallbacks frequently, and also will
+		 * tend to fail to successfully map when doing SW fallbacks because we
+		 * overcommit address space for BO access, or worse cause aperture
+		 * thrashing.
+		 */
+		intel->max_bo_size = aperture->aper_available_size / 2;
+	else
+		intel->max_bo_size = 64 * 1024 * 1024;
+}
 
-	ret =
-	    ioctl(intel->drmSubFD, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
-	if (ret == 0) {
+static void i830_set_max_gtt_map_size(intel_screen_private *intel,
+				      const struct drm_i915_gem_get_aperture *aperture)
+{
+	if (aperture->aper_available_size)
 		/* Let objects up get bound up to the size where only 2 would fit in
 		 * the aperture, but then leave slop to account for alignment like
 		 * libdrm does.
 		 */
 		intel->max_gtt_map_size =
-		    aperture.aper_available_size * 3 / 4 / 2;
-	}
+			aperture->aper_available_size * 3 / 4 / 2;
+	else
+		intel->max_gtt_map_size = 16 * 1024 * 1024;
 }
 
-static void i830_set_max_tiling_size(ScrnInfoPtr scrn)
+static void i830_set_max_tiling_size(intel_screen_private *intel,
+				     const struct drm_i915_gem_get_aperture *aperture)
 {
-	intel_screen_private *intel = intel_get_screen_private(scrn);
-	struct drm_i915_gem_get_aperture aperture;
-	int ret;
-
-	/* Default low value in case it gets used during server init. */
-	intel->max_tiling_size = 4 * 1024 * 1024;
-
-	ret =
-	    ioctl(intel->drmSubFD, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
-	if (ret == 0) {
+	if (aperture->aper_available_size)
 		/* Let objects be tiled up to the size where only 4 would fit in
 		 * the aperture, presuming worst case alignment.
 		 */
-		intel->max_tiling_size = aperture.aper_available_size / 4;
-		if (!IS_I965G(intel))
-			intel->max_tiling_size /= 2;
-	}
+		intel->max_tiling_size = aperture->aper_available_size / 4;
+	else
+		intel->max_tiling_size = 4 * 1024 * 1024;
 }
 
 void i830_set_gem_max_sizes(ScrnInfoPtr scrn)
 {
-	i830_set_max_gtt_map_size(scrn);
-	i830_set_max_tiling_size(scrn);
+	intel_screen_private *intel = intel_get_screen_private(scrn);
+	struct drm_i915_gem_get_aperture aperture;
+
+	aperture.aper_available_size = 0;
+	ioctl(intel->drmSubFD, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
+
+	i830_set_max_bo_size(intel, &aperture);
+	i830_set_max_gtt_map_size(intel, &aperture);
+	i830_set_max_tiling_size(intel, &aperture);
 }
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index b5fc6b8..3f9610e 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -944,18 +944,13 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
 		}
 		size = i830_uxa_pixmap_compute_size(pixmap, w, h, &tiling, &stride);
 
-		/* Fail very large allocations on 32-bit systems.  Large BOs will
-		 * tend to hit SW fallbacks frequently, and also will tend to fail
-		 * to successfully map when doing SW fallbacks because we overcommit
-		 * address space for BO access.
-		 *
-		 * Note that size should fit in 32 bits.  We throw out >32767x32767x4,
-		 * and pitch alignment could get us up to 32768x32767x4.
+		/* Fail very large allocations.  Large BOs will tend to hit SW fallbacks
+		 * frequently, and also will tend to fail to successfully map when doing
+		 * SW fallbacks because we overcommit address space for BO access.
 		 */
-		if (sizeof(unsigned long) == 4 &&
-		    size > (unsigned int)(1024 * 1024 * 1024)) {
+		if (size > intel->max_bo_size) {
 			fbDestroyPixmap(pixmap);
-			return NullPixmap;
+			return fbCreatePixmap(screen, w, h, depth, usage);
 		}
 
 		/* Perform a preliminary search for an in-flight bo */
commit 244b7cbfffdcda4761948eaa37ed2a30ca81f107
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed May 12 09:18:30 2010 +0100

    uxa: Use accelerated PutImage for uploading pixman images.
    
    Short-circuits the current use of PutImage from CopyArea, bypassing all
    the temporary allocations.

diff --git a/uxa/uxa-render.c b/uxa/uxa-render.c
index f8105af..ee91b5c 100644
--- a/uxa/uxa-render.c
+++ b/uxa/uxa-render.c
@@ -450,74 +450,84 @@ uxa_picture_for_pixman_format(ScreenPtr pScreen,
 }
 
 static PicturePtr
-uxa_picture_from_pixman_image(ScreenPtr pScreen,
+uxa_picture_from_pixman_image(ScreenPtr screen,
 			      pixman_image_t * image,
 			      pixman_format_code_t format)
 {
-	PicturePtr pPicture;
-	PixmapPtr pPixmap;
+	uxa_screen_t *uxa_screen = uxa_get_screen(screen);
+	PicturePtr picture;
+	PixmapPtr pixmap;
 	int width, height;
 
 	width = pixman_image_get_width(image);
 	height = pixman_image_get_height(image);
 
-	pPicture = uxa_picture_for_pixman_format(pScreen, format,
-						 width, height);
-	if (!pPicture)
+	picture = uxa_picture_for_pixman_format(screen, format,
+						width, height);
+	if (!picture)
 		return 0;
 
-	pPixmap = GetScratchPixmapHeader(pScreen, width, height,
-					 PIXMAN_FORMAT_DEPTH(format),
-					 PIXMAN_FORMAT_BPP(format),
-					 pixman_image_get_stride(image),
-					 pixman_image_get_data(image));
-	if (!pPixmap) {
-		FreePicture(pPicture, 0);
+	if (uxa_screen->info->put_image &&
+	    ((picture->pDrawable->depth << 24) | picture->format) == format &&
+	    uxa_screen->info->put_image((PixmapPtr)picture->pDrawable,
+					0, 0,
+					width, height,
+					(char *)pixman_image_get_data(image),
+					pixman_image_get_stride(image)))
+		return picture;
+
+	pixmap = GetScratchPixmapHeader(screen, width, height,
+					PIXMAN_FORMAT_DEPTH(format),
+					PIXMAN_FORMAT_BPP(format),
+					pixman_image_get_stride(image),
+					pixman_image_get_data(image));
+	if (!pixmap) {
+		FreePicture(picture, 0);
 		return 0;
 	}
 
-	if (((pPicture->pDrawable->depth << 24) | pPicture->format) == format) {
-	    GCPtr pGC;
+	if (((picture->pDrawable->depth << 24) | picture->format) == format) {
+		GCPtr gc;
 
-	    pGC = GetScratchGC(PIXMAN_FORMAT_DEPTH(format), pScreen);
-	    if (!pGC) {
-		FreeScratchPixmapHeader(pPixmap);
-		FreePicture(pPicture, 0);
-		return 0;
-	    }
-	    ValidateGC(pPicture->pDrawable, pGC);
+		gc = GetScratchGC(PIXMAN_FORMAT_DEPTH(format), screen);
+		if (!gc) {
+			FreeScratchPixmapHeader(pixmap);
+			FreePicture(picture, 0);
+			return 0;
+		}
+		ValidateGC(picture->pDrawable, gc);
 
-	    (*pGC->ops->CopyArea) (&pPixmap->drawable, pPicture->pDrawable,
-				   pGC, 0, 0, width, height, 0, 0);
+		(*gc->ops->CopyArea) (&pixmap->drawable, picture->pDrawable,
+				      gc, 0, 0, width, height, 0, 0);
 
-	    FreeScratchGC(pGC);
+		FreeScratchGC(gc);
 	} else {
-	    PicturePtr pSrc;
-	    int error;
-
-	    pSrc = CreatePicture(0, &pPixmap->drawable,
-				 PictureMatchFormat(pScreen,
-						    PIXMAN_FORMAT_DEPTH(format),
-						    format),
-				 0, 0, serverClient, &error);
-	    if (!pSrc) {
-		FreeScratchPixmapHeader(pPixmap);
-		FreePicture(pPicture, 0);
-		return 0;
-	    }
-	    ValidatePicture(pSrc);
+		PicturePtr src;
+		int error;
+
+		src = CreatePicture(0, &pixmap->drawable,
+				    PictureMatchFormat(screen,
+						       PIXMAN_FORMAT_DEPTH(format),
+						       format),
+				    0, 0, serverClient, &error);
+		if (!src) {
+			FreeScratchPixmapHeader(pixmap);
+			FreePicture(picture, 0);
+			return 0;
+		}
+		ValidatePicture(src);
 
-	    if (uxa_prepare_access(pPicture->pDrawable, UXA_ACCESS_RW)) {
-		fbComposite(PictOpSrc, pSrc, NULL, pPicture,
-			    0, 0, 0, 0, 0, 0, width, height);
-		uxa_finish_access(pPicture->pDrawable);
-	    }
+		if (uxa_prepare_access(picture->pDrawable, UXA_ACCESS_RW)) {
+			fbComposite(PictOpSrc, src, NULL, picture,
+				    0, 0, 0, 0, 0, 0, width, height);
+			uxa_finish_access(picture->pDrawable);
+		}
 
-	    FreePicture(pSrc, 0);
+		FreePicture(src, 0);
 	}
-	FreeScratchPixmapHeader(pPixmap);
+	FreeScratchPixmapHeader(pixmap);
 
-	return pPicture;
+	return picture;
 }
 
 static PicturePtr
commit cb887cfc670bf63993bd313ff33927afb8198eae
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 26 09:59:51 2010 +0000

    uxa: solid rects
    
    The cost of performing relocations outweighs the advantages of using the
    blitter for solids with lots of rectangles.
    
    References:
    
      Bug 22127 - [UXA] 50% performance regression for XRenderFillRectangles
      https://bugs.freedesktop.org/show_bug.cgi?id=22127
    
    By using the 3D pipeline we improve our performance by around 4x on
    i945, measured by the jxbench microbenchmark, and a factor of 10x by
    short-cutting to the 3D pipeline for blended rectangles.
    
    Before, on a i945GME:
      19982.412060 Ops/s; rects (!); 15x15
      9599.131693 Ops/s; rects (!); 75x75
      3803.654743 Ops/s; rects (!); 250x250
      6836.743772 Ops/s; rects blended; 15x15
      1443.750000 Ops/s; rects blended; 75x75
      495.335821 Ops/s; rects blended; 250x250
      23247.933884 Ops/s; rects composition (!); 15x15
      10993.073048 Ops/s; rects composition (!); 75x75
      3595.905172 Ops/s; rects composition (!); 250x250
    
    After:
      87271.145975 Ops/s; rects (!); 15x15
      32347.744361 Ops/s; rects (!); 75x75
      5884.177215 Ops/s; rects (!); 250x250
      73500.000000 Ops/s; rects blended; 15x15
      33580.882353 Ops/s; rects blended; 75x75
      5858.811749 Ops/s; rects blended; 250x250
      25582.317073 Ops/s; rects composition (!); 15x15
      6664.728682 Ops/s; rects composition (!); 75x75
      14965.909091 Ops/s; rects composition (!); 250x250 [suspicious]
    
    This has no impact on Cairo, but I have a suspicion from watching xtrace
    that Qt likes to blit thousands of 1x1 rectangles with the same colour.
    However, we are still around 2-3x slower than the reported figures for
    EXA!
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/uxa/uxa-priv.h b/uxa/uxa-priv.h
index c1f3688..a4763b4 100644
--- a/uxa/uxa-priv.h
+++ b/uxa/uxa-priv.h
@@ -156,6 +156,7 @@ typedef struct {
 	BitmapToRegionProcPtr SavedBitmapToRegion;
 #ifdef RENDER
 	CompositeProcPtr SavedComposite;
+	CompositeRectsProcPtr SavedCompositeRects;
 	TrianglesProcPtr SavedTriangles;
 	GlyphsProcPtr SavedGlyphs;
 	TrapezoidsProcPtr SavedTrapezoids;
@@ -417,6 +418,13 @@ uxa_composite_rects(CARD8 op,
 		    PicturePtr pDst, int nrect, uxa_composite_rect_t * rects);
 
 void
+uxa_solid_rects (CARD8		op,
+		 PicturePtr	dst,
+		 xRenderColor  *color,
+		 int		num_rects,
+		 xRectangle    *rects);
+
+void
 uxa_trapezoids(CARD8 op, PicturePtr pSrc, PicturePtr pDst,
 	       PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc,
 	       int ntrap, xTrapezoid * traps);
diff --git a/uxa/uxa-render.c b/uxa/uxa-render.c
index 244d274..f8105af 100644
--- a/uxa/uxa-render.c
+++ b/uxa/uxa-render.c
@@ -828,6 +828,75 @@ uxa_acquire_mask(ScreenPtr screen,
 				    out_x, out_y);
 }
 
+void
+uxa_solid_rects (CARD8		op,
+		 PicturePtr	dst,
+		 xRenderColor  *color,
+		 int		num_rects,
+		 xRectangle    *rects)
+{
+	ScreenPtr screen = dst->pDrawable->pScreen;
+	uxa_screen_t *uxa_screen = uxa_get_screen(screen);
+	PixmapPtr pixmap;
+	int dst_x, dst_y;
+	PicturePtr src;
+	int error;
+
+	/* Using GEM, the relocation costs outweigh the advantages of the blitter */
+	if (num_rects == 1)
+		goto fallback;
+
+	if (dst->alphaMap)
+		goto fallback;
+
+	if (!uxa_screen->info->check_composite_texture)
+		goto fallback;
+
+	pixmap = uxa_get_offscreen_pixmap(dst->pDrawable, &dst_x, &dst_y);
+	if (!pixmap)
+		goto fallback;
+
+	if (op == PictOpClear)
+		color->red = color->green = color->blue = color->alpha = 0;
+	if (PICT_FORMAT_A(dst->format) == 0)
+		color->alpha = 0xffff;
+	if (color->alpha >= 0xff00 && op == PictOpOver)
+		op = PictOpSrc;
+
+	src = CreateSolidPicture(0, color, &error);
+	if (!src)
+		goto fallback;
+
+	if (!uxa_screen->info->check_composite(op, src, NULL, dst) ||
+	    !uxa_screen->info->check_composite_texture(screen, src)) {
+		FreePicture(src, 0);
+		goto fallback;
+	}
+
+	if (!uxa_screen->info->prepare_composite(op, src, NULL, dst, NULL, NULL, pixmap)) {
+		FreePicture(src, 0);
+		goto fallback;
+	}
+
+	while (num_rects--) {
+		uxa_screen->info->composite(pixmap,
+					    0, 0, 0, 0,
+					    rects->x + dst_x,
+					    rects->y + dst_y,
+					    rects->width,
+					    rects->height);
+		rects++;
+	}
+
+	uxa_screen->info->done_composite(pixmap);
+	FreePicture(src, 0);
+
+	return;
+
+fallback:
+	uxa_screen->SavedCompositeRects(op, dst, color, num_rects, rects);
+}
+
 static int
 uxa_try_driver_composite_rects(CARD8 op,
 			       PicturePtr pSrc,
diff --git a/uxa/uxa.c b/uxa/uxa.c
index d6ad5a6..9ea1466 100644
--- a/uxa/uxa.c
+++ b/uxa/uxa.c
@@ -388,6 +388,7 @@ static Bool uxa_close_screen(int i, ScreenPtr pScreen)
 #ifdef RENDER
 	if (ps) {
 		ps->Composite = uxa_screen->SavedComposite;
+		ps->CompositeRects = uxa_screen->SavedCompositeRects;
 		ps->Glyphs = uxa_screen->SavedGlyphs;
 		ps->Trapezoids = uxa_screen->SavedTrapezoids;
 		ps->AddTraps = uxa_screen->SavedAddTraps;
@@ -517,6 +518,9 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver)
 		uxa_screen->SavedComposite = ps->Composite;
 		ps->Composite = uxa_composite;
 
+		uxa_screen->SavedCompositeRects = ps->CompositeRects;
+		ps->CompositeRects = uxa_solid_rects;
+
 		uxa_screen->SavedGlyphs = ps->Glyphs;
 		ps->Glyphs = uxa_glyphs;
 
commit c8e10f7791fecf0fed7ad606db062d87d8ff263c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed May 12 12:41:00 2010 +0100

    debug: Add names for operators
    
    Most useful for confirming my worst fears: unwarranted use of
    OutReverse + Add.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/uxa/uxa-render.c b/uxa/uxa-render.c
index 2086ae5..244d274 100644
--- a/uxa/uxa-render.c
+++ b/uxa/uxa-render.c
@@ -85,29 +85,88 @@ static void uxa_composite_fallback_pict_desc(PicturePtr pict, char *string,
 		 pict->alphaMap ? " with alpha map" :"");
 }
 
+static const char *
+op_to_string(CARD8 op)
+{
+    switch (op) {
+#define C(x) case PictOp##x: return #x
+	C(Clear);
+	C(Src);
+	C(Dst);
+	C(Over);
+	C(OverReverse);
+	C(In);
+	C(InReverse);
+	C(Out);
+	C(OutReverse);
+	C(Atop);
+	C(AtopReverse);
+	C(Xor);
+	C(Add);
+	C(Saturate);
+
+	/*
+	 * Operators only available in version 0.2
+	 */
+	C(DisjointClear);
+	C(DisjointSrc);
+	C(DisjointDst);
+	C(DisjointOver);
+	C(DisjointOverReverse);
+	C(DisjointIn);
+	C(DisjointInReverse);
+	C(DisjointOut);
+	C(DisjointOutReverse);
+	C(DisjointAtop);
+	C(DisjointAtopReverse);
+	C(DisjointXor);
+
+	C(ConjointClear);
+	C(ConjointSrc);
+	C(ConjointDst);
+	C(ConjointOver);
+	C(ConjointOverReverse);
+	C(ConjointIn);
+	C(ConjointInReverse);
+	C(ConjointOut);
+	C(ConjointOutReverse);
+	C(ConjointAtop);
+	C(ConjointAtopReverse);
+	C(ConjointXor);
+
+	/*
+	 * Operators only available in version 0.11
+	 */
+	C(Multiply);
+	C(Screen);
+	C(Overlay);
+	C(Darken);
+	C(Lighten);
+	C(ColorDodge);
+	C(ColorBurn);
+	C(HardLight);
+	C(SoftLight);
+	C(Difference);
+	C(Exclusion);
+	C(HSLHue);
+	C(HSLSaturation);
+	C(HSLColor);
+	C(HSLLuminosity);
+    default: return "garbage";
+#undef C
+    }
+}
+
 static void
 uxa_print_composite_fallback(const char *func, CARD8 op,
 			     PicturePtr pSrc, PicturePtr pMask, PicturePtr pDst)
 {
 	uxa_screen_t *uxa_screen = uxa_get_screen(pDst->pDrawable->pScreen);
-	char sop[20];
 	char srcdesc[40], maskdesc[40], dstdesc[40];
 
 	if (! uxa_screen->fallback_debug)
 		return;
 
-	switch (op) {
-	case PictOpSrc:
-		sprintf(sop, "Src");
-		break;
-	case PictOpOver:
-		sprintf(sop, "Over");
-		break;
-	default:
-		sprintf(sop, "0x%x", (int)op);
-		break;
-	}
-
 	uxa_composite_fallback_pict_desc(pSrc, srcdesc, 40);
 	uxa_composite_fallback_pict_desc(pMask, maskdesc, 40);
 	uxa_composite_fallback_pict_desc(pDst, dstdesc, 40);
@@ -118,7 +177,7 @@ uxa_print_composite_fallback(const char *func, CARD8 op,
 	       "  mask %s, \n"
 	       "  dst  %s, \n"
 	       "  screen %s\n",
-	       func, sop, srcdesc, maskdesc, dstdesc,
+	       func, op_to_string (op), srcdesc, maskdesc, dstdesc,
 	       uxa_screen->swappedOut ? "swapped out" : "normal");
 }
 


More information about the xorg-commit mailing list