xf86-video-intel: 4 commits - src/i830_batchbuffer.c src/i830_driver.c src/i830.h src/i830_uxa.c uxa/uxa-accel.c

Chris Wilson ickle at kemper.freedesktop.org
Thu Apr 15 14:18:44 PDT 2010


 src/i830.h             |   13 +++-
 src/i830_batchbuffer.c |   12 ++++
 src/i830_driver.c      |    1 
 src/i830_uxa.c         |  135 +++++++++++++++++++++++++++++++------------------
 uxa/uxa-accel.c        |   51 +++++++++++++++---
 5 files changed, 154 insertions(+), 58 deletions(-)

New commits:
commit c374c94e41d6e7d677334171e3255778d77cbe18
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Mar 31 11:50:27 2010 +0100

    uxa: Reuse in-flight bo
    
    When we need to allocate a new bo for use as a gpu target, first check
    if we can reuse a pixmap that has already been relocated into the
    aperture as a temporary target, for instance a glyph mask or a clip mask.
    
    Before:
    backend                      test   min(s) median(s) stddev.
    xlib         firefox-planet-gnome   50.568   50.873   0.30%
     xcb         firefox-planet-gnome   49.686   53.003   3.92%
    xlib                    evolution   40.115   40.131   0.86%
     xcb                    evolution   28.241   28.285   0.18%
    
    After:
    backend                      test   min(s) median(s) stddev.
    xlib         firefox-planet-gnome   47.759   48.233   0.80%
     xcb         firefox-planet-gnome   48.611   48.657   0.87%
    xlib                    evolution   38.954   38.991   0.05%
     xcb                    evolution   26.561   26.654   0.19%
    
    And even more dramatic improvements when using a font size larger than
    the maximum size of the glyph cache:
     xcb firefox-36-20090611:  1.79x speedup
    xlib firefox-36-20090611:  1.74x speedup
     xcb firefox-36-20090609:  1.62x speedup
    xlib firefox-36-20090609:  1.59x speedup
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i830.h b/src/i830.h
index 43c5887..88949f6 100644
--- a/src/i830.h
+++ b/src/i830.h
@@ -136,14 +136,22 @@ list_is_empty(struct list *head)
 #define list_first_entry(ptr, type, member) \
 	list_entry((ptr)->next, type, member)
 
+#define list_foreach(pos, head)			\
+	for (pos = (head)->next; pos != (head);	pos = pos->next)
+
+#define list_foreach_entry(pos, type, head, member)		\
+	for (pos = list_entry((head)->next, type, member);\
+	     &pos->member != (head);					\
+	     pos = list_entry(pos->member.next, type, member))
+
 struct intel_pixmap {
 	dri_bo *bo;
-	uint32_t tiling;
+	uint32_t tiling, stride;
 	uint32_t flush_write_domain;
 	uint32_t flush_read_domains;
 	uint32_t batch_write_domain;
 	uint32_t batch_read_domains;
-	struct list flush, batch;
+	struct list flush, batch, in_flight;
 };
 
 struct intel_pixmap *i830_get_pixmap_intel(PixmapPtr pixmap);
@@ -252,6 +260,7 @@ typedef struct intel_screen_private {
 	int batch_atomic_limit;
 	struct list batch_pixmaps;
 	struct list flush_pixmaps;
+	struct list in_flight;
 
 	/* For Xvideo */
 	Bool use_drmmode_overlay;
diff --git a/src/i830_batchbuffer.c b/src/i830_batchbuffer.c
index a02e980..492472e 100644
--- a/src/i830_batchbuffer.c
+++ b/src/i830_batchbuffer.c
@@ -195,6 +195,18 @@ void intel_batch_submit(ScrnInfoPtr scrn)
 		list_del(&entry->flush);
 	}
 
+	while (!list_is_empty(&intel->in_flight)) {
+		struct intel_pixmap *entry;
+
+		entry = list_first_entry(&intel->in_flight,
+					 struct intel_pixmap,
+					 in_flight);
+
+		dri_bo_unreference(entry->bo);
+		list_del(&entry->in_flight);
+		xfree(entry);
+	}
+
 	/* Save a ref to the last batch emitted, which we use for syncing
 	 * in debug code.
 	 */
diff --git a/src/i830_driver.c b/src/i830_driver.c
index d0ce552..db8af06 100644
--- a/src/i830_driver.c
+++ b/src/i830_driver.c
@@ -1054,6 +1054,7 @@ void i830_init_bufmgr(ScrnInfoPtr scrn)
 
 	list_init(&intel->batch_pixmaps);
 	list_init(&intel->flush_pixmaps);
+	list_init(&intel->in_flight);
 }
 
 Bool i830_crtc_on(xf86CrtcPtr crtc)
diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index f3f0f65..984069e 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -127,7 +127,7 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
 {
 	ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
 	intel_screen_private *intel = intel_get_screen_private(scrn);
-	int pitch_align;
+	int pitch, pitch_align;
 	int size;
 
 	if (*tiling != I915_TILING_NONE) {
@@ -151,6 +151,9 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
 		}
 	}
 
+	pitch = (w * pixmap->drawable.bitsPerPixel + 7) / 8;
+	if (pitch <= 256)
+		*tiling = I915_TILING_NONE;
   repeat:
 	if (*tiling == I915_TILING_NONE) {
 		pitch_align = intel->accel_pixmap_pitch_alignment;
@@ -158,8 +161,7 @@ i830_uxa_pixmap_compute_size(PixmapPtr pixmap,
 		pitch_align = 512;
 	}
 
-	*stride = ROUND_TO((w * pixmap->drawable.bitsPerPixel + 7) / 8,
-			   pitch_align);
+	*stride = ROUND_TO(pitch, pitch_align);
 
 	if (*tiling == I915_TILING_NONE) {
 		/* Round the height up so that the GPU's access to a 2x2 aligned
@@ -548,17 +550,19 @@ dri_bo *i830_get_pixmap_bo(PixmapPtr pixmap)
 
 void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
 {
+	ScrnInfoPtr scrn = xf86Screens[pixmap->drawable.pScreen->myNum];
+	intel_screen_private *intel = intel_get_screen_private(scrn);
 	struct intel_pixmap *priv;
 
 	priv = i830_get_pixmap_intel(pixmap);
 
 	if (priv != NULL) {
-		dri_bo_unreference(priv->bo);
-
-		priv->flush_read_domains = priv->flush_write_domain = 0;
-		priv->batch_read_domains = priv->batch_write_domain = 0;
-		list_del(&priv->batch);
-		list_del(&priv->flush);
+		if (list_is_empty(&priv->batch)) {
+			dri_bo_unreference(priv->bo);
+		} else {
+			list_add(&priv->in_flight, &intel->in_flight);
+			priv = NULL;
+		}
 	}
 
 	if (bo != NULL) {
@@ -576,6 +580,7 @@ void i830_set_pixmap_bo(PixmapPtr pixmap, dri_bo * bo)
 
 		dri_bo_reference(bo);
 		priv->bo = bo;
+		priv->stride = i830_pixmap_pitch(pixmap);
 
 		ret = drm_intel_bo_get_tiling(bo,
 					      &priv->tiling,
@@ -883,35 +888,26 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
 
 	if (w && h) {
 		struct intel_pixmap *priv;
-		unsigned int size;
+		unsigned int size, tiling;
 		int stride;
 
-		priv = xcalloc(1, sizeof (struct intel_pixmap));
-		if (priv == NULL) {
-			fbDestroyPixmap(pixmap);
-			return NullPixmap;
-		}
-
 		/* Always attempt to tile, compute_size() will remove the
 		 * tiling for pixmaps that are either too large or too small
 		 * to be effectively tiled.
 		 */
-		priv->tiling = I915_TILING_X;
+		tiling = I915_TILING_X;
 		if (usage == INTEL_CREATE_PIXMAP_TILING_Y)
-			priv->tiling = I915_TILING_Y;
+			tiling = I915_TILING_Y;
 		if (usage == UXA_CREATE_PIXMAP_FOR_MAP)
-			priv->tiling = I915_TILING_NONE;
-
-		if (priv->tiling != I915_TILING_NONE) {
-		    if (w < 256)
-			priv->tiling = I915_TILING_NONE;
-		    if (h < 8)
-			priv->tiling = I915_TILING_NONE;
-		    if (h < 32 && priv->tiling == I915_TILING_Y)
-			priv->tiling = I915_TILING_X;
+			tiling = I915_TILING_NONE;
+
+		if (tiling != I915_TILING_NONE) {
+		    if (h <= 4)
+			tiling = I915_TILING_NONE;
+		    if (h <= 16 && tiling == I915_TILING_Y)
+			tiling = I915_TILING_X;
 		}
-		size = i830_uxa_pixmap_compute_size(pixmap, w, h,
-						    &priv->tiling, &stride);
+		size = i830_uxa_pixmap_compute_size(pixmap, w, h, &tiling, &stride);
 
 		/* Fail very large allocations on 32-bit systems.  Large BOs will
 		 * tend to hit SW fallbacks frequently, and also will tend to fail
@@ -923,7 +919,37 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
 		 */
 		if (sizeof(unsigned long) == 4 &&
 		    size > (unsigned int)(1024 * 1024 * 1024)) {
-			xfree(priv);
+			fbDestroyPixmap(pixmap);
+			return NullPixmap;
+		}
+
+		/* Perform a preliminary search for an in-flight bo */
+		if (usage != UXA_CREATE_PIXMAP_FOR_MAP) {
+			int aligned_h;
+
+			if (tiling == I915_TILING_X)
+				aligned_h = ALIGN(h, 8);
+			else if (tiling == I915_TILING_Y)
+				aligned_h = ALIGN(h, 32);
+			else
+				aligned_h = ALIGN(h, 2);
+
+			list_foreach_entry(priv, struct intel_pixmap,
+					   &intel->in_flight,
+					   in_flight) {
+				if (priv->tiling == tiling &&
+				    priv->stride >= stride &&
+				    priv->bo->size >= priv->stride * aligned_h) {
+					list_del(&priv->in_flight);
+					screen->ModifyPixmapHeader(pixmap, w, h, 0, 0, priv->stride, NULL);
+					i830_uxa_set_pixmap_intel(pixmap, priv);
+					return pixmap;
+				}
+			}
+		}
+
+		priv = xcalloc(1, sizeof (struct intel_pixmap));
+		if (priv == NULL) {
 			fbDestroyPixmap(pixmap);
 			return NullPixmap;
 		}
@@ -941,6 +967,8 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
 			return NullPixmap;
 		}
 
+		priv->stride = stride;
+		priv->tiling = tiling;
 		if (priv->tiling != I915_TILING_NONE)
 			drm_intel_bo_set_tiling(priv->bo,
 						&priv->tiling,
commit 96aa7a236ac0605324a94f7b7d10413cb219f071
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Apr 14 15:04:53 2010 +0100

    i830: Allocate bo's for glyphs larger than 32x32.
    
    As we only use the glyph cache for small glyphs, those larger than 32x32
    will first be copied to a bo and used as a mask in a composite
    operation. We can avoid the allocation and upload per use by allocating
    a bo for the over-sized glyph from the start. As the glyph is large
    anyway, the excess memory allocation is less significant.
    
    Using normal font sizes, firefox shows no change - as expected. However,
    using the 36 font size traces, we see around a 10% improvement on g45.
    
    Before:
          xcb          firefox-36-20090609  127.333  127.897   0.22%
          xcb          firefox-36-20090611   87.456   88.624   0.66%
          xcb             firefox-20090601   19.522   20.194   1.69%
         xlib          firefox-36-20090609  201.054  201.780   0.18%
         xlib          firefox-36-20090611  133.468  133.717   0.09%
         xlib             firefox-20090601   23.740   23.975   0.49%
    
    With large glyphs in bo:
          xcb          firefox-36-20090609  117.256  118.254   0.42%
          xcb          firefox-36-20090611   79.462   79.962   0.31%
          xcb             firefox-20090601   19.658   20.024   0.92%
         xlib          firefox-36-20090609  185.645  188.202   0.68%
         xlib          firefox-36-20090611  123.592  124.940   0.54%
         xlib             firefox-20090601   23.917   24.098   0.38%
    
    Thanks to Owain G. Ainsworth for the suggestion!
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index 58ed491..f3f0f65 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -876,7 +876,7 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
 	if (w > 32767 || h > 32767)
 		return NullPixmap;
 
-	if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE)
+	if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE && w <= 32 && h <= 32)
 		return fbCreatePixmap(screen, w, h, depth, usage);
 
 	pixmap = fbCreatePixmap(screen, 0, 0, depth, usage);
@@ -902,6 +902,14 @@ i830_uxa_create_pixmap(ScreenPtr screen, int w, int h, int depth,
 		if (usage == UXA_CREATE_PIXMAP_FOR_MAP)
 			priv->tiling = I915_TILING_NONE;
 
+		if (priv->tiling != I915_TILING_NONE) {
+		    if (w < 256)
+			priv->tiling = I915_TILING_NONE;
+		    if (h < 8)
+			priv->tiling = I915_TILING_NONE;
+		    if (h < 32 && priv->tiling == I915_TILING_Y)
+			priv->tiling = I915_TILING_X;
+		}
 		size = i830_uxa_pixmap_compute_size(pixmap, w, h,
 						    &priv->tiling, &stride);
 
commit 2d17bd50af367bead84985c22fdd43d264a5f072
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Apr 14 14:54:34 2010 +0100

    Revert "Revert "uxa: Try using put_image when copying from a memory buffer.""
    
    This reverts commit 6d50553e8f70d8f2142efdfd6c90bc27a599d0bc.
    
    Now that we have taught the fallback path not to recurse infinitely,
    re-enable the accelerated path for ShmPutImage and friends.

diff --git a/uxa/uxa-accel.c b/uxa/uxa-accel.c
index cd3e477..4f7fd41 100644
--- a/uxa/uxa-accel.c
+++ b/uxa/uxa-accel.c
@@ -450,18 +450,19 @@ uxa_copy_n_to_n(DrawablePtr pSrcDrawable,
 		goto fallback;
 	}
 
-	if (!uxa_pixmap_is_offscreen(pSrcPixmap) ||
-	    !uxa_pixmap_is_offscreen(pDstPixmap) ||
-	    !(*uxa_screen->info->prepare_copy) (pSrcPixmap, pDstPixmap,
+	if (!uxa_pixmap_is_offscreen(pDstPixmap))
+	    goto fallback;
+
+	if (uxa_pixmap_is_offscreen(pSrcPixmap)) {
+	    if (!(*uxa_screen->info->prepare_copy) (pSrcPixmap, pDstPixmap,
 						reverse ? -1 : 1,
 						upsidedown ? -1 : 1,
 						pGC ? pGC->alu : GXcopy,
 						pGC ? pGC->
-						planemask : FB_ALLONES)) {
+						planemask : FB_ALLONES))
 		goto fallback;
-	}
 
-	while (nbox--) {
+	    while (nbox--) {
 		(*uxa_screen->info->copy) (pDstPixmap,
 					   pbox->x1 + dx + src_off_x,
 					   pbox->y1 + dy + src_off_y,
@@ -470,9 +471,43 @@ uxa_copy_n_to_n(DrawablePtr pSrcDrawable,
 					   pbox->x2 - pbox->x1,
 					   pbox->y2 - pbox->y1);
 		pbox++;
-	}
+	    }
+
+	    (*uxa_screen->info->done_copy) (pDstPixmap);
+	} else {
+	    int stride, bpp;
+	    char *src;
 
-	(*uxa_screen->info->done_copy) (pDstPixmap);
+	    if (!uxa_screen->info->put_image)
+		goto fallback;
+
+	    /* Don't bother with under 8bpp, XYPixmaps. */
+	    bpp = pSrcPixmap->drawable.bitsPerPixel;
+	    if (bpp != pDstDrawable->bitsPerPixel || bpp < 8)
+		goto fallback;
+
+	    /* Only accelerate copies: no rop or planemask. */
+	    if (pGC && (!UXA_PM_IS_SOLID(pSrcDrawable, pGC->planemask) || pGC->alu != GXcopy))
+		goto fallback;
+
+	    src = pSrcPixmap->devPrivate.ptr;
+	    stride = pSrcPixmap->devKind;
+	    bpp /= 8;
+	    while (nbox--) {
+		if (!uxa_screen->info->put_image(pDstPixmap,
+						 pbox->x1 + dst_off_x,
+						 pbox->y1 + dst_off_y,
+						 pbox->x2 - pbox->x1,
+						 pbox->y2 - pbox->y1,
+						 (char *) src +
+						 (pbox->y1 + dy + src_off_y) * stride +
+						 (pbox->x1 + dx + src_off_x) * bpp,
+						 stride))
+		    goto fallback;
+
+		pbox++;
+	    }
+	}
 
 	return;
 
commit 1cc2c2c44ac72460cf1c4e6bdc13c612235809c9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Apr 14 11:18:25 2010 +0100

    i830: Use pixman_blt directly for performing the in-memory copy
    
    In order to avoid an infinite recursion after enabling CopyArea to use
    the put_image acceleration to either stream a blit or to copy in-place,
    we cannot call CopyArea from put_image for the fallback path. Instead,
    we can simply call pixman_blt directly, which coincidentally is a tiny
    bit faster.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/i830_uxa.c b/src/i830_uxa.c
index 09c2ef3..58ed491 100644
--- a/src/i830_uxa.c
+++ b/src/i830_uxa.c
@@ -767,7 +767,6 @@ static Bool i830_uxa_put_image(PixmapPtr pixmap,
 	ScrnInfoPtr scrn = xf86Screens[screen->myNum];
 	PixmapPtr scratch;
 	struct intel_pixmap *priv;
-	Bool scratch_pixmap;
 	GCPtr gc;
 	Bool ret;
 
@@ -804,39 +803,43 @@ static Bool i830_uxa_put_image(PixmapPtr pixmap,
 		ret = i830_bo_put_image(scratch, bo, src, src_pitch, w, h);
 
 		drm_intel_gem_bo_unmap_gtt(bo);
-		scratch_pixmap = FALSE;
 
-		if (!ret) {
-			(*screen->DestroyPixmap) (scratch);
-			return FALSE;
+		if (ret) {
+			gc = GetScratchGC(pixmap->drawable.depth, screen);
+			if (gc) {
+				ValidateGC(&pixmap->drawable, gc);
+
+				(*gc->ops->CopyArea)(&scratch->drawable,
+						     &pixmap->drawable,
+						     gc, 0, 0, w, h, x, y);
+
+				FreeScratchGC(gc);
+			} else
+				ret = FALSE;
 		}
+
+		(*screen->DestroyPixmap)(scratch);
 	} else {
 		/* bo is not busy so can be mapped without a stall, upload in-place. */
-		scratch = GetScratchPixmapHeader(screen, w, h,
-						 pixmap->drawable.depth,
-						 pixmap->drawable.bitsPerPixel,
-						 src_pitch, src);
-		scratch_pixmap = TRUE;
-	}
-
-	ret = FALSE;
-	gc = GetScratchGC(pixmap->drawable.depth, screen);
-	if (gc) {
-		ValidateGC(&pixmap->drawable, gc);
+		if (drm_intel_gem_bo_map_gtt(priv->bo)) {
+			xf86DrvMsg(scrn->scrnIndex, X_WARNING,
+				   "%s: bo map failed\n", __FUNCTION__);
+			return FALSE;
+		}
 
-		(*gc->ops->CopyArea)(&scratch->drawable,
-				     &pixmap->drawable,
-				     gc, 0, 0, w, h, x, y);
+		pixman_blt((uint32_t *)src, priv->bo->virtual,
+			   src_pitch / sizeof(uint32_t),
+			   pixmap->devKind / sizeof(uint32_t),
+			   pixmap->drawable.bitsPerPixel,
+			   pixmap->drawable.bitsPerPixel,
+			   0, 0,
+			   x, y,
+			   w, h);
 
-		FreeScratchGC(gc);
+		drm_intel_gem_bo_unmap_gtt(priv->bo);
 		ret = TRUE;
 	}
 
-	if (scratch_pixmap)
-		FreeScratchPixmapHeader(scratch);
-	else
-		(*screen->DestroyPixmap)(scratch);
-
 	return ret;
 }
 


More information about the xorg-commit mailing list