xf86-video-intel: 12 commits - src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna.h src/sna/sna_io.c

Chris Wilson ickle at kemper.freedesktop.org
Mon Nov 28 14:03:53 PST 2011


 src/sna/gen6_render.c |   11 +++--
 src/sna/gen7_render.c |   11 +++--
 src/sna/kgem.c        |  109 ++++++++++++++++++++++----------------------------
 src/sna/sna.h         |    2 
 src/sna/sna_accel.c   |   45 +++++++++++---------
 src/sna/sna_blt.c     |   15 +-----
 src/sna/sna_io.c      |   39 ++++++++++-------
 7 files changed, 115 insertions(+), 117 deletions(-)

New commits:
commit 5b1e9e15738b9001346ab6e0166f861ce308008e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Nov 28 22:01:00 2011 +0000

    sna: Always reduce tiling for thin pixmaps
    
    Benchmarking on the current code base says this is now a win. A
    reversal of older benchmarks, so expect further tuning.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 73d4a61..fd189cf 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1398,23 +1398,17 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
 		goto done;
 	}
 
-	/* Before the G33, we only have a small GTT to play with and tiled
-	 * surfaces always require full fence regions and so cause excessive
-	 * aperture thrashing.
-	 */
-	if (kgem->gen < 33) {
-		if (tiling == I915_TILING_X && width * bpp < 8*512/2) {
-			DBG(("%s: too thin [%d] for TILING_X\n",
-			     __FUNCTION__, width));
-			tiling = I915_TILING_NONE;
-			goto done;
-		}
-		if (tiling == I915_TILING_Y && width * bpp < 8*32/2) {
-			DBG(("%s: too thin [%d] for TILING_Y\n",
-			     __FUNCTION__, width));
-			tiling = I915_TILING_NONE;
-			goto done;
-		}
+	if (tiling == I915_TILING_X && width * bpp < 8*512/2) {
+		DBG(("%s: too thin [%d] for TILING_X\n",
+		     __FUNCTION__, width));
+		tiling = I915_TILING_NONE;
+		goto done;
+	}
+	if (tiling == I915_TILING_Y && width * bpp < 8*32/2) {
+		DBG(("%s: too thin [%d] for TILING_Y\n",
+		     __FUNCTION__, width));
+		tiling = I915_TILING_NONE;
+		goto done;
 	}
 
 	if (tiling && ALIGN(height, 2) * ALIGN(width*bpp, 8*64) <= 4096 * 8) {
commit dacb301c05dd1686e788d6ed8101887c27968389
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Nov 28 18:59:10 2011 +0000

    sna: Use the correct pitch when creating an untiled active buffer
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 1936ef2..73d4a61 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1496,7 +1496,7 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 {
 	struct list *cache;
 	struct kgem_bo *bo, *next;
-	uint32_t pitch, tiled_height[3], size;
+	uint32_t pitch, untiled_pitch, tiled_height[3], size;
 	uint32_t handle;
 	int exact = flags & CREATE_EXACT;
 	int i;
@@ -1514,6 +1514,7 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 	if (flags & CREATE_INACTIVE)
 		goto skip_active_search;
 
+	untiled_pitch = ALIGN(width * bpp / 8, 64);
 	for (i = 0; i <= I915_TILING_Y; i++)
 		tiled_height[i] = kgem_aligned_height(kgem, height, i);
 
@@ -1537,7 +1538,7 @@ search_active: /* Best active match first */
 				continue;
 			}
 		} else
-			bo->pitch = pitch;
+			bo->pitch = untiled_pitch;
 
 		s = bo->pitch * tiled_height[bo->tiling];
 		if (s <= bo->size) {
@@ -1640,7 +1641,7 @@ next_bo:
 	if (tiling != I915_TILING_NONE)
 		bo->tiling = gem_set_tiling(kgem->fd, handle, tiling, pitch);
 
-	assert (bo->size >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
+	assert(bo->size >= bo->pitch * kgem_aligned_height(kgem, height, bo->tiling));
 
 	DBG(("  new pitch=%d, tiling=%d, handle=%d, id=%d\n",
 	     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
commit 8657128fa7e758a2dde93340d6e58928d5f11255
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Nov 28 18:22:01 2011 +0000

    sna: Pass the pixmap to sna_replace()
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna.h b/src/sna/sna.h
index 4377774..c0ac888 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -626,8 +626,8 @@ void sna_write_boxes(struct sna *sna,
 		     const BoxRec *box, int n);
 
 struct kgem_bo *sna_replace(struct sna *sna,
+			    PixmapPtr pixmap,
 			    struct kgem_bo *bo,
-			    int width, int height, int bpp,
 			    const void *src, int stride);
 
 Bool
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 90c3046..c467f74 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -796,10 +796,8 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box)
 			    box->y2 >= pixmap->drawable.height) {
 				priv->gpu_bo =
 					sna_replace(sna,
+						    pixmap,
 						    priv->gpu_bo,
-						    pixmap->drawable.width,
-						    pixmap->drawable.height,
-						    pixmap->drawable.bitsPerPixel,
 						    pixmap->devPrivate.ptr,
 						    pixmap->devKind);
 			} else {
@@ -1106,10 +1104,8 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap)
 			    box->y2 >= pixmap->drawable.height) {
 				priv->gpu_bo =
 					sna_replace(sna,
+						    pixmap,
 						    priv->gpu_bo,
-						    pixmap->drawable.width,
-						    pixmap->drawable.height,
-						    pixmap->drawable.bitsPerPixel,
 						    pixmap->devPrivate.ptr,
 						    pixmap->devKind);
 			} else {
@@ -1343,11 +1339,7 @@ sna_put_image_upload_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	    box->x2 >= pixmap->drawable.width &&
 	    box->y2 >= pixmap->drawable.height) {
 		priv->gpu_bo =
-			sna_replace(sna, priv->gpu_bo,
-				    pixmap->drawable.width,
-				    pixmap->drawable.height,
-				    pixmap->drawable.bitsPerPixel,
-				    bits, stride);
+			sna_replace(sna, pixmap, priv->gpu_bo, bits, stride);
 		return TRUE;
 	}
 
@@ -2149,10 +2141,9 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 
 				dst_priv->gpu_bo =
 					sna_replace(sna,
+						    dst_pixmap,
 						    dst_priv->gpu_bo,
-						    dst_pixmap->drawable.width,
-						    dst_pixmap->drawable.height,
-						    bpp, bits, stride);
+						    bits, stride);
 
 				sna_damage_destroy(&dst_priv->cpu_damage);
 				sna_damage_all(&dst_priv->gpu_damage,
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 4c7beab..5e4dccf 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1011,8 +1011,7 @@ blt_put_composite(struct sna *sna,
 		data += (src_y - dst_y) * pitch;
 
 		dst_priv->gpu_bo =
-			sna_replace(sna, dst_priv->gpu_bo,
-				    r->width, r->height, bpp,
+			sna_replace(sna, op->dst.pixmap, dst_priv->gpu_bo,
 				    data, pitch);
 	} else {
 		BoxRec box;
@@ -1051,11 +1050,7 @@ fastcall static void blt_put_composite_box(struct sna *sna,
 		data += (box->x1 + op->u.blt.sx) * bpp;
 
 		dst_priv->gpu_bo =
-			sna_replace(sna,
-				    op->dst.bo,
-				    op->dst.width,
-				    op->dst.height,
-				    src->drawable.bitsPerPixel,
+			sna_replace(sna, op->dst.pixmap, op->dst.bo,
 				    data, pitch);
 	} else {
 		sna_write_boxes(sna,
@@ -1091,11 +1086,7 @@ static void blt_put_composite_boxes(struct sna *sna,
 		data += (box->x1 + op->u.blt.sx) * bpp;
 
 		dst_priv->gpu_bo =
-			sna_replace(sna,
-				    op->dst.bo,
-				    op->dst.width,
-				    op->dst.height,
-				    src->drawable.bitsPerPixel,
+			sna_replace(sna, op->dst.pixmap, op->dst.bo,
 				    data, pitch);
 	} else {
 		sna_write_boxes(sna,
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 2930713..aba636c 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -424,22 +424,28 @@ void sna_write_boxes(struct sna *sna,
 }
 
 struct kgem_bo *sna_replace(struct sna *sna,
+			    PixmapPtr pixmap,
 			    struct kgem_bo *bo,
-			    int width, int height, int bpp,
 			    const void *src, int stride)
 {
 	struct kgem *kgem = &sna->kgem;
 	void *dst;
 
 	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d)\n",
-	     __FUNCTION__, bo->handle, width, height, bpp, bo->tiling));
+	     __FUNCTION__, bo->handle,
+	     pixmap->drawable.width,
+	     pixmap->drawable.height,
+	     pixmap->drawable.bitsPerPixel,
+	     bo->tiling));
 
 	if (kgem_bo_is_busy(bo)) {
 		struct kgem_bo *new_bo;
 
 		new_bo = kgem_create_2d(kgem,
-					width, height, bpp, bo->tiling,
-					CREATE_INACTIVE);
+					pixmap->drawable.width,
+					pixmap->drawable.height,
+					pixmap->drawable.bitsPerPixel,
+					bo->tiling, CREATE_INACTIVE);
 		if (new_bo) {
 			kgem_bo_destroy(kgem, bo);
 			bo = new_bo;
@@ -447,18 +453,19 @@ struct kgem_bo *sna_replace(struct sna *sna,
 	}
 
 	if (bo->tiling == I915_TILING_NONE && bo->pitch == stride) {
-		kgem_bo_write(kgem, bo, src, (height-1)*stride + width*bpp/8);
-		return bo;
-	}
-
-	dst = kgem_bo_map(kgem, bo, PROT_READ | PROT_WRITE);
-	if (dst) {
-		memcpy_blt(src, dst, bpp,
-			   stride, bo->pitch,
-			   0, 0,
-			   0, 0,
-			   width, height);
-		munmap(dst, bo->size);
+		kgem_bo_write(kgem, bo, src,
+			      (pixmap->drawable.height-1)*stride + pixmap->drawable.width*pixmap->drawable.bitsPerPixel/8);
+	} else {
+		dst = kgem_bo_map(kgem, bo, PROT_READ | PROT_WRITE);
+		if (dst) {
+			memcpy_blt(src, dst, pixmap->drawable.bitsPerPixel,
+				   stride, bo->pitch,
+				   0, 0,
+				   0, 0,
+				   pixmap->drawable.width,
+				   pixmap->drawable.height);
+			munmap(dst, bo->size);
+		}
 	}
 
 	return bo;
commit 4e38d22105da2bd97db005dc505e75dcd22291d3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Nov 28 18:21:19 2011 +0000

    sna: Tidy kgem_choose_tiling()
    
    Reduce the calls to compute the surface size down to one.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 4dea527..1936ef2 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -463,7 +463,7 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
 	/* If it is too wide for the blitter, don't even bother.  */
 	*pitch = ALIGN(width * bpp / 8, tile_width);
 	if (kgem->gen < 40) {
-		if(tiling != I915_TILING_NONE) {
+		if (tiling != I915_TILING_NONE) {
 			if (*pitch > 8192)
 				return 0;
 			for (size = tile_width; size < *pitch; size <<= 1)
@@ -1338,63 +1338,47 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
 	uint32_t pitch;
 
 	if (DBG_NO_TILING)
-		return I915_TILING_NONE;
+		return tiling < 0 ? tiling : I915_TILING_NONE;
 
 	if (kgem->gen < 40) {
 		if (tiling) {
 			if (width * bpp > 8192 * 8) {
 				DBG(("%s: pitch too large for tliing [%d]\n",
 				     __FUNCTION__, width*bpp/8));
-				return I915_TILING_NONE;
-			}
-
-			if ((width > 2048 || height > 2048) &&
-			    kgem_surface_size(kgem, false,
-					      width, height, bpp, I915_TILING_X,
-					      &pitch) < kgem->max_object_size) {
+				tiling = I915_TILING_NONE;
+				goto done;
+			} else if (width > 2048 || height > 2048) {
 				DBG(("%s: large buffer (%dx%d), forcing TILING_X\n",
 				     __FUNCTION__, width, height));
-				return -I915_TILING_X;
+				tiling = -I915_TILING_X;
 			}
 		}
 	} else {
-		if (width*bpp > (MAXSHORT-512) * 8 &&
-		    kgem_surface_size(kgem, false,
-				      width, height, bpp, I915_TILING_X,
-				      &pitch) < kgem->max_object_size) {
+		if (width*bpp > (MAXSHORT-512) * 8) {
 			DBG(("%s: large pitch [%d], forcing TILING_X\n",
 			     __FUNCTION__, width*bpp/8));
-			return -I915_TILING_X;
-		}
-
-		if (tiling && (width > 8192 || height > 8192) &&
-		    kgem_surface_size(kgem, false,
-				      width, height, bpp, I915_TILING_X,
-				      &pitch) < kgem->max_object_size) {
+			tiling = -I915_TILING_X;
+		} else if (tiling && (width > 8192 || height > 8192)) {
 			DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n",
 			     __FUNCTION__, width, height));
-			return -I915_TILING_X;
+			tiling = -I915_TILING_X;
 		}
 	}
 
-	if (tiling < 0) {
-		assert(kgem_surface_size(kgem, false,
-					 width, height, bpp, -tiling,
-					 &pitch) < kgem->max_object_size);
-		return tiling;
-	}
-
 	/* First check that we can fence the whole object */
 	if (tiling &&
 	    kgem_surface_size(kgem, false,
 			      width, height, bpp, tiling,
-			      &pitch) >= kgem->max_object_size) {
+			      &pitch) > kgem->max_object_size) {
 		DBG(("%s: too large (%dx%d) to be fenced, discarding tiling\n",
 		     __FUNCTION__, width, height));
 		tiling = I915_TILING_NONE;
 		goto done;
 	}
 
+	if (tiling < 0)
+		return tiling;
+
 	if (tiling == I915_TILING_Y && height <= 16) {
 		DBG(("%s: too short [%d] for TILING_Y\n",
 		     __FUNCTION__,height));
@@ -1433,10 +1417,8 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
 		}
 	}
 
-	if (tiling &&
-	    ALIGN(height, 2) * ALIGN(width*bpp, 8*64) < 4096 * 8) {
-		DBG(("%s: too small [%d] for TILING_%c\n",
-		     __FUNCTION__,
+	if (tiling && ALIGN(height, 2) * ALIGN(width*bpp, 8*64) <= 4096 * 8) {
+		DBG(("%s: too small [%d] for TILING_%c\n", __FUNCTION__,
 		     ALIGN(height, 2) * ALIGN(width*bpp, 8*64) / 8,
 		     tiling == I915_TILING_X ? 'X' : 'Y'));
 		tiling = I915_TILING_NONE;
@@ -1464,7 +1446,7 @@ static bool _kgem_can_create_2d(struct kgem *kgem,
 
 	size = kgem_surface_size(kgem, false,
 				 width, height, bpp, tiling, &pitch);
-	if (size == 0 || size >= kgem->max_object_size)
+	if (size == 0 || size > kgem->max_object_size)
 		size = kgem_surface_size(kgem, false,
 					 width, height, bpp,
 					 I915_TILING_NONE, &pitch);
commit 04f47e52da06cc20c2a266da92ba3f8e961433cb
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Nov 27 17:45:05 2011 +0000

    sna: Don't try to guess when the kernel believes a buffer to be flushed
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index c85ea3b..4dea527 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -207,6 +207,7 @@ Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
 	if (gem_write(kgem->fd, bo->handle, 0, length, data))
 		return FALSE;
 
+	assert(!kgem_busy(kgem, bo->handle));
 	bo->needs_flush = false;
 	if (bo->gpu)
 		kgem_retire(kgem);
@@ -690,11 +691,8 @@ bool kgem_retire(struct kgem *kgem)
 			list_del(&bo->request);
 			bo->rq = NULL;
 
-#if 0
-			/* XXX we loose track of a write-flush somewhere? */
-			if (!bo->needs_flush)
+			if (bo->needs_flush)
 				bo->needs_flush = kgem_busy(kgem, bo->handle);
-#endif
 			bo->gpu = bo->needs_flush;
 
 			if (bo->refcnt == 0) {
@@ -709,6 +707,7 @@ bool kgem_retire(struct kgem *kgem)
 						DBG(("%s: moving %d to inactive\n",
 						     __FUNCTION__, bo->handle));
 						bo->purged = true;
+						assert(!kgem_busy(kgem,bo->handle));
 						list_move(&bo->list,
 							  inactive(kgem, bo->size));
 						retired = true;
@@ -1791,12 +1790,8 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 
 		assert(!bo->purged);
 
-		if (bo->exec == NULL) {
+		if (bo->exec == NULL)
 			_kgem_add_bo(kgem, bo);
-			if (bo->needs_flush &&
-			    (read_write_domain >> 16) != I915_GEM_DOMAIN_RENDER)
-				bo->needs_flush = false;
-		}
 
 		if (read_write_domain & KGEM_RELOC_FENCED && kgem->gen < 40) {
 			if (bo->tiling &&
@@ -1832,10 +1827,13 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 {
 	void *ptr;
 
+	assert(!kgem_busy(kgem, bo->handle));
+
 	ptr = gem_mmap(kgem->fd, bo->handle, bo->size, prot);
 	if (ptr == NULL)
 		return NULL;
 
+	assert(!kgem_busy(kgem, bo->handle));
 	bo->needs_flush = false;
 	if (bo->gpu)
 		kgem_retire(kgem);
@@ -1952,6 +1950,7 @@ void kgem_bo_sync(struct kgem *kgem, struct kgem_bo *bo, bool for_write)
 	set_domain.write_domain = for_write ? I915_GEM_DOMAIN_CPU : 0;
 
 	drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+	assert(!kgem_busy(kgem, bo->handle));
 	bo->needs_flush = false;
 	if (bo->gpu) {
 		kgem->sync = false;
@@ -2310,6 +2309,7 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 		gem_read(kgem->fd,
 			 bo->base.handle, (char *)(bo+1)+offset,
 			 offset, length);
+		assert(!kgem_busy(kgem, bo->base.handle));
 		bo->base.needs_flush = false;
 		if (bo->base.gpu)
 			kgem_retire(kgem);
commit d87c332653001db4413c7dd81116ee03830db491
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Nov 27 10:18:11 2011 +0000

    sna: Eliminate empty glyphs during format conversion
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index cb0ef1f..90c3046 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -7371,6 +7371,12 @@ static bool sna_set_glyph(CharInfoPtr in, CharInfoPtr out)
 		return false;
 	}
 
+	/* Skip empty glyphs */
+	if (w == 1 && h == 1 && (in->bits[0] & 1) == 0) {
+		out->bits = (void *)-1;
+		return false;
+	}
+
 	w = (w + 7) >> 3;
 
 	out->metrics = in->metrics;
commit 413776c79ee7c55b3a48b1ed29af63f8cea39337
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Nov 27 10:17:40 2011 +0000

    sna: Use 64-bit mov (when available) for bitmap glyphs
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 4b013c9..cb0ef1f 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -7282,13 +7282,11 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
 			b[0] = br00 | (1 + len);
 			b[1] = (uint16_t)y1 << 16 | (uint16_t)x1;
 			b[2] = (uint16_t)(y1+h) << 16 | (uint16_t)(x1+w);
-			{
-				uint32_t *glyph = (uint32_t*)c->bits;
-				b += 3;
+			 {
+				uint64_t *src = (uint64_t *)c->bits;
+				uint64_t *dst = (uint64_t *)(b + 3);
 				do  {
-					*b++ = *glyph++;
-					*b++ = *glyph++;
-
+					*dst++ = *src++;
 					len -= 2;
 				} while (len);
 			}
commit 507f99eba2c72bc109c9267c698f3d5f9fb6f539
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Nov 26 17:57:57 2011 +0000

    sna/gen6+: Only use BLT if the untiled bo will cause per-pixel TLB misses
    
    i.e. only force the BLT if using the sampler is going to be incredibly
    slow.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 6fbf636..a739f5c 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2581,13 +2581,18 @@ gen6_emit_copy_state(struct sna *sna,
 	gen6_emit_state(sna, op, offset);
 }
 
+static inline bool untiled_tlb_miss(struct kgem_bo *bo)
+{
+	return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
+}
+
 static inline bool prefer_blt_copy(struct sna *sna,
 				   struct kgem_bo *src_bo,
 				   struct kgem_bo *dst_bo)
 {
-	return (src_bo->tiling == I915_TILING_NONE ||
-		dst_bo->tiling == I915_TILING_NONE ||
-		sna->kgem.ring == KGEM_BLT);
+	return (sna->kgem.ring == KGEM_BLT ||
+		untiled_tlb_miss(src_bo) ||
+		untiled_tlb_miss(dst_bo));
 }
 
 static Bool
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 1e614b8..ce14a5d 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2701,13 +2701,18 @@ gen7_emit_copy_state(struct sna *sna,
 	gen7_emit_state(sna, op, offset);
 }
 
+static inline bool untiled_tlb_miss(struct kgem_bo *bo)
+{
+	return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
+}
+
 static inline bool prefer_blt_copy(struct sna *sna,
 				   struct kgem_bo *src_bo,
 				   struct kgem_bo *dst_bo)
 {
-	return (src_bo->tiling == I915_TILING_NONE ||
-		dst_bo->tiling == I915_TILING_NONE ||
-		sna->kgem.ring == KGEM_BLT);
+	return (sna->kgem.ring == KGEM_BLT ||
+		untiled_tlb_miss(src_bo) ||
+		untiled_tlb_miss(dst_bo));
 }
 
 static Bool
commit f11e9f189111f53ace20381a0bd4f559ccd3605f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Nov 26 17:12:03 2011 +0000

    sna: Only check for all-damaged CPU bo if we may discard the GPU bo
    
    The goal of the optimisation is to discard the GPU bo early, so we
    can skip the extra damage reduction if there is no gpu bo.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 1cb79b8..4b013c9 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -741,7 +741,8 @@ done:
 		DBG(("%s: applying cpu damage\n", __FUNCTION__));
 		assert_pixmap_contains_box(pixmap, RegionExtents(region));
 		sna_damage_add(&priv->cpu_damage, region);
-		if (sna_damage_is_all(&priv->cpu_damage,
+		if (priv->gpu_bo &&
+		    sna_damage_is_all(&priv->cpu_damage,
 				      pixmap->drawable.width,
 				      pixmap->drawable.height)) {
 			DBG(("%s: replaced entire pixmap\n", __FUNCTION__));
@@ -1437,7 +1438,8 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 		assert_pixmap_contains_box(pixmap, RegionExtents(region));
 		sna_damage_subtract(&priv->gpu_damage, region);
 		sna_damage_add(&priv->cpu_damage, region);
-		if (sna_damage_is_all(&priv->cpu_damage,
+		if (priv->gpu_bo &&
+		    sna_damage_is_all(&priv->cpu_damage,
 				      pixmap->drawable.width,
 				      pixmap->drawable.height)) {
 			DBG(("%s: replaced entire pixmap\n", __FUNCTION__));
commit ccee730e45d8c2490e578d0b85d9e76f3a44f76f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Nov 26 16:47:04 2011 +0000

    sna: Discard the gpu-bo if the entire pixmap is written to by the CPU.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 03020c9..1cb79b8 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -741,6 +741,12 @@ done:
 		DBG(("%s: applying cpu damage\n", __FUNCTION__));
 		assert_pixmap_contains_box(pixmap, RegionExtents(region));
 		sna_damage_add(&priv->cpu_damage, region);
+		if (sna_damage_is_all(&priv->cpu_damage,
+				      pixmap->drawable.width,
+				      pixmap->drawable.height)) {
+			DBG(("%s: replaced entire pixmap\n", __FUNCTION__));
+			sna_pixmap_destroy_gpu_bo(sna, priv);
+		}
 		if (priv->flush)
 			list_move(&priv->list, &sna->dirty_pixmaps);
 	}
commit df68b20a17e28c0898226f970315bc8e515e9f56
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Nov 26 16:45:22 2011 +0000

    sna: Disabling tiling if the bo would be smaller than a page
    
    Once again experiment with untiled small buffers.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index fe8cf48..c85ea3b 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1434,6 +1434,16 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
 		}
 	}
 
+	if (tiling &&
+	    ALIGN(height, 2) * ALIGN(width*bpp, 8*64) < 4096 * 8) {
+		DBG(("%s: too small [%d] for TILING_%c\n",
+		     __FUNCTION__,
+		     ALIGN(height, 2) * ALIGN(width*bpp, 8*64) / 8,
+		     tiling == I915_TILING_X ? 'X' : 'Y'));
+		tiling = I915_TILING_NONE;
+		goto done;
+	}
+
 done:
 	DBG(("%s: %dx%d -> %d\n", __FUNCTION__, width, height, tiling));
 	return tiling;
commit d88ad2e60ac3d9f5608db1653a7af789f7f1b446
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Nov 27 09:01:09 2011 +0000

    sna: Round up stride to alignment for TLB miss checking
    
    We want to avoid the condition of reducing the tiling mode (when reusing
    an active untiled buffer in preference to creating a new one) for a wide
    buffer when doing so will force a TLB miss on each sample.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 9fecb1f..fe8cf48 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1401,7 +1401,7 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
 		     __FUNCTION__,height));
 		tiling = I915_TILING_X;
 	}
-	if (tiling && width * bpp >= 8 * 4096) {
+	if (tiling && width * bpp > 8 * (4096 - 64)) {
 		DBG(("%s: TLB miss between lines %dx%d (pitch=%d), forcing tiling %d\n",
 		     __FUNCTION__,
 		     width, height, width*bpp/8,


More information about the xorg-commit mailing list