xf86-video-intel: 9 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/sna_accel.c

Chris Wilson ickle at kemper.freedesktop.org
Sun Dec 18 06:26:35 PST 2011


 src/sna/gen2_render.c |   33 +++++++++-
 src/sna/gen3_render.c |   72 +++++++++++++++--------
 src/sna/gen5_render.c |   38 +++++++++++-
 src/sna/gen6_render.c |   43 +++++++++++++-
 src/sna/gen7_render.c |   43 +++++++++++++-
 src/sna/kgem.c        |  151 ++++++++++++++++++++++++++++----------------------
 src/sna/sna_accel.c   |   59 ++++++++++++++++---
 7 files changed, 327 insertions(+), 112 deletions(-)

New commits:
commit b43548af39f8773283b744e979ee575032183cdc
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 18 12:11:01 2011 +0000

    sna: Explicitly handle errors from madv
    
    In order to avoid conflating whether a bo was marked purgeable with its
    retained state, we need to carefully handle the errors from madv.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index e5fec10..80ed710 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -80,6 +80,7 @@ static inline void list_replace(struct list *old,
 #define DBG_NO_HW 0
 #define DBG_NO_TILING 0
 #define DBG_NO_VMAP 0
+#define DBG_NO_MADV 0
 #define DBG_NO_RELAXED_FENCING 0
 #define DBG_DUMP 0
 
@@ -266,19 +267,69 @@ static uint32_t gem_create(int fd, int size)
 }
 
 static bool
-gem_madvise(int fd, uint32_t handle, uint32_t state)
+kgem_bo_set_purgeable(struct kgem *kgem, struct kgem_bo *bo)
 {
+#if DBG_NO_MADV
+	return true;
+#else
 	struct drm_i915_gem_madvise madv;
-	int ret;
+
+	assert(bo->exec == NULL);
+	assert(!bo->purged);
+
+	VG_CLEAR(madv);
+	madv.handle = bo->handle;
+	madv.madv = I915_MADV_DONTNEED;
+	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
+		bo->purged = 1;
+		return madv.retained;
+	}
+
+	return true;
+#endif
+}
+
+static bool
+kgem_bo_is_retained(struct kgem *kgem, struct kgem_bo *bo)
+{
+#if DBG_NO_MADV
+	return true;
+#else
+	struct drm_i915_gem_madvise madv;
+
+	if (!bo->purged)
+		return true;
 
 	VG_CLEAR(madv);
-	madv.handle = handle;
-	madv.madv = state;
-	madv.retained = 1;
-	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
+	madv.handle = bo->handle;
+	madv.madv = I915_MADV_DONTNEED;
+	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0)
+		return madv.retained;
 
-	return madv.retained;
-	(void)ret;
+	return false;
+#endif
+}
+
+static bool
+kgem_bo_clear_purgeable(struct kgem *kgem, struct kgem_bo *bo)
+{
+#if DBG_NO_MADV
+	return true;
+#else
+	struct drm_i915_gem_madvise madv;
+
+	assert(bo->purged);
+
+	VG_CLEAR(madv);
+	madv.handle = bo->handle;
+	madv.madv = I915_MADV_WILLNEED;
+	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv) == 0) {
+		bo->purged = 0;
+		return madv.retained;
+	}
+
+	return false;
+#endif
 }
 
 static void gem_close(int fd, uint32_t handle)
@@ -731,18 +782,12 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		bo->rq = &_kgem_static_request;
 	} else {
 		if (!IS_CPU_MAP(bo->map)) {
-			assert(!bo->purged);
-
-			DBG(("%s: handle=%d, purged\n",
-			     __FUNCTION__, bo->handle));
-
-			if (!gem_madvise(kgem->fd, bo->handle,
-					 I915_MADV_DONTNEED)) {
+			if (!kgem_bo_set_purgeable(kgem, bo)) {
 				kgem->need_purge |= bo->domain == DOMAIN_GPU;
 				goto destroy;
 			}
-
-			bo->purged = true;
+			DBG(("%s: handle=%d, purged\n",
+			     __FUNCTION__, bo->handle));
 		}
 
 		DBG(("%s: handle=%d -> inactive\n", __FUNCTION__, bo->handle));
@@ -779,8 +824,7 @@ bool kgem_retire(struct kgem *kgem)
 
 		DBG(("%s: moving %d from flush to inactive\n",
 		     __FUNCTION__, bo->handle));
-		if (gem_madvise(kgem->fd, bo->handle, I915_MADV_DONTNEED)) {
-			bo->purged = true;
+		if (kgem_bo_set_purgeable(kgem, bo)) {
 			bo->needs_flush = false;
 			bo->domain = DOMAIN_NONE;
 			assert(bo->rq == &_kgem_static_request);
@@ -812,6 +856,7 @@ bool kgem_retire(struct kgem *kgem)
 			list_del(&bo->request);
 			bo->rq = NULL;
 
+			assert(bo->exec == NULL);
 			if (bo->needs_flush)
 				bo->needs_flush = kgem_busy(kgem, bo->handle);
 			if (!bo->needs_flush)
@@ -824,12 +869,9 @@ bool kgem_retire(struct kgem *kgem)
 						     __FUNCTION__, bo->handle));
 						list_add(&bo->request, &kgem->flushing);
 						bo->rq = &_kgem_static_request;
-					} else if(gem_madvise(kgem->fd,
-							      bo->handle,
-							      I915_MADV_DONTNEED)) {
+					} else if(kgem_bo_set_purgeable(kgem, bo)) {
 						DBG(("%s: moving %d to inactive\n",
 						     __FUNCTION__, bo->handle));
-						bo->purged = true;
 						list_move(&bo->list,
 							  inactive(kgem, bo->size));
 						retired = true;
@@ -848,8 +890,7 @@ bool kgem_retire(struct kgem *kgem)
 
 		rq->bo->refcnt--;
 		assert(rq->bo->refcnt == 0);
-		if (gem_madvise(kgem->fd, rq->bo->handle, I915_MADV_DONTNEED)) {
-			rq->bo->purged = true;
+		if (kgem_bo_set_purgeable(kgem, rq->bo)) {
 			assert(rq->bo->rq == NULL);
 			assert(list_is_empty(&rq->bo->request));
 			list_move(&rq->bo->list, inactive(kgem, rq->bo->size));
@@ -1172,7 +1213,7 @@ void _kgem_submit(struct kgem *kgem)
 				kgem->wedged = 1;
 				ret = 0;
 			}
-#if DEBUG_KGEM
+#if !NDEBUG
 			if (ret < 0) {
 				int i;
 				ErrorF("batch (end=%d, size=%d) submit failed: %d\n",
@@ -1213,10 +1254,10 @@ void _kgem_submit(struct kgem *kgem)
 					       kgem->reloc[i].write_domain,
 					       (int)kgem->reloc[i].presumed_offset);
 				}
-				abort();
+				FatalError("SNA: failed to submit batchbuffer: ret=%d\n",
+					   errno);
 			}
 #endif
-			assert(ret == 0);
 
 			if (DEBUG_FLUSH_SYNC) {
 				struct drm_i915_gem_set_domain set_domain;
@@ -1323,9 +1364,7 @@ bool kgem_expire_cache(struct kgem *kgem)
 			bo = list_last_entry(&kgem->inactive[i],
 					     struct kgem_bo, list);
 
-			if ((!bo->purged ||
-			     gem_madvise(kgem->fd, bo->handle,
-					 I915_MADV_DONTNEED)) &&
+			if (kgem_bo_is_retained(kgem, bo) &&
 			    bo->delta > expire) {
 				idle = false;
 				break;
@@ -1399,14 +1438,9 @@ search_linear_cache(struct kgem *kgem, unsigned int size, bool use_active)
 		if (use_active && bo->tiling != I915_TILING_NONE)
 			continue;
 
-		if (bo->purged) {
-			if (!gem_madvise(kgem->fd, bo->handle,
-					 I915_MADV_WILLNEED)) {
-				kgem->need_purge |= bo->domain == DOMAIN_GPU;
-				continue;
-			}
-
-			bo->purged = false;
+		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
+			kgem->need_purge |= bo->domain == DOMAIN_GPU;
+			continue;
 		}
 
 		if (I915_TILING_NONE != bo->tiling &&
@@ -1697,14 +1731,9 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 			bo->pitch = pitch;
 			list_del(&bo->list);
 
-			if (bo->purged) {
-				if (!gem_madvise(kgem->fd, bo->handle,
-						 I915_MADV_WILLNEED)) {
-					kgem_bo_free(kgem, bo);
-					break;
-				}
-
-				bo->purged = false;
+			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
+				kgem_bo_free(kgem, bo);
+				break;
 			}
 
 			bo->delta = 0;
@@ -1755,16 +1784,11 @@ search_active: /* Best active match first */
 			if (bo->rq == NULL)
 				list_del(&bo->request);
 
-			if (bo->purged) {
-				if (!gem_madvise(kgem->fd, bo->handle,
-						 I915_MADV_WILLNEED)) {
-					kgem->need_purge |= bo->domain == DOMAIN_GPU;
-					kgem_bo_free(kgem, bo);
-					bo = NULL;
-					goto search_active;
-				}
-
-				bo->purged = false;
+			if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
+				kgem->need_purge |= bo->domain == DOMAIN_GPU;
+				kgem_bo_free(kgem, bo);
+				bo = NULL;
+				goto search_active;
 			}
 
 			bo->unique_id = kgem_get_unique_id(kgem);
@@ -1809,14 +1833,9 @@ skip_active_search:
 		list_del(&bo->list);
 		assert(list_is_empty(&bo->request));
 
-		if (bo->purged) {
-			if (!gem_madvise(kgem->fd, bo->handle,
-					 I915_MADV_WILLNEED)) {
-				kgem->need_purge |= bo->domain == DOMAIN_GPU;
-				goto next_bo;
-			}
-
-			bo->purged = false;
+		if (bo->purged && !kgem_bo_clear_purgeable(kgem, bo)) {
+			kgem->need_purge |= bo->domain == DOMAIN_GPU;
+			goto next_bo;
 		}
 
 		if (bo->map)
@@ -2067,7 +2086,7 @@ static void kgem_trim_vma_cache(struct kgem *kgem)
 		list_del(&old->vma);
 		kgem->vma_count--;
 
-		if (old->domain != DOMAIN_GPU && old->refcnt == 0)
+		if (old->rq == NULL && old->refcnt == 0)
 			kgem_bo_free(kgem, old);
 	}
 }
commit 954cf5129d462f7f4ff5d4ff44b256e0f091667c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 18 10:40:35 2011 +0000

    sna/gen[67]: check for context switch after preparing source
    
    If we used the BLT to prepare the source, see if we can continue the
    operation on the BLT.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 77671a4..5a83fd0 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2078,7 +2078,7 @@ gen6_composite_set_target(struct sna *sna,
 
 static Bool
 try_blt(struct sna *sna,
-       	PicturePtr dst, PicturePtr src,
+	PicturePtr dst, PicturePtr src,
 	int width, int height)
 {
 	if (sna->kgem.ring == KGEM_BLT) {
@@ -2295,6 +2295,19 @@ gen6_render_composite(struct sna *sna,
 		break;
 	}
 
+	/* Did we just switch rings to prepare the source? */
+	if (sna->kgem.ring == KGEM_BLT && mask == NULL &&
+	    sna_blt_composite(sna, op,
+			      src, dst,
+			      src_x, src_y,
+			      dst_x, dst_y,
+			      width, height, tmp)) {
+		if (tmp->redirect.real_bo)
+			kgem_bo_destroy(&sna->kgem, tmp->redirect.real_bo);
+		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+		return TRUE;
+	}
+
 	tmp->is_affine = tmp->src.is_affine;
 	tmp->has_component_alpha = FALSE;
 	tmp->need_magic_ca_pass = FALSE;
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 20945bc..aeef0ae 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2408,6 +2408,19 @@ gen7_render_composite(struct sna *sna,
 		break;
 	}
 
+	/* Did we just switch rings to prepare the source? */
+	if (sna->kgem.ring == KGEM_BLT && mask == NULL &&
+	    sna_blt_composite(sna, op,
+			      src, dst,
+			      src_x, src_y,
+			      dst_x, dst_y,
+			      width, height, tmp)) {
+		if (tmp->redirect.real_bo)
+			kgem_bo_destroy(&sna->kgem, tmp->redirect.real_bo);
+		kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+		return TRUE;
+	}
+
 	tmp->is_affine = tmp->src.is_affine;
 	tmp->has_component_alpha = FALSE;
 	tmp->need_magic_ca_pass = FALSE;
commit 90a432431c8fc293db611289c3369669c2975706
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 18 10:23:11 2011 +0000

    sna/gen[23]: Try BLT if the source/target do not fit in the 3D pipeline
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 9f8e6db..c8d093b 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -1238,7 +1238,8 @@ gen2_composite_set_target(struct sna *sna,
 
 static Bool
 try_blt(struct sna *sna,
-	PicturePtr source,
+	PicturePtr dst,
+	PicturePtr src,
 	int width, int height)
 {
 	uint32_t color;
@@ -1254,14 +1255,27 @@ try_blt(struct sna *sna,
 		return TRUE;
 	}
 
+	if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
+		DBG(("%s: target too large for 3D pipe (%d, %d)\n",
+		     __FUNCTION__,
+		     dst->pDrawable->width, dst->pDrawable->height));
+		return TRUE;
+	}
+
 	/* If it is a solid, try to use the BLT paths */
-	if (sna_picture_is_solid(source, &color))
+	if (sna_picture_is_solid(src, &color))
 		return TRUE;
 
-	if (!source->pDrawable)
+	if (!src->pDrawable)
 		return FALSE;
 
-	return is_cpu(source->pDrawable);
+	if (too_large(src->pDrawable->width, src->pDrawable->height)) {
+		DBG(("%s: source too large for 3D pipe (%d, %d)\n",
+		     __FUNCTION__,
+		     src->pDrawable->width, src->pDrawable->height));
+		return TRUE;
+	}
+	return is_cpu(src->pDrawable);
 }
 
 static bool
@@ -1402,7 +1416,7 @@ gen2_render_composite(struct sna *sna,
 	 * 3D -> 2D context switch.
 	 */
 	if (mask == NULL &&
-	    try_blt(sna, src, width, height) &&
+	    try_blt(sna, dst, src, width, height) &&
 	    sna_blt_composite(sna,
 			      op, src, dst,
 			      src_x, src_y,
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index cd0044a..96c0d9b 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2137,7 +2137,7 @@ gen3_composite_picture(struct sna *sna,
 }
 
 static inline Bool
-picture_is_cpu(PicturePtr picture)
+source_use_blt(struct sna *sna, PicturePtr picture)
 {
 	if (!picture->pDrawable)
 		return FALSE;
@@ -2148,12 +2148,21 @@ picture_is_cpu(PicturePtr picture)
 	    picture->repeat)
 		return FALSE;
 
+	if (too_large(picture->pDrawable->width,
+		      picture->pDrawable->height))
+		return TRUE;
+
+	/* If we can sample directly from user-space, do so */
+	if (sna->kgem.has_vmap)
+		return FALSE;
+
 	return is_cpu(picture->pDrawable);
 }
 
 static Bool
 try_blt(struct sna *sna,
-	PicturePtr source,
+	PicturePtr dst,
+	PicturePtr src,
 	int width, int height)
 {
 	if (sna->kgem.mode != KGEM_RENDER) {
@@ -2167,12 +2176,15 @@ try_blt(struct sna *sna,
 		return TRUE;
 	}
 
-	/* If we can sample directly from user-space, do so */
-	if (sna->kgem.has_vmap)
-		return FALSE;
+	if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
+		DBG(("%s: target too large for 3D pipe (%d, %d)\n",
+		     __FUNCTION__,
+		     dst->pDrawable->width, dst->pDrawable->height));
+		return TRUE;
+	}
 
 	/* is the source picture only in cpu memory e.g. a shm pixmap? */
-	return picture_is_cpu(source);
+	return source_use_blt(sna, src);
 }
 
 static void
@@ -2397,7 +2409,7 @@ gen3_render_composite(struct sna *sna,
 	 * 3D -> 2D context switch.
 	 */
 	if (mask == NULL &&
-	    try_blt(sna, src, width, height) &&
+	    try_blt(sna, dst, src, width, height) &&
 	    sna_blt_composite(sna,
 			      op, src, dst,
 			      src_x, src_y,
commit eeb9741981be528d1fba3021a30ab24084a5ce8c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 18 10:04:08 2011 +0000

    sna/gen3: Tidy checks against hardcoded maximum 3D pipeline size
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 2802930..cd0044a 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -64,6 +64,8 @@ enum {
 	SHADER_OPACITY,
 };
 
+#define MAX_3D_SIZE 2048
+
 #define OUT_BATCH(v) batch_emit(sna, v)
 #define OUT_BATCH_F(v) batch_emit_float(sna, v)
 #define OUT_VERTEX(v) vertex_emit(sna, v)
@@ -122,6 +124,11 @@ static const struct formatinfo {
 
 #define xFixedToDouble(f) pixman_fixed_to_double(f)
 
+static inline bool too_large(int width, int height)
+{
+	return (width | height) > MAX_3D_SIZE;
+}
+
 static inline uint32_t gen3_buf_tiling(uint32_t tiling)
 {
 	uint32_t v = 0;
@@ -2121,7 +2128,7 @@ gen3_composite_picture(struct sna *sna,
 		return sna_render_picture_convert(sna, picture, channel, pixmap,
 						  x, y, w, h, dst_x, dst_y);
 
-	if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048)
+	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
 		return sna_render_picture_extract(sna, picture, channel,
 						  x, y, w, h, dst_x, dst_y);
 
@@ -2154,7 +2161,7 @@ try_blt(struct sna *sna,
 		return TRUE;
 	}
 
-	if (width > 2048 || height > 2048) {
+	if (too_large(width, height)) {
 		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
 		     __FUNCTION__, width, height));
 		return TRUE;
@@ -2430,7 +2437,7 @@ gen3_render_composite(struct sna *sna,
 
 	tmp->op = op;
 	tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
-	if (tmp->dst.width > 2048 || tmp->dst.height > 2048 ||
+	if (too_large(tmp->dst.width, tmp->dst.height) ||
 	    !gen3_check_pitch_3d(tmp->dst.bo)) {
 		if (!sna_render_composite_redirect(sna, tmp,
 						   dst_x, dst_y, width, height))
@@ -2963,7 +2970,7 @@ gen3_render_composite_spans(struct sna *sna,
 
 	tmp->base.op = op;
 	tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format);
-	if (tmp->base.dst.width > 2048 || tmp->base.dst.height > 2048 ||
+	if (too_large(tmp->base.dst.width, tmp->base.dst.height) ||
 	    !gen3_check_pitch_3d(tmp->base.dst.bo)) {
 		if (!sna_render_composite_redirect(sna, &tmp->base,
 						   dst_x, dst_y, width, height))
@@ -3442,8 +3449,7 @@ gen3_render_video(struct sna *sna,
 	if (dst_bo == NULL)
 		return FALSE;
 
-	if (pixmap->drawable.width > 2048 ||
-	    pixmap->drawable.height > 2048 ||
+	if (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
 	    !gen3_check_pitch_3d(dst_bo)) {
 		int bpp = pixmap->drawable.bitsPerPixel;
 
@@ -3601,11 +3607,9 @@ gen3_render_copy_boxes(struct sna *sna, uint8_t alu,
 	if (!(alu == GXcopy || alu == GXclear) ||
 	    src_bo == dst_bo || /* XXX handle overlap using 3D ? */
 	    src_bo->pitch > 8192 ||
-	    src->drawable.width > 2048 ||
-	    src->drawable.height > 2048 ||
+	    too_large(src->drawable.width, src->drawable.height) ||
 	    dst_bo->pitch > 8192 ||
-	    dst->drawable.width > 2048 ||
-	    dst->drawable.height > 2048) {
+	    too_large(dst->drawable.width, dst->drawable.height)) {
 		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
 			return FALSE;
 
@@ -3741,8 +3745,8 @@ gen3_render_copy(struct sna *sna, uint8_t alu,
 
 	/* Must use the BLT if we can't RENDER... */
 	if (!(alu == GXcopy || alu == GXclear) ||
-	    src->drawable.width > 2048 || src->drawable.height > 2048 ||
-	    dst->drawable.width > 2048 || dst->drawable.height > 2048 ||
+	    too_large(src->drawable.width, src->drawable.height) ||
+	    too_large(dst->drawable.width, dst->drawable.height) ||
 	    src_bo->pitch > 8192 || dst_bo->pitch > 8192) {
 		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
 			return FALSE;
@@ -3861,8 +3865,7 @@ gen3_render_fill_boxes(struct sna *sna,
 	     __FUNCTION__, op, (int)format,
 	     color->red, color->green, color->blue, color->alpha));
 
-	if (dst->drawable.width > 2048 ||
-	    dst->drawable.height > 2048 ||
+	if (too_large(dst->drawable.width, dst->drawable.height) ||
 	    dst_bo->pitch > 8192 ||
 	    !gen3_check_dst_format(format))
 		return gen3_render_fill_boxes_try_blt(sna, op, format, color,
@@ -4032,7 +4035,7 @@ gen3_render_fill(struct sna *sna, uint8_t alu,
 
 	/* Must use the BLT if we can't RENDER... */
 	if (!(alu == GXcopy || alu == GXclear) ||
-	    dst->drawable.width > 2048 || dst->drawable.height > 2048 ||
+	    too_large(dst->drawable.width, dst->drawable.height) ||
 	    dst_bo->pitch > 8192)
 		return sna_blt_fill(sna, alu,
 				    dst_bo, dst->drawable.bitsPerPixel,
@@ -4112,7 +4115,7 @@ gen3_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 
 	/* Must use the BLT if we can't RENDER... */
 	if (!(alu == GXcopy || alu == GXclear) ||
-	    dst->drawable.width > 2048 || dst->drawable.height > 2048 ||
+	    too_large(dst->drawable.width, dst->drawable.height) ||
 	    bo->pitch > 8192)
 		return gen3_render_fill_one_try_blt(sna, dst, bo, color,
 						    x1, y1, x2, y2, alu);
@@ -4189,6 +4192,6 @@ Bool gen3_render_init(struct sna *sna)
 	render->flush = gen3_render_flush;
 	render->fini = gen3_render_fini;
 
-	render->max_3d_size = 2048;
+	render->max_3d_size = MAX_3D_SIZE;
 	return TRUE;
 }
commit dcfcf438a511ee43e5969d01e41b3040a08cac3a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 18 09:58:08 2011 +0000

    sna/gen2+: If we use the BLT to prepare the target, try using BLT for op
    
    If we incurred a context switch to the BLT in order to prepare the
    target (uploading damage for instance), we should recheck whether we can
    continue the operation on the BLT rather than force a switch back to
    RENDER.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index fb6b75c..9f8e6db 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -1427,6 +1427,15 @@ gen2_render_composite(struct sna *sna,
 		     __FUNCTION__));
 		return FALSE;
 	}
+
+	if (mask == NULL && sna->kgem.mode == KGEM_BLT  &&
+	    sna_blt_composite(sna, op,
+			      src, dst,
+			      src_x, src_y,
+			      dst_x, dst_y,
+			      width, height, tmp))
+		return TRUE;
+
 	sna_render_reduce_damage(tmp, dst_x, dst_y, width, height);
 
 	tmp->op = op;
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 3fb98bc..2802930 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2417,6 +2417,15 @@ gen3_render_composite(struct sna *sna,
 		     __FUNCTION__));
 		return FALSE;
 	}
+
+	if (mask == NULL && sna->kgem.mode == KGEM_BLT  &&
+	    sna_blt_composite(sna, op,
+			      src, dst,
+			      src_x, src_y,
+			      dst_x, dst_y,
+			      width, height, tmp))
+		return TRUE;
+
 	sna_render_reduce_damage(tmp, dst_x, dst_y, width, height);
 
 	tmp->op = op;
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 41830fc..da9de8d 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2077,6 +2077,15 @@ gen5_render_composite(struct sna *sna,
 		DBG(("%s: failed to set composite target\n", __FUNCTION__));
 		return FALSE;
 	}
+
+	if (mask == NULL && sna->kgem.mode == KGEM_BLT  &&
+	    sna_blt_composite(sna, op,
+			      src, dst,
+			      src_x, src_y,
+			      dst_x, dst_y,
+			      width, height, tmp))
+		return TRUE;
+
 	sna_render_reduce_damage(tmp, dst_x, dst_y, width, height);
 
 	if (tmp->dst.width > 8192 || tmp->dst.height > 8192) {
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index aeb9dfe..77671a4 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2265,6 +2265,15 @@ gen6_render_composite(struct sna *sna,
 	tmp->op = op;
 	if (!gen6_composite_set_target(sna, tmp, dst))
 		return FALSE;
+
+	if (mask == NULL && sna->kgem.mode == KGEM_BLT  &&
+	    sna_blt_composite(sna, op,
+			      src, dst,
+			      src_x, src_y,
+			      dst_x, dst_y,
+			      width, height, tmp))
+		return TRUE;
+
 	sna_render_reduce_damage(tmp, dst_x, dst_y, width, height);
 
 	if (too_large(tmp->dst.width, tmp->dst.height)) {
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 178b37a..20945bc 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2378,6 +2378,15 @@ gen7_render_composite(struct sna *sna,
 	tmp->op = op;
 	if (!gen7_composite_set_target(tmp, dst))
 		return FALSE;
+
+	if (mask == NULL && sna->kgem.mode == KGEM_BLT  &&
+	    sna_blt_composite(sna, op,
+			      src, dst,
+			      src_x, src_y,
+			      dst_x, dst_y,
+			      width, height, tmp))
+		return TRUE;
+
 	sna_render_reduce_damage(tmp, dst_x, dst_y, width, height);
 
 	if (too_large(tmp->dst.width, tmp->dst.height)) {
commit 507debe8015ee10734a0b8a6ddf4aa251b7dddc5
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 18 01:45:47 2011 +0000

    sna/gen5: If we need to flush the composite op, check to see if we can blit
    
    If we need to halt the 3D engine in order to flush the pipeline for a
    dirty source, we may as well re-evaluate whether we can use the BLT
    instead.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 42703c5..41830fc 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2174,8 +2174,19 @@ gen5_render_composite(struct sna *sna,
 			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo, NULL))
 		kgem_submit(&sna->kgem);
 
-	if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
+	if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo)) {
+		if (mask == NULL &&
+		    tmp->redirect.real_bo == NULL &&
+		    sna_blt_composite(sna, op,
+				      src, dst,
+				      src_x, src_y,
+				      dst_x, dst_y,
+				      width, height, tmp)) {
+			kgem_bo_destroy(&sna->kgem, tmp->src.bo);
+			return TRUE;
+		}
 		kgem_emit_flush(&sna->kgem);
+	}
 
 	gen5_bind_surfaces(sna, tmp);
 	gen5_align_vertex(sna, tmp);
commit de530f89a36a80b652ed01001e6f0d4c8b1dc11a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 18 01:41:03 2011 +0000

    sna/gen5+: First try a blt composite if the source/dest are too large
    
    If we will need to extract either the source or the destination, we
    should see if we can do the entire operation on the BLT.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 89b7bef..42703c5 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1899,11 +1899,17 @@ picture_is_cpu(PicturePtr picture)
 	    picture->repeat)
 		return FALSE;
 
+	if (picture->pDrawable->width > 8192 ||
+	    picture->pDrawable->height > 8192)
+		return TRUE;
+
 	return is_cpu(picture->pDrawable);
 }
 
 static Bool
-try_blt(struct sna *sna, PicturePtr source, int width, int height)
+try_blt(struct sna *sna,
+	PicturePtr dst, PicturePtr src,
+	int width, int height)
 {
 	if (sna->kgem.mode == KGEM_BLT) {
 		DBG(("%s: already performing BLT\n", __FUNCTION__));
@@ -1916,8 +1922,12 @@ try_blt(struct sna *sna, PicturePtr source, int width, int height)
 		return TRUE;
 	}
 
+	if (dst->pDrawable->width > 8192 ||
+	    dst->pDrawable->height > 8192)
+		return TRUE;
+
 	/* is the source picture only in cpu memory e.g. a shm pixmap? */
-	return picture_is_cpu(source);
+	return picture_is_cpu(src);
 }
 
 static bool
@@ -2044,7 +2054,7 @@ gen5_render_composite(struct sna *sna,
 	}
 
 	if (mask == NULL &&
-	    try_blt(sna, src, width, height) &&
+	    try_blt(sna, dst, src, width, height) &&
 	    sna_blt_composite(sna, op,
 			      src, dst,
 			      src_x, src_y,
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 4ec0bd6..aeb9dfe 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2077,7 +2077,9 @@ gen6_composite_set_target(struct sna *sna,
 }
 
 static Bool
-try_blt(struct sna *sna, int width, int height)
+try_blt(struct sna *sna,
+       	PicturePtr dst, PicturePtr src,
+	int width, int height)
 {
 	if (sna->kgem.ring == KGEM_BLT) {
 		DBG(("%s: already performing BLT\n", __FUNCTION__));
@@ -2090,6 +2092,21 @@ try_blt(struct sna *sna, int width, int height)
 		return TRUE;
 	}
 
+	if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
+		DBG(("%s: dst too large for 3D pipe (%d, %d)\n",
+		     __FUNCTION__,
+		     dst->pDrawable->width, dst->pDrawable->height));
+		return TRUE;
+	}
+
+	if (src->pDrawable &&
+	    too_large(src->pDrawable->width, src->pDrawable->height)) {
+		DBG(("%s: src too large for 3D pipe (%d, %d)\n",
+		     __FUNCTION__,
+		     src->pDrawable->width, src->pDrawable->height));
+		return TRUE;
+	}
+
 	return FALSE;
 }
 
@@ -2226,7 +2243,7 @@ gen6_render_composite(struct sna *sna,
 	     width, height, sna->kgem.ring));
 
 	if (mask == NULL &&
-	    try_blt(sna, width, height) &&
+	    try_blt(sna, dst, src, width, height) &&
 	    sna_blt_composite(sna, op,
 			      src, dst,
 			      src_x, src_y,
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 121f137..178b37a 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2190,7 +2190,9 @@ gen7_composite_set_target(struct sna_composite_op *op, PicturePtr dst)
 }
 
 static Bool
-try_blt(struct sna *sna, int width, int height)
+try_blt(struct sna *sna,
+       	PicturePtr dst, PicturePtr src,
+	int width, int height)
 {
 	if (sna->kgem.ring == KGEM_BLT) {
 		DBG(("%s: already performing BLT\n", __FUNCTION__));
@@ -2203,6 +2205,21 @@ try_blt(struct sna *sna, int width, int height)
 		return TRUE;
 	}
 
+	if (too_large(dst->pDrawable->width, dst->pDrawable->height)) {
+		DBG(("%s: dst too large for 3D pipe (%d, %d)\n",
+		     __FUNCTION__,
+		     dst->pDrawable->width, dst->pDrawable->height));
+		return TRUE;
+	}
+
+	if (src->pDrawable &&
+	    too_large(src->pDrawable->width, src->pDrawable->height)) {
+		DBG(("%s: src too large for 3D pipe (%d, %d)\n",
+		     __FUNCTION__,
+		     src->pDrawable->width, src->pDrawable->height));
+		return TRUE;
+	}
+
 	return FALSE;
 }
 
@@ -2339,7 +2356,7 @@ gen7_render_composite(struct sna *sna,
 	     width, height, sna->kgem.ring));
 
 	if (mask == NULL &&
-	    try_blt(sna, width, height) &&
+	    try_blt(sna, dst, src, width, height) &&
 	    sna_blt_composite(sna, op,
 			      src, dst,
 			      src_x, src_y,
commit 7b88f87945f23b3f80ae44b2aa19f00dff1904c8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 18 00:44:00 2011 +0000

    sna: Upload images in place from CopyArea
    
    As for PutImage, if the damage will be immediately flushed out to the
    GPU bo, we may as well do the write directly to the GPU bo and not
    staged via the shadow.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 5b96e67..fdd3e90 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2219,6 +2219,9 @@ static bool copy_use_gpu_bo(struct sna *sna,
 			    struct sna_pixmap *priv,
 			    RegionPtr region)
 {
+	if (priv->flush)
+		return true;
+
 	if (region_inplace(sna, priv->pixmap, region, priv))
 		return true;
 
commit 1418e4f3156a17ff6c5cd0f653918160e30b7c0e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 17 23:11:03 2011 +0000

    sna: Tune the default pixmap upload paths
    
    One issue with the heuristic is that it is based on total pixmap size
    whereas the goal is to pick the placement for the next series of
    operations. The next step in refinement is to combine an overall
    placement to avoid frequent migrations along with a per-operation
    override.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 13bf11f..5b96e67 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -379,12 +379,18 @@ static struct sna_pixmap *_sna_pixmap_attach(struct sna *sna,
 #elif FORCE_INPLACE < 0
 	priv->inplace = 0;
 #else
-	/* If the pixmap is larger than 2x the L2 cache, we presume that
-	 * it will always be quicker to upload directly than to copy via
-	 * the shadow.
+	/* If the typical operation on the pixmap is larger than the
+	 * L2 cache, we presume that it will always be quicker to
+	 * upload directly than to copy via the shadow.
+	 *
+	 * "Typical operation" is currently chosen to maximise
+	 * x11perf on the various chipsets.
 	 */
 	priv->inplace =
-		(pixmap->devKind * pixmap->drawable.height >> 13) > sna->kgem.cpu_cache_pages;
+		(min(pixmap->drawable.width, 500) *
+		 min(pixmap->drawable.height, 500) *
+		 pixmap->drawable.bitsPerPixel >> 15) >
+		sna->kgem.cpu_cache_pages;
 #endif
 	list_init(&priv->list);
 	list_init(&priv->inactive);
@@ -598,6 +604,20 @@ static inline void list_move(struct list *list, struct list *head)
 	list_add(list, head);
 }
 
+static inline bool pixmap_inplace(struct sna *sna,
+				  PixmapPtr pixmap,
+				  struct sna_pixmap *priv)
+{
+	if (!INPLACE_MAP)
+		return false;
+
+	if (priv->inplace)
+		return true;
+
+	return (pixmap->devKind * pixmap->drawable.height >> 12) >
+		sna->kgem.cpu_cache_pages;
+}
+
 bool
 sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 {
@@ -618,7 +638,7 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 	if ((flags & MOVE_READ) == 0) {
 		assert(flags == MOVE_WRITE);
 
-		if (priv->inplace && priv->gpu_bo && INPLACE_MAP) {
+		if (priv->gpu_bo && pixmap_inplace(sna, pixmap, priv)) {
 			if (kgem_bo_is_busy(priv->gpu_bo) &&
 			    priv->gpu_bo->exec == NULL)
 				kgem_retire(&sna->kgem);
@@ -743,6 +763,23 @@ static bool sync_will_stall(struct kgem_bo *bo)
 	return kgem_bo_is_busy(bo);
 }
 
+static inline bool region_inplace(struct sna *sna,
+				  PixmapPtr pixmap,
+				  RegionPtr region,
+				  struct sna_pixmap *priv)
+{
+	if (!INPLACE_MAP)
+		return false;
+
+	if (priv->inplace)
+		return true;
+
+	return ((region->extents.x2 - region->extents.x1) *
+		(region->extents.y2 - region->extents.y1) *
+		pixmap->drawable.bitsPerPixel >> 15)
+		> sna->kgem.cpu_cache_pages;
+}
+
 bool
 sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 				RegionPtr region,
@@ -781,7 +818,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 	if ((flags & MOVE_READ) == 0) {
 		assert(flags == MOVE_WRITE);
 
-		if (priv->inplace && priv->gpu_bo && INPLACE_MAP) {
+		if (priv->gpu_bo && region_inplace(sna, pixmap, region, priv)) {
 			if (sync_will_stall(priv->gpu_bo) &&
 			    priv->gpu_bo->exec == NULL)
 				kgem_retire(&sna->kgem);
@@ -2179,9 +2216,10 @@ fallback:
 }
 
 static bool copy_use_gpu_bo(struct sna *sna,
-			    struct sna_pixmap *priv)
+			    struct sna_pixmap *priv,
+			    RegionPtr region)
 {
-	if (priv->inplace)
+	if (region_inplace(sna, priv->pixmap, region, priv))
 		return true;
 
 	if (!priv->cpu_bo)
@@ -2282,7 +2320,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	}
 
 	if (dst_priv && dst_priv->gpu_bo) {
-		if (!src_priv && !copy_use_gpu_bo(sna, dst_priv)) {
+		if (!src_priv && !copy_use_gpu_bo(sna, dst_priv, &region)) {
 			DBG(("%s: fallback - src_priv=%p and not use dst gpu bo\n",
 			     __FUNCTION__, src_priv));
 			goto fallback;


More information about the xorg-commit mailing list