xf86-video-intel: 14 commits - configure.ac src/intel_display.c src/intel_dri.c src/intel_driver.c src/intel.h src/intel_module.c src/intel_video.c src/Makefile.am src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_composite.c src/sna/sna_display.c src/sna/sna_dri.c src/sna/sna_glyphs.c src/sna/sna_gradient.c src/sna/sna.h src/sna/sna_render.c src/sna/sna_render_inline.h src/sna/sna_tiling.c src/sna/sna_trapezoids.c

Chris Wilson ickle at kemper.freedesktop.org
Sat Dec 17 13:36:27 PST 2011


 configure.ac                |    7 
 src/Makefile.am             |    6 
 src/intel.h                 |    1 
 src/intel_display.c         |    1 
 src/intel_dri.c             |    1 
 src/intel_driver.c          |    1 
 src/intel_module.c          |   10 
 src/intel_video.c           |    1 
 src/sna/gen3_render.c       |   21 +
 src/sna/gen4_render.c       |   11 
 src/sna/gen5_render.c       |    8 
 src/sna/gen6_render.c       |   14 -
 src/sna/gen7_render.c       |    8 
 src/sna/kgem.c              |  430 ++++++++++++++++++++++++++---------
 src/sna/kgem.h              |   20 -
 src/sna/sna.h               |   14 -
 src/sna/sna_accel.c         |  539 +++++++++++++++++++++++++++++++-------------
 src/sna/sna_blt.c           |   41 ++-
 src/sna/sna_composite.c     |   52 ++--
 src/sna/sna_display.c       |    4 
 src/sna/sna_dri.c           |   16 -
 src/sna/sna_glyphs.c        |   22 +
 src/sna/sna_gradient.c      |   10 
 src/sna/sna_render.c        |    7 
 src/sna/sna_render_inline.h |   11 
 src/sna/sna_tiling.c        |   27 +-
 src/sna/sna_trapezoids.c    |   70 +++--
 27 files changed, 959 insertions(+), 394 deletions(-)

New commits:
commit 25c353503a25d20e7db5acdc63d83564804efdf4
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 17 17:16:07 2011 +0000

    sna: Simplify write domain tracking
    
    Replace the growing set of single-bit domain flags with an enum
    recording the domain in which the bo was last used.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
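
    As a minimal illustrative sketch (simplified from the kgem.h hunk
    below; the struct and helper names here are invented), the change
    collapses the separate gpu/cpu bits into a single last-use domain:

        /* sketch only: simplified from struct kgem_bo in kgem.h */
        enum { DOMAIN_NONE, DOMAIN_CPU, DOMAIN_GTT, DOMAIN_GPU };

        struct bo {
                unsigned domain : 2;    /* where the bo was last used */
                /* ... */
        };

        /* e.g. after a flush the bo is in no coherent domain at all,
         * rather than juggling gpu=0/cpu=0 flag combinations */
        static inline void bo_retire(struct bo *bo)
        {
                bo->domain = DOMAIN_NONE;
        }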

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index ebae915..1c6c22e 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1839,8 +1839,7 @@ gen4_composite_set_target(PicturePtr dst, struct sna_composite_op *op)
 	op->damage = &priv->gpu_damage;
 	if (sna_damage_is_all(&priv->gpu_damage, op->dst.width, op->dst.height))
 		op->damage = NULL;
-	DBG(("%s: gpu_only=%d, all-damaged=%d, damage=%p\n",
-	     __FUNCTION__, priv->gpu_only,
+	DBG(("%s: all-damaged=%d, damage=%p\n", __FUNCTION__,
 	     sna_damage_is_all(&priv->gpu_damage, op->dst.width, op->dst.height),
 	    op->damage));
 
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index d882ea5..4ec0bd6 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2152,7 +2152,7 @@ gen6_composite_fallback(struct sna *sna,
 
 	/* If anything is on the GPU, push everything out to the GPU */
 	priv = sna_pixmap(dst_pixmap);
-	if (priv && (priv->gpu_damage || (priv->cpu_bo && priv->cpu_bo->gpu))) {
+	if (priv && (priv->gpu_damage || (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)))) {
 		DBG(("%s: dst is already on the GPU, try to use GPU\n",
 		     __FUNCTION__));
 		return FALSE;
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 5dde9a6..e5fec10 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -241,8 +241,11 @@ Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
 		return FALSE;
 
 	bo->needs_flush = false;
-	if (bo->gpu)
+	if (bo->domain == DOMAIN_GPU) {
+		kgem->sync = false;
 		kgem_retire(kgem);
+	}
+	bo->domain = DOMAIN_NONE;
 	return TRUE;
 }
 
@@ -296,7 +299,7 @@ static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
 	bo->handle = handle;
 	bo->size = size;
 	bo->reusable = true;
-	bo->cpu = true;
+	bo->domain = DOMAIN_CPU;
 	list_init(&bo->request);
 	list_init(&bo->list);
 	list_init(&bo->vma);
@@ -591,6 +594,9 @@ kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo)
 {
 	struct drm_i915_gem_exec_object2 *exec;
 
+	DBG(("%s: handle=%d, index=%d\n",
+	     __FUNCTION__, bo->handle, kgem->nexec));
+
 	assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
 	exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec));
 	exec->handle = bo->handle;
@@ -605,7 +611,6 @@ void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo)
 {
 	bo->exec = kgem_add_handle(kgem, bo);
 	bo->rq = kgem->next_request;
-	bo->gpu = true;
 
 	list_move(&bo->request, &kgem->next_request->buffers);
 
@@ -723,6 +728,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		assert(list_is_empty(&bo->request));
 		list_add(&bo->request, &kgem->flushing);
 		list_move(&bo->list, active(kgem, bo->size));
+		bo->rq = &_kgem_static_request;
 	} else {
 		if (!IS_CPU_MAP(bo->map)) {
 			assert(!bo->purged);
@@ -732,7 +738,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 
 			if (!gem_madvise(kgem->fd, bo->handle,
 					 I915_MADV_DONTNEED)) {
-				kgem->need_purge |= bo->gpu;
+				kgem->need_purge |= bo->domain == DOMAIN_GPU;
 				goto destroy;
 			}
 
@@ -776,8 +782,9 @@ bool kgem_retire(struct kgem *kgem)
 		if (gem_madvise(kgem->fd, bo->handle, I915_MADV_DONTNEED)) {
 			bo->purged = true;
 			bo->needs_flush = false;
-			bo->gpu = false;
-			assert(bo->rq == NULL);
+			bo->domain = DOMAIN_NONE;
+			assert(bo->rq == &_kgem_static_request);
+			bo->rq = NULL;
 			list_move(&bo->list, inactive(kgem, bo->size));
 			list_del(&bo->request);
 		} else
@@ -807,7 +814,8 @@ bool kgem_retire(struct kgem *kgem)
 
 			if (bo->needs_flush)
 				bo->needs_flush = kgem_busy(kgem, bo->handle);
-			bo->gpu = bo->needs_flush;
+			if (!bo->needs_flush)
+				bo->domain = DOMAIN_NONE;
 
 			if (bo->refcnt == 0) {
 				if (bo->reusable) {
@@ -815,6 +823,7 @@ bool kgem_retire(struct kgem *kgem)
 						DBG(("%s: moving %d to flushing\n",
 						     __FUNCTION__, bo->handle));
 						list_add(&bo->request, &kgem->flushing);
+						bo->rq = &_kgem_static_request;
 					} else if(gem_madvise(kgem->fd,
 							      bo->handle,
 							      I915_MADV_DONTNEED)) {
@@ -841,8 +850,8 @@ bool kgem_retire(struct kgem *kgem)
 		assert(rq->bo->refcnt == 0);
 		if (gem_madvise(kgem->fd, rq->bo->handle, I915_MADV_DONTNEED)) {
 			rq->bo->purged = true;
-			assert(rq->bo->gpu == 0);
 			assert(rq->bo->rq == NULL);
+			assert(list_is_empty(&rq->bo->request));
 			list_move(&rq->bo->list, inactive(kgem, rq->bo->size));
 			retired = true;
 		} else {
@@ -871,15 +880,19 @@ static void kgem_commit(struct kgem *kgem)
 		assert(!bo->purged);
 
 		bo->presumed_offset = bo->exec->offset;
-		bo->binding.offset = 0;
 		bo->exec = NULL;
-		bo->dirty = false;
-		bo->cpu = false;
+
+		DBG(("%s: release handle=%d (proxy? %d)\n",
+		     __FUNCTION__, bo->handle, bo->proxy != NULL));
 
 		if (!bo->refcnt && !bo->reusable) {
 			kgem_bo_free(kgem, bo);
 			continue;
 		}
+
+		bo->binding.offset = 0;
+		bo->domain = DOMAIN_GPU;
+		bo->dirty = false;
 	}
 
 	if (rq == &_kgem_static_request) {
@@ -977,7 +990,7 @@ static void kgem_cleanup(struct kgem *kgem)
 					      request);
 			list_del(&bo->request);
 			bo->rq = NULL;
-			bo->gpu = false;
+			bo->domain = DOMAIN_NONE;
 			if (bo->refcnt == 0)
 				kgem_bo_free(kgem, bo);
 		}
@@ -1118,7 +1131,6 @@ void _kgem_submit(struct kgem *kgem)
 		kgem_fixup_self_relocs(kgem, rq->bo);
 		kgem_finish_partials(kgem);
 
-		assert(rq->bo->gpu == 0);
 		if (kgem_batch_write(kgem, handle) == 0) {
 			struct drm_i915_gem_execbuffer2 execbuf;
 			int ret, retry = 3;
@@ -1390,7 +1402,7 @@ search_linear_cache(struct kgem *kgem, unsigned int size, bool use_active)
 		if (bo->purged) {
 			if (!gem_madvise(kgem->fd, bo->handle,
 					 I915_MADV_WILLNEED)) {
-				kgem->need_purge |= bo->gpu;
+				kgem->need_purge |= bo->domain == DOMAIN_GPU;
 				continue;
 			}
 
@@ -1418,7 +1430,7 @@ search_linear_cache(struct kgem *kgem, unsigned int size, bool use_active)
 		     use_active ? "active" : "inactive"));
 		assert(bo->refcnt == 0);
 		assert(bo->reusable);
-		assert(use_active || bo->gpu == 0);
+		assert(use_active || bo->domain != DOMAIN_GPU);
 		//assert(use_active || !kgem_busy(kgem, bo->handle));
 		return bo;
 	}
@@ -1702,7 +1714,7 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 			DBG(("  from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
 			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
 			assert(bo->reusable);
-			assert(bo->gpu == 0 && !kgem_busy(kgem, bo->handle));
+			assert(bo->domain != DOMAIN_GPU && !kgem_busy(kgem, bo->handle));
 			return kgem_bo_reference(bo);
 		}
 
@@ -1746,7 +1758,7 @@ search_active: /* Best active match first */
 			if (bo->purged) {
 				if (!gem_madvise(kgem->fd, bo->handle,
 						 I915_MADV_WILLNEED)) {
-					kgem->need_purge |= bo->gpu;
+					kgem->need_purge |= bo->domain == DOMAIN_GPU;
 					kgem_bo_free(kgem, bo);
 					bo = NULL;
 					goto search_active;
@@ -1800,7 +1812,7 @@ skip_active_search:
 		if (bo->purged) {
 			if (!gem_madvise(kgem->fd, bo->handle,
 					 I915_MADV_WILLNEED)) {
-				kgem->need_purge |= bo->gpu;
+				kgem->need_purge |= bo->domain == DOMAIN_GPU;
 				goto next_bo;
 			}
 
@@ -1817,7 +1829,7 @@ skip_active_search:
 		     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
 		assert(bo->refcnt == 0);
 		assert(bo->reusable);
-		assert((flags & CREATE_INACTIVE) == 0 || bo->gpu == 0);
+		assert((flags & CREATE_INACTIVE) == 0 || bo->domain != DOMAIN_GPU);
 		assert((flags & CREATE_INACTIVE) == 0 ||
 		       !kgem_busy(kgem, bo->handle));
 		return kgem_bo_reference(bo);
@@ -1967,6 +1979,9 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 {
 	int index;
 
+	DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
+	     __FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));
+
 	assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
 
 	index = kgem->nreloc++;
@@ -1978,6 +1993,9 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 
 		delta += bo->delta;
 		if (bo->proxy) {
+			DBG(("%s: adding proxy for handle=%d\n",
+			     __FUNCTION__, bo->handle));
+			assert(bo->handle == bo->proxy->handle);
 			/* need to release the cache upon batch submit */
 			list_move(&bo->request, &kgem->next_request->buffers);
 			bo->exec = &_kgem_dummy_exec;
@@ -1989,7 +2007,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 		if (bo->exec == NULL)
 			_kgem_add_bo(kgem, bo);
 
-		if (read_write_domain & KGEM_RELOC_FENCED && kgem->gen < 40) {
+		if (kgem->gen < 40 && read_write_domain & KGEM_RELOC_FENCED) {
 			if (bo->tiling &&
 			    (bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) {
 				assert(kgem->nfence < kgem->fence_max);
@@ -2049,7 +2067,7 @@ static void kgem_trim_vma_cache(struct kgem *kgem)
 		list_del(&old->vma);
 		kgem->vma_count--;
 
-		if (!old->gpu && old->refcnt == 0)
+		if (old->domain != DOMAIN_GPU && old->refcnt == 0)
 			kgem_bo_free(kgem, old);
 	}
 }
@@ -2091,11 +2109,11 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 		     __FUNCTION__, bo->handle, kgem->vma_count));
 	}
 
-	if (bo->needs_flush | bo->gpu) {
+	if (bo->domain != DOMAIN_GTT) {
 		struct drm_i915_gem_set_domain set_domain;
 
-		DBG(("%s: sync: needs_flush? %d, gpu? %d\n", __FUNCTION__,
-		     bo->needs_flush, bo->gpu));
+		DBG(("%s: sync: needs_flush? %d, domain? %d\n", __FUNCTION__,
+		     bo->needs_flush, bo->domain));
 
 		/* XXX use PROT_READ to avoid the write flush? */
 
@@ -2106,10 +2124,11 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 		drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
 
 		bo->needs_flush = false;
-		if (bo->gpu) {
+		if (bo->domain == DOMAIN_GPU) {
 			kgem->sync = false;
 			kgem_retire(kgem);
 		}
+		bo->domain = DOMAIN_GTT;
 	}
 
 	list_move_tail(&bo->vma, &kgem->vma_cache);
@@ -2288,11 +2307,11 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
 	kgem_bo_submit(kgem, bo);
 
 	/* XXX assumes bo is snoopable */
-	if (!bo->cpu) {
+	if (bo->domain != DOMAIN_CPU) {
 		struct drm_i915_gem_set_domain set_domain;
 
-		DBG(("%s: sync: needs_flush? %d, gpu? %d, busy? %d\n", __FUNCTION__,
-		     bo->needs_flush, bo->gpu, kgem_busy(kgem, bo->handle)));
+		DBG(("%s: sync: needs_flush? %d, domain? %d, busy? %d\n", __FUNCTION__,
+		     bo->needs_flush, bo->domain, kgem_busy(kgem, bo->handle)));
 
 		VG_CLEAR(set_domain);
 		set_domain.handle = bo->handle;
@@ -2302,11 +2321,11 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
 		drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
 		assert(!kgem_busy(kgem, bo->handle));
 		bo->needs_flush = false;
-		if (bo->gpu) {
+		if (bo->domain == DOMAIN_GPU) {
 			kgem->sync = false;
 			kgem_retire(kgem);
 		}
-		bo->cpu = true;
+		bo->domain = DOMAIN_CPU;
 	}
 }
 
@@ -2732,11 +2751,11 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 			 offset, length);
 		assert(!kgem_busy(kgem, bo->base.handle));
 		bo->base.needs_flush = false;
-		if (bo->base.gpu) {
+		if (bo->base.domain == DOMAIN_GPU) {
 			kgem->sync = false;
 			kgem_retire(kgem);
 		}
-		assert(bo->base.gpu == false);
+		bo->base.domain = DOMAIN_NONE;
 	} else
 		kgem_bo_sync__cpu(kgem, &bo->base);
 }
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index ae6ea47..35acc1d 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -70,8 +70,7 @@ struct kgem_bo {
 	uint32_t tiling : 2;
 	uint32_t reusable : 1;
 	uint32_t dirty : 1;
-	uint32_t gpu : 1;
-	uint32_t cpu : 1;
+	uint32_t domain : 2;
 	uint32_t needs_flush : 1;
 	uint32_t vmap : 1;
 	uint32_t io : 1;
@@ -79,6 +78,10 @@ struct kgem_bo {
 	uint32_t sync : 1;
 	uint32_t purged : 1;
 };
+#define DOMAIN_NONE 0
+#define DOMAIN_CPU 1
+#define DOMAIN_GTT 2
+#define DOMAIN_GPU 3
 
 struct kgem_request {
 	struct list list;
@@ -330,12 +333,10 @@ Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
 
 static inline bool kgem_bo_is_busy(struct kgem_bo *bo)
 {
-	DBG_HDR(("%s: gpu? %d exec? %d, rq? %d\n",
-		 __FUNCTION__, bo->gpu, bo->exec != NULL, bo->rq != NULL));
-
+	DBG_HDR(("%s: domain: %d exec? %d, rq? %d\n",
+		 __FUNCTION__, bo->domain, bo->exec != NULL, bo->rq != NULL));
 	assert(bo->proxy == NULL);
-	assert(bo->gpu || bo->rq == NULL);
-	return bo->gpu;
+	return bo->rq;
 }
 
 static inline bool kgem_bo_is_dirty(struct kgem_bo *bo)
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index fbecabb..13bf11f 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -619,7 +619,11 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 		assert(flags == MOVE_WRITE);
 
 		if (priv->inplace && priv->gpu_bo && INPLACE_MAP) {
-			if (priv->gpu_bo->gpu) {
+			if (kgem_bo_is_busy(priv->gpu_bo) &&
+			    priv->gpu_bo->exec == NULL)
+				kgem_retire(&sna->kgem);
+
+			if (kgem_bo_is_busy(priv->gpu_bo)) {
 				sna_pixmap_destroy_gpu_bo(sna, priv);
 				if (!sna_pixmap_move_to_gpu(pixmap))
 					goto skip_inplace_map;
@@ -642,11 +646,11 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 		}
 
 skip_inplace_map:
-		if (priv->cpu_bo && priv->cpu_bo->gpu) {
+		if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) {
 			if (priv->cpu_bo->exec == NULL)
 				kgem_retire(&sna->kgem);
 
-			if (priv->cpu_bo->gpu) {
+			if (kgem_bo_is_busy(priv->cpu_bo)) {
 				DBG(("%s: discarding busy CPU bo\n", __FUNCTION__));
 				sna_pixmap_free_cpu(sna, priv);
 			}
@@ -736,7 +740,7 @@ region_subsumes_drawable(RegionPtr region, DrawablePtr drawable)
 
 static bool sync_will_stall(struct kgem_bo *bo)
 {
-	return bo->gpu | bo->needs_flush;
+	return kgem_bo_is_busy(bo);
 }
 
 bool
@@ -2183,14 +2187,14 @@ static bool copy_use_gpu_bo(struct sna *sna,
 	if (!priv->cpu_bo)
 	       return false;
 
-	if (priv->cpu_bo->gpu) {
+	if (kgem_bo_is_busy(priv->cpu_bo)) {
 		if (priv->cpu_bo->exec)
 			return true;
 
 		kgem_retire(&sna->kgem);
 	}
 
-	return priv->cpu_bo->gpu;
+	return kgem_bo_is_busy(priv->cpu_bo);
 }
 
 static void
@@ -2257,7 +2261,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 
 	/* Try to maintain the data on the GPU */
 	if (dst_priv && dst_priv->gpu_bo == NULL &&
-	    src_priv && (src_priv->gpu_bo != NULL || (src_priv->cpu_bo && src_priv->cpu_bo->gpu))) {
+	    src_priv && (src_priv->gpu_bo != NULL || (src_priv->cpu_bo && kgem_bo_is_busy(src_priv->cpu_bo)))) {
 		uint32_t tiling =
 			sna_pixmap_choose_tiling(dst_pixmap,
 						 src_priv->gpu_bo->tiling);
@@ -8716,7 +8720,7 @@ static void sna_deferred_free(struct sna *sna)
 	struct sna_pixmap *priv, *next;
 
 	list_for_each_entry_safe(priv, next, &sna->deferred_free, list) {
-		if (priv->cpu_bo->gpu)
+		if (kgem_bo_is_busy(priv->cpu_bo))
 			continue;
 
 		list_del(&priv->list);
diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index 3b2cd7b..a751354 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -1835,8 +1835,6 @@ sna_page_flip(struct sna *sna,
 	count = do_page_flip(sna, data, ref_crtc_hw_id);
 	DBG(("%s: page flipped %d crtcs\n", __FUNCTION__, count));
 	if (count) {
-		bo->gpu = true;
-
 		/* Although the kernel performs an implicit flush upon
 		 * page-flipping, marking the bo as requiring a flush
 		 * here ensures that the buffer goes into the active cache
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 76a1d70..dd94d96 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -295,8 +295,6 @@ static void _sna_dri_destroy_buffer(struct sna *sna, DRI2Buffer2Ptr buffer)
 			screen->DestroyPixmap(private->pixmap);
 		}
 
-		private->bo->gpu =
-			private->bo->needs_flush || private->bo->rq != NULL;
 		private->bo->flush = 0;
 		kgem_bo_destroy(&sna->kgem, private->bo);
 
diff --git a/src/sna/sna_gradient.c b/src/sna/sna_gradient.c
index 84d57f4..c870076 100644
--- a/src/sna/sna_gradient.c
+++ b/src/sna/sna_gradient.c
@@ -213,6 +213,8 @@ sna_render_flush_solid(struct sna *sna)
 	struct sna_solid_cache *cache = &sna->render.solid_cache;
 
 	DBG(("sna_render_flush_solid(size=%d)\n", cache->size));
+	assert(cache->dirty);
+	assert(cache->size);
 
 	kgem_bo_write(&sna->kgem, cache->cache_bo,
 		      cache->color, cache->size*sizeof(uint32_t));
@@ -226,10 +228,10 @@ sna_render_finish_solid(struct sna *sna, bool force)
 	struct sna_solid_cache *cache = &sna->render.solid_cache;
 	int i;
 
-	DBG(("sna_render_finish_solid(force=%d, busy=%d, dirty=%d)\n",
-	     force, cache->cache_bo->gpu, cache->dirty));
+	DBG(("sna_render_finish_solid(force=%d, domain=%d, busy=%d, dirty=%d)\n",
+	     force, cache->cache_bo->domain, cache->cache_bo->rq != NULL, cache->dirty));
 
-	if (!force && !cache->cache_bo->gpu)
+	if (!force && cache->cache_bo->domain != DOMAIN_GPU)
 		return;
 
 	if (cache->dirty)
@@ -259,6 +261,8 @@ sna_render_get_solid(struct sna *sna, uint32_t color)
 	struct sna_solid_cache *cache = &sna->render.solid_cache;
 	int i;
 
+	DBG(("%s: %08x\n", __FUNCTION__, color));
+
 	if ((color & 0xffffff) == 0) /* alpha only */
 		return kgem_bo_reference(sna->render.alpha_cache.bo[color>>24]);
 
diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h
index 497c0fd..daa8f6f 100644
--- a/src/sna/sna_render_inline.h
+++ b/src/sna/sna_render_inline.h
@@ -77,7 +77,7 @@ static inline Bool
 is_busy_cpu(DrawablePtr drawable)
 {
 	struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
-	return priv && priv->cpu_bo && priv->cpu_bo->gpu;
+	return priv && priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo);
 }
 
 static inline Bool
commit d20d167a753d8e4fe581950e1bc49f29e0ec9f1f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 17 16:28:04 2011 +0000

    sna: Upload to large pixmaps inplace
    
    When the pixmap is large, larger than the L2 cache size, we are
    unlikely to benefit from first copying the data into a shadow buffer,
    as that shadow buffer will itself mostly reside in main memory. In
    such circumstances we may as well perform the write directly through
    the GTT mapping of the GPU bo. That said, this is a fragile heuristic
    that may require further tuning.
    
    Avoiding that extra copy gives a 30% boost to putimage500/shmput500 at
    ~10% cost to putimage10/shmput10 on Atom (945gm/PineView), without any
    noticeable impact upon cairo.
    
    Reported-by: Michael Larabel <Michael at phoronix.com>
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
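
    As a hedged sketch of that heuristic (the helper name is invented;
    the shift and cpu_cache_pages come from the hunks below), the test
    amounts to "pixmap bytes > 2x L2 cache bytes":

        #include <stddef.h>

        /* cpu_cache_pages is the L2 size in 4KiB pages (size >> 12) */
        static int use_inplace(size_t stride, size_t height,
                               size_t cpu_cache_pages)
        {
                /* bytes >> 13 > cache_bytes >> 12
                 * <=> bytes > 2 * cache_bytes */
                return (stride * height >> 13) > cpu_cache_pages;
        }

    For example, a 1024x1024 32bpp pixmap (stride 4096, 4MiB in total)
    against a 512KiB L2 (cpu_cache_pages = 128) gives 4MiB >> 13 = 512,
    which exceeds 128, so it is written inplace through the GTT mapping.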

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 17eca52..5dde9a6 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -362,6 +362,29 @@ agp_aperture_size(struct pci_device *dev, int gen)
 	return dev->regions[gen < 30 ? 0 : 2].size;
 }
 
+static size_t
+cpu_cache_size(void)
+{
+	FILE *file = fopen("/proc/cpuinfo", "r");
+	size_t size = -1;
+	if (file) {
+		size_t len = 0;
+		char *line = NULL;
+		while (getline(&line, &len, file) != -1) {
+			int mb;
+			if (sscanf(line, "cache size : %d KB", &mb) == 1) {
+				size = mb * 1024;
+				break;
+			}
+		}
+		free(line);
+		fclose(file);
+	}
+	if (size == -1)
+		ErrorF("Unknown CPU cache size\n");
+	return size;
+}
+
 static int gem_param(struct kgem *kgem, int name)
 {
 	drm_i915_getparam_t gp;
@@ -388,6 +411,8 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	kgem->wedged = drmCommandNone(kgem->fd, DRM_I915_GEM_THROTTLE) == -EIO;
 	kgem->wedged |= DBG_NO_HW;
 
+	kgem->cpu_cache_pages = cpu_cache_size() >> 12;
+
 	list_init(&kgem->partial);
 	list_init(&kgem->requests);
 	list_init(&kgem->flushing);
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index b645568..ae6ea47 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -127,6 +127,7 @@ struct kgem {
 	uint32_t has_relaxed_fencing :1;
 
 	uint16_t fence_max;
+	uint16_t cpu_cache_pages;
 	uint32_t aperture_high, aperture_low, aperture;
 	uint32_t aperture_fenced, aperture_mappable;
 	uint32_t max_object_size;
diff --git a/src/sna/sna.h b/src/sna/sna.h
index a112fe3..6c64d64 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -142,6 +142,8 @@ struct sna_pixmap {
 #define SOURCE_BIAS 4
 	uint16_t source_count;
 	uint8_t pinned :1;
+	uint8_t inplace :1;
+	uint8_t mapped :1;
 	uint8_t flush :1;
 	uint8_t gpu :1;
 	uint8_t freed :1;
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 76ac89d..fbecabb 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -55,12 +55,14 @@
 #endif
 
 #define FORCE_GPU_ONLY 0
+#define FORCE_INPLACE 0
 #define FORCE_FALLBACK 0
 #define FORCE_FLUSH 0
 
 #define USE_SPANS 0
 #define USE_ZERO_SPANS 1
 #define USE_BO_FOR_SCRATCH_PIXMAP 1
+#define INPLACE_MAP 1
 
 static int sna_font_key;
 
@@ -171,6 +173,11 @@ static void sna_pixmap_destroy_gpu_bo(struct sna *sna, struct sna_pixmap *priv)
 		priv->gpu_bo = NULL;
 	}
 
+	if (priv->mapped) {
+		priv->pixmap->devPrivate.ptr = NULL;
+		priv->mapped = 0;
+	}
+
 	list_del(&priv->inactive);
 
 	/* and reset the upload counter */
@@ -358,7 +365,8 @@ static inline void sna_set_pixmap(PixmapPtr pixmap, struct sna_pixmap *sna)
 	dixSetPrivate(&pixmap->devPrivates, &sna_pixmap_index, sna);
 }
 
-static struct sna_pixmap *_sna_pixmap_attach(PixmapPtr pixmap)
+static struct sna_pixmap *_sna_pixmap_attach(struct sna *sna,
+					     PixmapPtr pixmap)
 {
 	struct sna_pixmap *priv;
 
@@ -366,6 +374,18 @@ static struct sna_pixmap *_sna_pixmap_attach(PixmapPtr pixmap)
 	if (!priv)
 		return NULL;
 
+#if FORCE_INPLACE > 0
+	priv->inplace = 1;
+#elif FORCE_INPLACE < 0
+	priv->inplace = 0;
+#else
+	/* If the pixmap is larger than 2x the L2 cache, we presume that
+	 * it will always be quicker to upload directly than to copy via
+	 * the shadow.
+	 */
+	priv->inplace =
+		(pixmap->devKind * pixmap->drawable.height >> 13) > sna->kgem.cpu_cache_pages;
+#endif
 	list_init(&priv->list);
 	list_init(&priv->inactive);
 	priv->pixmap = pixmap;
@@ -383,6 +403,7 @@ struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap)
 	if (priv)
 		return priv;
 
+	sna = to_sna_from_pixmap(pixmap);
 	switch (pixmap->usage_hint) {
 	case CREATE_PIXMAP_USAGE_GLYPH_PICTURE:
 #if FAKE_CREATE_PIXMAP_USAGE_SCRATCH_HEADER
@@ -395,7 +416,6 @@ struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap)
 		break;
 
 	default:
-		sna = to_sna_from_pixmap(pixmap);
 		if (!kgem_can_create_2d(&sna->kgem,
 					pixmap->drawable.width,
 					pixmap->drawable.height,
@@ -405,7 +425,7 @@ struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap)
 		break;
 	}
 
-	return _sna_pixmap_attach(pixmap);
+	return _sna_pixmap_attach(sna, pixmap);
 }
 
 static inline PixmapPtr
@@ -461,7 +481,7 @@ sna_pixmap_create_scratch(ScreenPtr screen,
 		if (!pixmap)
 			return NullPixmap;
 
-		priv = _sna_pixmap_attach(pixmap);
+		priv = _sna_pixmap_attach(sna, pixmap);
 		if (!priv) {
 			fbDestroyPixmap(pixmap);
 			return NullPixmap;
@@ -478,6 +498,7 @@ sna_pixmap_create_scratch(ScreenPtr screen,
 	}
 
 	priv->freed = 1;
+	priv->inplace = 1;
 	sna_damage_all(&priv->gpu_damage, width, height);
 
 	miModifyPixmapHeader(pixmap,
@@ -595,6 +616,32 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 	     __FUNCTION__, priv->gpu_bo, priv->gpu_damage));
 
 	if ((flags & MOVE_READ) == 0) {
+		assert(flags == MOVE_WRITE);
+
+		if (priv->inplace && priv->gpu_bo && INPLACE_MAP) {
+			if (priv->gpu_bo->gpu) {
+				sna_pixmap_destroy_gpu_bo(sna, priv);
+				if (!sna_pixmap_move_to_gpu(pixmap))
+					goto skip_inplace_map;
+			}
+
+			pixmap->devPrivate.ptr =
+				kgem_bo_map(&sna->kgem, priv->gpu_bo,
+					    PROT_WRITE);
+			priv->mapped = 1;
+
+			sna_damage_all(&priv->gpu_damage,
+				       pixmap->drawable.width,
+				       pixmap->drawable.height);
+			sna_damage_destroy(&priv->cpu_damage);
+			if (priv->cpu_bo)
+				sna_pixmap_free_cpu(sna, priv);
+
+			priv->gpu = true;
+			return true;
+		}
+
+skip_inplace_map:
 		if (priv->cpu_bo && priv->cpu_bo->gpu) {
 			if (priv->cpu_bo->exec == NULL)
 				kgem_retire(&sna->kgem);
@@ -608,6 +655,11 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 		sna_damage_destroy(&priv->gpu_damage);
 	}
 
+	if (priv->mapped) {
+		pixmap->devPrivate.ptr = NULL;
+		priv->mapped = 0;
+	}
+
 	if (pixmap->devPrivate.ptr == NULL &&
 	    !sna_pixmap_alloc_cpu(sna, pixmap, priv))
 		return false;
@@ -722,18 +774,46 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		return sna_pixmap_move_to_cpu(pixmap, flags);
 	}
 
-	if ((flags & MOVE_READ) == 0 && priv->cpu_bo && !priv->cpu_bo->vmap) {
-		if (sync_will_stall(priv->cpu_bo) && priv->cpu_bo->exec == NULL)
-			kgem_retire(&sna->kgem);
-		if (sync_will_stall(priv->cpu_bo)) {
-			sna_damage_subtract(&priv->cpu_damage, region);
-			if (!sna_pixmap_move_to_gpu(pixmap))
-				return false;
+	if ((flags & MOVE_READ) == 0) {
+		assert(flags == MOVE_WRITE);
 
-			sna_pixmap_free_cpu(sna, priv);
+		if (priv->inplace && priv->gpu_bo && INPLACE_MAP) {
+			if (sync_will_stall(priv->gpu_bo) &&
+			    priv->gpu_bo->exec == NULL)
+				kgem_retire(&sna->kgem);
+
+			if (!sync_will_stall(priv->gpu_bo)) {
+				pixmap->devPrivate.ptr =
+					kgem_bo_map(&sna->kgem, priv->gpu_bo,
+						    PROT_WRITE);
+				priv->mapped = 1;
+
+				sna_damage_subtract(&priv->cpu_damage, region);
+				sna_damage_add(&priv->gpu_damage, region);
+
+				priv->gpu = true;
+				return true;
+			}
+		}
+
+		if (priv->cpu_bo && !priv->cpu_bo->vmap) {
+			if (sync_will_stall(priv->cpu_bo) && priv->cpu_bo->exec == NULL)
+				kgem_retire(&sna->kgem);
+			if (sync_will_stall(priv->cpu_bo)) {
+				sna_damage_subtract(&priv->cpu_damage, region);
+				if (!sna_pixmap_move_to_gpu(pixmap))
+					return false;
+
+				sna_pixmap_free_cpu(sna, priv);
+			}
 		}
 	}
 
+	if (priv->mapped) {
+		pixmap->devPrivate.ptr = NULL;
+		priv->mapped = 0;
+	}
+
 	if (pixmap->devPrivate.ptr == NULL &&
 	    !sna_pixmap_alloc_cpu(sna, pixmap, priv))
 		return false;
@@ -1501,7 +1581,7 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	 * So we try again with vma caching and only for pixmaps who will be
 	 * immediately flushed...
 	 */
-	if (priv->flush &&
+	if ((priv->flush || (priv->inplace && priv->gpu_bo)) &&
 	    sna_put_image_upload_blt(drawable, gc, region,
 				     x, y, w, h, bits, stride)) {
 		if (region_subsumes_drawable(region, &pixmap->drawable)) {
@@ -2097,6 +2177,9 @@ fallback:
 static bool copy_use_gpu_bo(struct sna *sna,
 			    struct sna_pixmap *priv)
 {
+	if (priv->inplace)
+		return true;
+
 	if (!priv->cpu_bo)
 	       return false;
 
commit dd8fd6c90612ada39eb32b98adc5acc97e7902aa
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 17 12:38:09 2011 +0000

    sna: Search through the inactive VMA cache for potential upload bo
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
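
    A minimal sketch of the search (list_for_each_entry as used elsewhere
    in the driver; the function name is hypothetical), mirroring the size
    checks in the kgem.c hunk below:

        /* prefer an inactive bo that still carries a VMA, so an upload
         * can begin without a fresh mmap; reject poor size matches */
        static struct kgem_bo *
        lookup_inactive_vma(struct kgem *kgem, int size)
        {
                struct kgem_bo *bo;

                list_for_each_entry(bo, &kgem->vma_inactive, vma) {
                        if (size > bo->size || 2*size < bo->size)
                                continue;  /* too small or too wasteful */
                        return bo;
                }
                return NULL;
        }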

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 950b8ec..17eca52 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -392,6 +392,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	list_init(&kgem->requests);
 	list_init(&kgem->flushing);
 	list_init(&kgem->vma_cache);
+	list_init(&kgem->vma_inactive);
 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++)
 		list_init(&kgem->inactive[i]);
 	for (i = 0; i < ARRAY_SIZE(kgem->active); i++)
@@ -716,6 +717,8 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		DBG(("%s: handle=%d -> inactive\n", __FUNCTION__, bo->handle));
 		assert(!kgem_busy(kgem, bo->handle));
 		list_move(&bo->list, inactive(kgem, bo->size));
+		if (bo->map)
+			list_move(&bo->vma, &kgem->vma_inactive);
 		kgem->need_expire = true;
 	}
 
@@ -749,6 +752,7 @@ bool kgem_retire(struct kgem *kgem)
 			bo->purged = true;
 			bo->needs_flush = false;
 			bo->gpu = false;
+			assert(bo->rq == NULL);
 			list_move(&bo->list, inactive(kgem, bo->size));
 			list_del(&bo->request);
 		} else
@@ -813,6 +817,7 @@ bool kgem_retire(struct kgem *kgem)
 		if (gem_madvise(kgem->fd, rq->bo->handle, I915_MADV_DONTNEED)) {
 			rq->bo->purged = true;
 			assert(rq->bo->gpu == 0);
+			assert(rq->bo->rq == NULL);
 			list_move(&rq->bo->list, inactive(kgem, rq->bo->size));
 			retired = true;
 		} else {
@@ -1375,6 +1380,10 @@ search_linear_cache(struct kgem *kgem, unsigned int size, bool use_active)
 		list_del(&bo->list);
 		if (bo->rq == NULL)
 			list_del(&bo->request);
+		if (bo->map) {
+			assert(!list_is_empty(&bo->vma));
+			list_move_tail(&bo->vma, &kgem->vma_cache);
+		}
 
 		bo->tiling = I915_TILING_NONE;
 		bo->pitch = 0;
@@ -1624,8 +1633,56 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 				 flags & CREATE_SCANOUT,
 				 width, height, bpp, tiling, &pitch);
 	assert(size && size <= kgem->max_object_size);
-	if (flags & CREATE_INACTIVE)
+
+	if (flags & CREATE_INACTIVE) {
+		/* We presume that we will need to upload to this bo,
+		 * and so would prefer to have an active VMA.
+		 */
+		list_for_each_entry(bo, &kgem->vma_inactive, vma) {
+			assert(bo->refcnt == 0);
+			assert(bo->map);
+			assert(bo->rq == NULL);
+			assert(list_is_empty(&bo->request));
+
+			if (size > bo->size || 2*size < bo->size) {
+				DBG(("inactive vma too small/large: %d < %d\n",
+				     bo->size, size));
+				continue;
+			}
+
+			if (bo->tiling != tiling ||
+			    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
+				DBG(("inactive vma with wrong tiling: %d < %d\n",
+				     bo->tiling, tiling));
+				continue;
+			}
+
+			bo->pitch = pitch;
+			list_del(&bo->list);
+
+			if (bo->purged) {
+				if (!gem_madvise(kgem->fd, bo->handle,
+						 I915_MADV_WILLNEED)) {
+					kgem_bo_free(kgem, bo);
+					break;
+				}
+
+				bo->purged = false;
+			}
+
+			bo->delta = 0;
+			bo->unique_id = kgem_get_unique_id(kgem);
+			list_move_tail(&bo->vma, &kgem->vma_cache);
+			assert(bo->pitch);
+			DBG(("  from inactive vma: pitch=%d, tiling=%d: handle=%d, id=%d\n",
+			     bo->pitch, bo->tiling, bo->handle, bo->unique_id));
+			assert(bo->reusable);
+			assert(bo->gpu == 0 && !kgem_busy(kgem, bo->handle));
+			return kgem_bo_reference(bo);
+		}
+
 		goto skip_active_search;
+	}
 
 	untiled_pitch = kgem_untiled_pitch(kgem,
 					   width, bpp,
@@ -1725,6 +1782,9 @@ skip_active_search:
 			bo->purged = false;
 		}
 
+		if (bo->map)
+			list_move_tail(&bo->vma, &kgem->vma_cache);
+
 		bo->delta = 0;
 		bo->unique_id = kgem_get_unique_id(kgem);
 		assert(bo->pitch);
@@ -1946,9 +2006,15 @@ static void kgem_trim_vma_cache(struct kgem *kgem)
 	while (kgem->vma_count > MAX_VMA_CACHE) {
 		struct kgem_bo *old;
 
-		old = list_first_entry(&kgem->vma_cache,
-				       struct kgem_bo,
-				       vma);
+		if (list_is_empty(&kgem->vma_inactive)) {
+			old = list_first_entry(&kgem->vma_cache,
+					       struct kgem_bo,
+					       vma);
+		} else {
+			old = list_last_entry(&kgem->vma_inactive,
+					      struct kgem_bo,
+					      vma);
+		}
 		DBG(("%s: discarding %s vma cache for %d\n",
 		     __FUNCTION__, IS_CPU_MAP(old->map) ? "CPU" : "GTT",
 		     old->handle));
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index b69f1ee..b645568 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -105,6 +105,7 @@ struct kgem {
 	struct list partial;
 	struct list requests;
 	struct list vma_cache;
+	struct list vma_inactive;
 	struct kgem_request *next_request;
 
 	uint16_t nbatch;
commit 8ef5d8c1955e2e2ee19c64730f600639ac42de55
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Dec 15 17:52:20 2011 +0000

    sna: Map the upload buffer using an LLC bo
    
    In order to avoid having to perform a copy of the cacheable buffer into
    GPU space, we can map a bo as cacheable and write directly to its
    contents. This is only a win on systems that can avoid the clflush, and
    we also have to go to greater lengths to avoid unnecessary
    serialisation upon that CPU bo. Sadly, we do not yet go far enough to
    avoid negatively impacting ShmPutImage, but that does not appear to be
    an artefact of stalling upon a CPU buffer.
    
    Note, LLC is a SandyBridge feature enabled by default in kernel 3.1 and
    later. In time, we should be able to expose similar support for
    snoopable buffers for other generations.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
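
    A sketch of the underlying kernel interface (standalone, not the
    driver's kgem wrappers; the function name is invented and the
    i915_drm.h include path may vary between systems):

        #include <stdint.h>
        #include <sys/ioctl.h>
        #include <drm/i915_drm.h>

        /* mmap a bo through the CPU (snooped, cacheable on LLC) path
         * and move it to the CPU domain, so that subsequent writes need
         * no later copy into GPU space and no clflush on LLC parts */
        static void *
        map_upload_buffer(int fd, uint32_t handle, uint64_t size)
        {
                struct drm_i915_gem_mmap mmap_arg = {
                        .handle = handle,
                        .size = size,
                };
                struct drm_i915_gem_set_domain set_domain = {
                        .handle = handle,
                        .read_domains = I915_GEM_DOMAIN_CPU,
                        .write_domain = I915_GEM_DOMAIN_CPU,
                };

                if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))
                        return NULL;
                ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
                return (void *)(uintptr_t)mmap_arg.addr_ptr;
        }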

diff --git a/configure.ac b/configure.ac
index 1ac72ec..380970d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -262,7 +262,7 @@ fi
 if test "x$DEBUG" != xno; then
 	AC_DEFINE(HAS_EXTRA_DEBUG,1,[Enable additional debugging])
 	PKG_CHECK_MODULES(VALGRIND, [valgrind],
-			  AC_DEFINE([HAVE_VALGRIND], 0, [Use valgind intrinsics to suppress false warings]),)
+			  AC_DEFINE([HAVE_VALGRIND], 1, [Use valgrind intrinsics to suppress false warings]),)
 fi
 if test "x$DEBUG" = xfull; then
 	AC_DEFINE(HAS_DEBUG_FULL,1,[Enable all debugging])
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index ea82b20..d882ea5 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2041,10 +2041,9 @@ gen6_composite_set_target(struct sna *sna,
 	op->dst.width  = op->dst.pixmap->drawable.width;
 	op->dst.height = op->dst.pixmap->drawable.height;
 	op->dst.format = dst->format;
-	priv = sna_pixmap(op->dst.pixmap);
 
 	op->dst.bo = NULL;
-#if USE_VMAP
+	priv = sna_pixmap(op->dst.pixmap);
 	if (priv && priv->gpu_bo == NULL &&
 	    I915_TILING_NONE == kgem_choose_tiling(&sna->kgem,
 						   I915_TILING_X,
@@ -2054,7 +2053,6 @@ gen6_composite_set_target(struct sna *sna,
 		op->dst.bo = priv->cpu_bo;
 		op->damage = &priv->cpu_damage;
 	}
-#endif
 	if (op->dst.bo == NULL) {
 		priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
 		if (priv == NULL)
@@ -2154,7 +2152,7 @@ gen6_composite_fallback(struct sna *sna,
 
 	/* If anything is on the GPU, push everything out to the GPU */
 	priv = sna_pixmap(dst_pixmap);
-	if (priv && priv->gpu_damage) {
+	if (priv && (priv->gpu_damage || (priv->cpu_bo && priv->cpu_bo->gpu))) {
 		DBG(("%s: dst is already on the GPU, try to use GPU\n",
 		     __FUNCTION__));
 		return FALSE;
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 9de55ac..950b8ec 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -44,15 +44,20 @@
 #include <memcheck.h>
 #endif
 
-static inline void list_move(struct list *list, struct list *head)
+static inline void _list_del(struct list *list)
 {
 	__list_del(list->prev, list->next);
+}
+
+static inline void list_move(struct list *list, struct list *head)
+{
+	_list_del(list);
 	list_add(list, head);
 }
 
 static inline void list_move_tail(struct list *list, struct list *head)
 {
-	__list_del(list->prev, list->next);
+	_list_del(list);
 	list_add_tail(list, head);
 }
 
@@ -94,6 +99,7 @@ static inline void list_replace(struct list *old,
 
 struct kgem_partial_bo {
 	struct kgem_bo base;
+	void *mem;
 	uint32_t used, alloc;
 	uint32_t need_io : 1;
 	uint32_t write : 1;
@@ -201,8 +207,11 @@ static int gem_read(int fd, uint32_t handle, const void *dst,
 	pread.size = length;
 	pread.data_ptr = (uintptr_t)dst;
 	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread);
-	if (ret)
+	if (ret) {
+		DBG(("%s: failed, errno=%d\n", __FUNCTION__, errno));
+		assert(0);
 		return ret;
+	}
 
 	VG(VALGRIND_MAKE_MEM_DEFINED(dst, length));
 	return 0;
@@ -287,8 +296,7 @@ static struct kgem_bo *__kgem_bo_init(struct kgem_bo *bo,
 	bo->handle = handle;
 	bo->size = size;
 	bo->reusable = true;
-	bo->cpu_read = true;
-	bo->cpu_write = true;
+	bo->cpu = true;
 	list_init(&bo->request);
 	list_init(&bo->list);
 	list_init(&bo->vma);
@@ -610,6 +618,7 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
 	struct kgem_bo_binding *b;
 
 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
+	assert(bo->exec == NULL);
 
 	b = bo->binding.next;
 	while (b) {
@@ -626,13 +635,19 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
 		list_del(&bo->vma);
 		kgem->vma_count--;
 	}
+	assert(list_is_empty(&bo->vma));
 
-	list_del(&bo->list);
-	list_del(&bo->request);
+	_list_del(&bo->list);
+	_list_del(&bo->request);
 	gem_close(kgem->fd, bo->handle);
 	free(bo);
 }
 
+static bool is_mmaped_buffer(struct kgem_partial_bo *bo)
+{
+	return bo->mem != bo+1;
+}
+
 static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 {
 	DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
@@ -646,11 +661,20 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		goto destroy;
 
 	if (bo->io) {
-		/* transfer the handle to a minimum bo */
-		struct kgem_bo *base = malloc(sizeof(*base));
+		struct kgem_partial_bo *io = (struct kgem_partial_bo *)bo;
+		struct kgem_bo *base;
+
+		if (is_mmaped_buffer(io))
+			kgem_bo_unmap__cpu(kgem, bo, io->mem);
+
+		base = malloc(sizeof(*base));
 		if (base) {
+			DBG(("%s: transferring io handle=%d to bo\n",
+			     __FUNCTION__, bo->handle));
+			/* transfer the handle to a minimum bo */
 			memcpy(base, bo, sizeof (*base));
 			base->reusable = true;
+			base->io = false;
 			list_init(&base->list);
 			list_replace(&bo->request, &base->request);
 			list_replace(&bo->vma, &base->vma);
@@ -665,7 +689,6 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		goto destroy;
 	}
 
-	kgem->need_expire = true;
 	if (bo->rq) {
 		DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
 		list_move(&bo->list, active(kgem, bo->size));
@@ -691,7 +714,9 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		}
 
 		DBG(("%s: handle=%d -> inactive\n", __FUNCTION__, bo->handle));
+		assert(!kgem_busy(kgem, bo->handle));
 		list_move(&bo->list, inactive(kgem, bo->size));
+		kgem->need_expire = true;
 	}
 
 	return;
@@ -795,7 +820,7 @@ bool kgem_retire(struct kgem *kgem)
 			kgem_bo_free(kgem, rq->bo);
 		}
 
-		list_del(&rq->list);
+		_list_del(&rq->list);
 		free(rq);
 	}
 
@@ -819,8 +844,7 @@ static void kgem_commit(struct kgem *kgem)
 		bo->binding.offset = 0;
 		bo->exec = NULL;
 		bo->dirty = false;
-		bo->cpu_read = false;
-		bo->cpu_write = false;
+		bo->cpu = false;
 
 		if (!bo->refcnt && !bo->reusable) {
 			kgem_bo_free(kgem, bo);
@@ -831,6 +855,8 @@ static void kgem_commit(struct kgem *kgem)
 	if (rq == &_kgem_static_request) {
 		struct drm_i915_gem_set_domain set_domain;
 
+		DBG(("%s: syncing due to allocation failure\n", __FUNCTION__));
+
 		VG_CLEAR(set_domain);
 		set_domain.handle = rq->bo->handle;
 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
@@ -886,11 +912,11 @@ static void kgem_finish_partials(struct kgem *kgem)
 			     __FUNCTION__, bo->base.handle, bo->used, bo->alloc));
 			assert(!kgem_busy(kgem, bo->base.handle));
 			gem_write(kgem->fd, bo->base.handle,
-				  0, bo->used, bo+1);
+				  0, bo->used, bo->mem);
 			bo->need_io = 0;
 		}
 
-		VG(VALGRIND_MAKE_MEM_NOACCESS(bo+1, bo->alloc));
+		VG(VALGRIND_MAKE_MEM_NOACCESS(bo->mem, bo->alloc));
 		kgem_bo_unref(kgem, &bo->base);
 	}
 }
@@ -926,7 +952,7 @@ static void kgem_cleanup(struct kgem *kgem)
 				kgem_bo_free(kgem, bo);
 		}
 
-		list_del(&rq->list);
+		_list_del(&rq->list);
 		free(rq);
 	}
 
@@ -978,8 +1004,6 @@ void kgem_reset(struct kgem *kgem)
 			bo->binding.offset = 0;
 			bo->exec = NULL;
 			bo->dirty = false;
-			bo->cpu_read = false;
-			bo->cpu_write = false;
 			bo->rq = NULL;
 
 			list_del(&bo->request);
@@ -1155,6 +1179,8 @@ void _kgem_submit(struct kgem *kgem)
 			if (DEBUG_FLUSH_SYNC) {
 				struct drm_i915_gem_set_domain set_domain;
 
+				DBG(("%s: debug sync\n", __FUNCTION__));
+
 				VG_CLEAR(set_domain);
 				set_domain.handle = handle;
 				set_domain.read_domains = I915_GEM_DOMAIN_GTT;
@@ -1175,6 +1201,8 @@ void _kgem_submit(struct kgem *kgem)
 
 	kgem_reset(kgem);
 	kgem->flush_now = 1;
+
+	assert(kgem->next_request != NULL);
 }
 
 void kgem_throttle(struct kgem *kgem)
@@ -1291,6 +1319,8 @@ void kgem_cleanup_cache(struct kgem *kgem)
 				      struct kgem_request,
 				      list);
 
+		DBG(("%s: sync on cleanup\n", __FUNCTION__));
+
 		VG_CLEAR(set_domain);
 		set_domain.handle = rq->bo->handle;
 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
@@ -1669,6 +1699,14 @@ skip_active_search:
 						     bo->handle,
 						     tiling, pitch))
 				goto next_bo;
+
+			if (bo->map) {
+				munmap(CPU_MAP(bo->map), bo->size);
+				bo->map = NULL;
+
+				list_del(&bo->vma);
+				kgem->vma_count--;
+			}
 		}
 
 		bo->pitch = pitch;
@@ -1739,11 +1777,15 @@ void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		kgem_bo_unref(kgem, bo->proxy);
 
 		assert(bo->binding.next == NULL);
-		list_del(&bo->request);
+		assert(bo->map == NULL);
+		_list_del(&bo->request);
 		free(bo);
 		return;
 	}
 
+	if (bo->vmap)
+		kgem_bo_sync__cpu(kgem, bo);
+
 	__kgem_bo_destroy(kgem, bo);
 }
 
@@ -1910,10 +1952,14 @@ static void kgem_trim_vma_cache(struct kgem *kgem)
 		DBG(("%s: discarding %s vma cache for %d\n",
 		     __FUNCTION__, IS_CPU_MAP(old->map) ? "CPU" : "GTT",
 		     old->handle));
+		assert(old->map);
 		munmap(CPU_MAP(old->map), old->size);
 		old->map = NULL;
 		list_del(&old->vma);
 		kgem->vma_count--;
+
+		if (!old->gpu && old->refcnt == 0)
+			kgem_bo_free(kgem, old);
 	}
 }
 
@@ -1957,15 +2003,22 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 	if (bo->needs_flush | bo->gpu) {
 		struct drm_i915_gem_set_domain set_domain;
 
+		DBG(("%s: sync: needs_flush? %d, gpu? %d\n", __FUNCTION__,
+		     bo->needs_flush, bo->gpu));
+
+		/* XXX use PROT_READ to avoid the write flush? */
+
 		VG_CLEAR(set_domain);
 		set_domain.handle = bo->handle;
 		set_domain.read_domains = I915_GEM_DOMAIN_GTT;
-		set_domain.write_domain = prot & PROT_WRITE ? I915_GEM_DOMAIN_GTT : 0;
+		set_domain.write_domain = I915_GEM_DOMAIN_GTT;
 		drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
 
 		bo->needs_flush = false;
-		if (bo->gpu)
+		if (bo->gpu) {
+			kgem->sync = false;
 			kgem_retire(kgem);
+		}
 	}
 
 	list_move_tail(&bo->vma, &kgem->vma_cache);
@@ -1986,6 +2039,7 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
 		list_del(&bo->vma);
 		kgem->vma_count--;
 		bo->map = NULL;
+		VG(VALGRIND_MALLOCLIKE_BLOCK(ptr, bo->size, 0, 1));
 		return ptr;
 	}
 
@@ -2009,17 +2063,20 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
 		return NULL;
 	}
 
-	VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bo->size));
+	VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, bo->size, 0, 1));
 	return (void *)(uintptr_t)mmap_arg.addr_ptr;
 }
 
 void kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr)
 {
 	assert(bo->map == NULL);
+	assert(ptr != NULL);
 
 	bo->map = MAKE_CPU_MAP(ptr);
 	list_move(&bo->vma, &kgem->vma_cache);
 	kgem->vma_count++;
+
+	VG(VALGRIND_FREELIKE_BLOCK(ptr, 0));
 }
 
 void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo)
@@ -2029,6 +2086,7 @@ void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo)
 
 	DBG(("%s: (debug) releasing vma for handle=%d, count=%d\n",
 	     __FUNCTION__, bo->handle, kgem->vma_count-1));
+	assert(!IS_CPU_MAP(bo->map));
 
 	munmap(CPU_MAP(bo->map), bo->size);
 	bo->map = NULL;
@@ -2057,8 +2115,10 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
 
 	/* The bo is outside of our control, so presume it is written to */
 	bo->needs_flush = true;
-	bo->gpu = true;
-	bo->cpu_read = bo->cpu_write = false;
+
+	/* Henceforth, we need to broadcast all updates to clients and
+	 * flush our rendering before doing so.
+	 */
 	bo->flush = 1;
 	if (bo->exec)
 		kgem->flush = 1;
@@ -2132,33 +2192,37 @@ struct kgem_bo *kgem_create_map(struct kgem *kgem,
 }
 #endif
 
-void kgem_bo_sync(struct kgem *kgem, struct kgem_bo *bo, bool for_write)
+void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
 {
-	struct drm_i915_gem_set_domain set_domain;
-
 	kgem_bo_submit(kgem, bo);
-	if (for_write ? bo->cpu_write : bo->cpu_read)
-		return;
 
-	VG_CLEAR(set_domain);
-	set_domain.handle = bo->handle;
-	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
-	set_domain.write_domain = for_write ? I915_GEM_DOMAIN_CPU : 0;
+	/* XXX assumes bo is snoopable */
+	if (!bo->cpu) {
+		struct drm_i915_gem_set_domain set_domain;
 
-	drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
-	assert(!kgem_busy(kgem, bo->handle));
-	bo->needs_flush = false;
-	if (bo->gpu) {
-		kgem->sync = false;
-		kgem_retire(kgem);
+		DBG(("%s: sync: needs_flush? %d, gpu? %d, busy? %d\n", __FUNCTION__,
+		     bo->needs_flush, bo->gpu, kgem_busy(kgem, bo->handle)));
+
+		VG_CLEAR(set_domain);
+		set_domain.handle = bo->handle;
+		set_domain.read_domains = I915_GEM_DOMAIN_CPU;
+		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
+
+		drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
+		assert(!kgem_busy(kgem, bo->handle));
+		bo->needs_flush = false;
+		if (bo->gpu) {
+			kgem->sync = false;
+			kgem_retire(kgem);
+		}
+		bo->cpu = true;
 	}
-	bo->cpu_read = true;
-	if (for_write)
-		bo->cpu_write = true;
 }
 
 void kgem_sync(struct kgem *kgem)
 {
+	DBG(("%s\n", __FUNCTION__));
+
 	if (!list_is_empty(&kgem->requests)) {
 		struct drm_i915_gem_set_domain set_domain;
 		struct kgem_request *rq;
@@ -2336,12 +2400,61 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 			__kgem_bo_init(&bo->base, handle, alloc);
 			bo->base.vmap = true;
 			bo->need_io = 0;
+			bo->mem = bo + 1;
 			goto init;
 		} else
 			free(bo);
 	}
 
-	{
+	if (!DEBUG_NO_LLC && kgem->gen >= 60) {
+		struct kgem_bo *old;
+
+		bo = malloc(sizeof(*bo));
+		if (bo == NULL)
+			return NULL;
+
+		/* Be a little more generous and hope to hold fewer mmappings */
+		alloc = ALIGN(size, 128*1024);
+
+		old = NULL;
+		if (!write)
+			old = search_linear_cache(kgem, alloc, true);
+		if (old == NULL)
+			old = search_linear_cache(kgem, alloc, false);
+		if (old) {
+			DBG(("%s: reusing handle=%d for buffer\n",
+			     __FUNCTION__, old->handle));
+
+			memcpy(&bo->base, old, sizeof(*old));
+			if (old->rq)
+				list_replace(&old->request, &bo->base.request);
+			else
+				list_init(&bo->base.request);
+			list_replace(&old->vma, &bo->base.vma);
+			free(old);
+			bo->base.refcnt = 1;
+		} else {
+			if (!__kgem_bo_init(&bo->base,
+					    gem_create(kgem->fd, alloc),
+					    alloc)) {
+				free(bo);
+				return NULL;
+			}
+			DBG(("%s: created handle=%d for buffer\n",
+			     __FUNCTION__, bo->base.handle));
+		}
+
+		bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
+		if (bo->mem == NULL) {
+			kgem_bo_free(kgem, &bo->base);
+			return NULL;
+		}
+
+		bo->need_io = false;
+		bo->base.io = true;
+
+		alloc = bo->base.size;
+	} else {
 		struct kgem_bo *old;
 
 		old = NULL;
@@ -2355,14 +2468,16 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 			if (bo == NULL)
 				return NULL;
 
+			bo->mem = bo + 1;
+			bo->need_io = write;
+
 			memcpy(&bo->base, old, sizeof(*old));
 			if (old->rq)
 				list_replace(&old->request,
 					     &bo->base.request);
 			else
 				list_init(&bo->base.request);
-			list_replace(&old->vma,
-				     &bo->base.vma);
+			list_replace(&old->vma, &bo->base.vma);
 			free(old);
 			bo->base.refcnt = 1;
 		} else {
@@ -2376,9 +2491,10 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 				free(bo);
 				return NULL;
 			}
+			bo->mem = bo + 1;
+			bo->need_io = write;
 		}
-		bo->need_io = write;
-		bo->base.io = write;
+		bo->base.io = true;
 	}
 init:
 	bo->base.reusable = false;
@@ -2409,13 +2525,11 @@ done:
 					     struct kgem_partial_bo,
 					     base.list);
 		}
-		if (p != first) {
-			__list_del(bo->base.list.prev, bo->base.list.next);
-			list_add_tail(&bo->base.list, &p->base.list);
-		}
+		if (p != first)
+			list_move_tail(&bo->base.list, &p->base.list);
 		assert(validate_partials(kgem));
 	}
-	*ret = (char *)(bo+1) + offset;
+	*ret = (char *)bo->mem + offset;
 	return kgem_create_proxy(&bo->base, offset, size);
 }
 
@@ -2511,24 +2625,29 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 	struct kgem_partial_bo *bo;
 	uint32_t offset = _bo->delta, length = _bo->size;
 
+	assert(_bo->exec == NULL);
 	if (_bo->proxy)
 		_bo = _bo->proxy;
+	assert(_bo->exec == NULL);
 
 	bo = (struct kgem_partial_bo *)_bo;
 
 	DBG(("%s(offset=%d, length=%d, vmap=%d)\n", __FUNCTION__,
 	     offset, length, bo->base.vmap));
 
-	if (!bo->base.vmap) {
+	if (!bo->base.vmap && !is_mmaped_buffer(bo)) {
 		gem_read(kgem->fd,
 			 bo->base.handle, (char *)(bo+1)+offset,
 			 offset, length);
 		assert(!kgem_busy(kgem, bo->base.handle));
 		bo->base.needs_flush = false;
-		if (bo->base.gpu)
+		if (bo->base.gpu) {
+			kgem->sync = false;
 			kgem_retire(kgem);
+		}
+		assert(bo->base.gpu == false);
 	} else
-		kgem_bo_sync(kgem, &bo->base, false);
+		kgem_bo_sync__cpu(kgem, &bo->base);
 }
 
 uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format)
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 2fd5a55..b69f1ee 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -71,9 +71,8 @@ struct kgem_bo {
 	uint32_t reusable : 1;
 	uint32_t dirty : 1;
 	uint32_t gpu : 1;
+	uint32_t cpu : 1;
 	uint32_t needs_flush : 1;
-	uint32_t cpu_read : 1;
-	uint32_t cpu_write : 1;
 	uint32_t vmap : 1;
 	uint32_t io : 1;
 	uint32_t flush : 1;
@@ -320,6 +319,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot);
 void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo);
 void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
+void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo);
 void kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr);
 uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
 
@@ -352,7 +352,6 @@ static inline void kgem_bo_mark_dirty(struct kgem_bo *bo)
 	bo->dirty = true;
 }
 
-void kgem_bo_sync(struct kgem *kgem, struct kgem_bo *bo, bool for_write);
 void kgem_sync(struct kgem *kgem);
 
 #define KGEM_BUFFER_WRITE	0x1
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 3d13f8d..a112fe3 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -94,6 +94,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #define DEBUG_NO_RENDER 0
 #define DEBUG_NO_BLT 0
 #define DEBUG_NO_IO 0
+#define DEBUG_NO_LLC 0
 
 #define DEBUG_FLUSH_CACHE 0
 #define DEBUG_FLUSH_BATCH 0
@@ -141,7 +142,6 @@ struct sna_pixmap {
 #define SOURCE_BIAS 4
 	uint16_t source_count;
 	uint8_t pinned :1;
-	uint8_t gpu_only :1;
 	uint8_t flush :1;
 	uint8_t gpu :1;
 	uint8_t freed :1;
@@ -428,19 +428,21 @@ PixmapPtr sna_pixmap_create_upload(ScreenPtr screen,
 struct sna_pixmap *sna_pixmap_move_to_gpu(PixmapPtr pixmap);
 struct sna_pixmap *sna_pixmap_force_to_gpu(PixmapPtr pixmap);
 
-bool must_check sna_pixmap_move_to_cpu(PixmapPtr pixmap, bool write);
+#define MOVE_WRITE 0x1
+#define MOVE_READ 0x2
+bool must_check sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned flags);
 bool must_check sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 						RegionPtr region,
-						Bool write);
+						unsigned flags);
 
 static inline bool must_check
-sna_drawable_move_to_cpu(DrawablePtr drawable, bool write)
+sna_drawable_move_to_cpu(DrawablePtr drawable, unsigned flags)
 {
 	RegionRec region;
 
 	pixman_region_init_rect(&region,
 				0, 0, drawable->width, drawable->height);
-	return sna_drawable_move_region_to_cpu(drawable, &region, write);
+	return sna_drawable_move_region_to_cpu(drawable, &region, flags);
 }
 
 static inline bool must_check
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 8957a66..76ac89d 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -61,7 +61,6 @@
 #define USE_SPANS 0
 #define USE_ZERO_SPANS 1
 #define USE_BO_FOR_SCRATCH_PIXMAP 1
-#define USE_LLC_CPU_BO 1
 
 static int sna_font_key;
 
@@ -183,7 +182,10 @@ sna_pixmap_alloc_cpu(struct sna *sna,
 		     PixmapPtr pixmap,
 		     struct sna_pixmap *priv)
 {
-	if (USE_LLC_CPU_BO && sna->kgem.gen >= 60) {
+	assert(priv->ptr == NULL);
+	assert(pixmap->devKind);
+
+	if (!DEBUG_NO_LLC && sna->kgem.gen >= 60) {
 		DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__,
 		     pixmap->drawable.width, pixmap->drawable.height));
 
@@ -225,7 +227,9 @@ static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
 		priv->cpu_bo = NULL;
 	} else
 		free(priv->ptr);
+
 	priv->pixmap->devPrivate.ptr = priv->ptr = NULL;
+	list_del(&priv->list);
 }
 
 static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
@@ -246,11 +250,10 @@ static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
 		sna_pixmap_free_cpu(sna, priv);
 
 	if (priv->cpu_bo) {
-		if (kgem_bo_is_busy(priv->cpu_bo)) {
+		if (priv->cpu_bo->vmap && kgem_bo_is_busy(priv->cpu_bo)) {
 			list_add_tail(&priv->list, &sna->deferred_free);
 			return false;
 		}
-		kgem_bo_sync(&sna->kgem, priv->cpu_bo, true);
 		kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
 	}
 
@@ -474,7 +477,6 @@ sna_pixmap_create_scratch(ScreenPtr screen,
 		return NullPixmap;
 	}
 
-	priv->gpu_only = 1;
 	priv->freed = 1;
 	sna_damage_all(&priv->gpu_damage, width, height);
 
@@ -576,12 +578,12 @@ static inline void list_move(struct list *list, struct list *head)
 }
 
 bool
-sna_pixmap_move_to_cpu(PixmapPtr pixmap, bool write)
+sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 {
 	struct sna *sna = to_sna_from_pixmap(pixmap);
 	struct sna_pixmap *priv;
 
-	DBG(("%s(pixmap=%p, write=%d)\n", __FUNCTION__, pixmap, write));
+	DBG(("%s(pixmap=%p, flags=%x)\n", __FUNCTION__, pixmap, flags));
 
 	priv = sna_pixmap(pixmap);
 	if (priv == NULL) {
@@ -589,17 +591,27 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, bool write)
 		return true;
 	}
 
-	DBG(("%s: gpu_bo=%p, gpu_damage=%p, gpu_only=%d\n",
-	     __FUNCTION__, priv->gpu_bo, priv->gpu_damage, priv->gpu_only));
+	DBG(("%s: gpu_bo=%p, gpu_damage=%p\n",
+	     __FUNCTION__, priv->gpu_bo, priv->gpu_damage));
 
-	if (pixmap->devPrivate.ptr == NULL) {
-		assert(priv->ptr == NULL);
-		assert(pixmap->devKind);
-		assert(priv->cpu_damage == NULL);
-		if (!sna_pixmap_alloc_cpu(sna, pixmap, priv))
-			return false;
+	if ((flags & MOVE_READ) == 0) {
+		if (priv->cpu_bo && priv->cpu_bo->gpu) {
+			if (priv->cpu_bo->exec == NULL)
+				kgem_retire(&sna->kgem);
+
+			if (priv->cpu_bo->gpu) {
+				DBG(("%s: discarding busy CPU bo\n", __FUNCTION__));
+				sna_pixmap_free_cpu(sna, priv);
+			}
+		}
+
+		sna_damage_destroy(&priv->gpu_damage);
 	}
 
+	if (pixmap->devPrivate.ptr == NULL &&
+	    !sna_pixmap_alloc_cpu(sna, pixmap, priv))
+		return false;
+
 	if (priv->gpu_bo == NULL) {
 		DBG(("%s: no GPU bo\n", __FUNCTION__));
 		goto done;
@@ -638,10 +650,10 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, bool write)
 done:
 	if (priv->cpu_bo) {
 		DBG(("%s: syncing CPU bo\n", __FUNCTION__));
-		kgem_bo_sync(&sna->kgem, priv->cpu_bo, write);
+		kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
 	}
 
-	if (write) {
+	if (flags & MOVE_WRITE) {
 		DBG(("%s: marking as damaged\n", __FUNCTION__));
 		sna_damage_all(&priv->cpu_damage,
 			       pixmap->drawable.width,
@@ -670,22 +682,27 @@ region_subsumes_drawable(RegionPtr region, DrawablePtr drawable)
 		extents->y2 >= drawable->height;
 }
 
+static bool sync_will_stall(struct kgem_bo *bo)
+{
+	return bo->gpu | bo->needs_flush;
+}
+
 bool
 sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 				RegionPtr region,
-				Bool write)
+				unsigned flags)
 {
 	PixmapPtr pixmap = get_drawable_pixmap(drawable);
 	struct sna *sna = to_sna_from_pixmap(pixmap);
 	struct sna_pixmap *priv;
 	int16_t dx, dy;
 
-	DBG(("%s(pixmap=%p (%dx%d), [(%d, %d), (%d, %d)], write=%d)\n",
+	DBG(("%s(pixmap=%p (%dx%d), [(%d, %d), (%d, %d)], flags=%d)\n",
 	     __FUNCTION__, pixmap,
 	     pixmap->drawable.width, pixmap->drawable.height,
 	     RegionExtents(region)->x1, RegionExtents(region)->y1,
 	     RegionExtents(region)->x2, RegionExtents(region)->y2,
-	     write));
+	     flags));
 
 	priv = sna_pixmap(pixmap);
 	if (priv == NULL) {
@@ -702,17 +719,25 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		DBG(("%s: region subsumes drawable\n", __FUNCTION__));
 		if (dx | dy)
 			RegionTranslate(region, -dx, -dy);
-		return sna_pixmap_move_to_cpu(pixmap, write);
+		return sna_pixmap_move_to_cpu(pixmap, flags);
 	}
 
-	if (pixmap->devPrivate.ptr == NULL) {
-		assert(priv->ptr == NULL);
-		assert(pixmap->devKind);
-		assert(priv->cpu_damage == NULL);
-		if (!sna_pixmap_alloc_cpu(sna, pixmap, priv))
-			return false;
+	if ((flags & MOVE_READ) == 0 && priv->cpu_bo && !priv->cpu_bo->vmap) {
+		if (sync_will_stall(priv->cpu_bo) && priv->cpu_bo->exec == NULL)
+			kgem_retire(&sna->kgem);
+		if (sync_will_stall(priv->cpu_bo)) {
+			sna_damage_subtract(&priv->cpu_damage, region);
+			if (!sna_pixmap_move_to_gpu(pixmap))
+				return false;
+
+			sna_pixmap_free_cpu(sna, priv);
+		}
 	}
 
+	if (pixmap->devPrivate.ptr == NULL &&
+	    !sna_pixmap_alloc_cpu(sna, pixmap, priv))
+		return false;
+
 	if (priv->gpu_bo == NULL)
 		goto done;
 
@@ -723,9 +748,12 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		     region->extents.x2 - region->extents.x1,
 		     region->extents.y2 - region->extents.y1));
 
-		if (!write &&
-		    region->extents.x2 - region->extents.x1 == 1 &&
-		    region->extents.y2 - region->extents.y1 == 1) {
+		if ((flags & MOVE_READ) == 0) {
+			assert(flags == MOVE_WRITE);
+			sna_damage_subtract(&priv->gpu_damage, region);
+		} else if ((flags & MOVE_WRITE) == 0 &&
+			   region->extents.x2 - region->extents.x1 == 1 &&
+			   region->extents.y2 - region->extents.y1 == 1) {
 			/*  Often associated with synchronisation, KISS */
 			sna_read_boxes(sna,
 				       priv->gpu_bo, 0, 0,
@@ -796,10 +824,10 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 done:
 	if (priv->cpu_bo) {
 		DBG(("%s: syncing cpu bo\n", __FUNCTION__));
-		kgem_bo_sync(&sna->kgem, priv->cpu_bo, write);
+		kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
 	}
 
-	if (write) {
+	if (flags & MOVE_WRITE) {
 		DBG(("%s: applying cpu damage\n", __FUNCTION__));
 		assert_pixmap_contains_box(pixmap, RegionExtents(region));
 		sna_damage_add(&priv->cpu_damage, region);
@@ -881,7 +909,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box)
 done:
 	if (priv->cpu_damage == NULL)
 		list_del(&priv->list);
-	if (!priv->gpu_only && !priv->pinned)
+	if (!priv->pinned)
 		list_move(&priv->inactive, &sna->active_pixmaps);
 }
 
@@ -924,13 +952,11 @@ _sna_drawable_use_gpu_bo(DrawablePtr drawable,
 
 	sna_pixmap_move_area_to_gpu(pixmap, &extents);
 done:
-	if (damage) {
-		if (sna_damage_contains_box(priv->gpu_damage,
-					    &extents) != PIXMAN_REGION_IN)
-			*damage = &priv->gpu_damage;
-		else
-			*damage = NULL;
-	}
+	if (sna_damage_contains_box(priv->gpu_damage,
+				    &extents) != PIXMAN_REGION_IN)
+		*damage = &priv->gpu_damage;
+	else
+		*damage = NULL;
 
 	return TRUE;
 }
@@ -973,17 +999,15 @@ _sna_drawable_use_cpu_bo(DrawablePtr drawable,
 		goto done;
 
 	if (sna_damage_contains_box(priv->gpu_damage,
-				       &extents) != PIXMAN_REGION_OUT)
+				    &extents) != PIXMAN_REGION_OUT)
 		return FALSE;
 
 done:
-	if (damage) {
-		if (sna_damage_contains_box(priv->cpu_damage,
-					    &extents) != PIXMAN_REGION_IN)
-			*damage = &priv->cpu_damage;
-		else
-			*damage = NULL;
-	}
+	if (sna_damage_contains_box(priv->cpu_damage,
+				    &extents) != PIXMAN_REGION_IN)
+		*damage = &priv->cpu_damage;
+	else
+		*damage = NULL;
 
 	return TRUE;
 }
@@ -1053,7 +1077,6 @@ sna_pixmap_create_upload(ScreenPtr screen,
 	priv->cpu_bo = NULL;
 	priv->cpu_damage = priv->gpu_damage = NULL;
 	priv->ptr = NULL;
-	priv->gpu_only = 0;
 	priv->pinned = 0;
 	priv->freed = 1;
 	list_init(&priv->list);
@@ -1195,7 +1218,7 @@ done:
 			      pixmap->drawable.width,
 			      pixmap->drawable.height);
 	list_del(&priv->list);
-	if (!priv->gpu_only && !priv->pinned)
+	if (!priv->pinned)
 		list_move(&priv->inactive, &sna->active_pixmaps);
 	priv->gpu = true;
 	return priv;
@@ -1209,7 +1232,7 @@ static bool must_check sna_validate_pixmap(DrawablePtr draw, PixmapPtr pixmap)
 	    FbEvenTile(pixmap->drawable.width *
 		       pixmap->drawable.bitsPerPixel)) {
 		DBG(("%s: flushing pixmap\n", __FUNCTION__));
-		ret = sna_pixmap_move_to_cpu(pixmap, true);
+		ret = sna_pixmap_move_to_cpu(pixmap, MOVE_READ | MOVE_WRITE);
 	}
 
 	return ret;
@@ -1242,7 +1265,7 @@ static bool must_check sna_gc_move_to_cpu(GCPtr gc, DrawablePtr drawable)
 
 		if (changes & GCStipple && gc->stipple) {
 			DBG(("%s: flushing stipple pixmap\n", __FUNCTION__));
-			if (!sna_pixmap_move_to_cpu(gc->stipple, false))
+			if (!sna_pixmap_move_to_cpu(gc->stipple, MOVE_READ))
 				return false;
 		}
 
@@ -1255,10 +1278,10 @@ static bool must_check sna_gc_move_to_cpu(GCPtr gc, DrawablePtr drawable)
 
 	switch (gc->fillStyle) {
 	case FillTiled:
-		return sna_drawable_move_to_cpu(&gc->tile.pixmap->drawable, false);
+		return sna_drawable_move_to_cpu(&gc->tile.pixmap->drawable, MOVE_READ);
 	case FillStippled:
 	case FillOpaqueStippled:
-		return sna_drawable_move_to_cpu(&gc->stipple->drawable, false);
+		return sna_drawable_move_to_cpu(&gc->stipple->drawable, MOVE_READ);
 	default:
 		return true;
 	}
@@ -1431,7 +1454,6 @@ sna_put_image_upload_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 					    pixmap, src_bo, -x, -y,
 					    pixmap, priv->gpu_bo, 0, 0,
 					    box, nbox);
-		kgem_bo_sync(&sna->kgem, src_bo, true);
 		kgem_bo_destroy(&sna->kgem, src_bo);
 	}
 
@@ -1462,6 +1484,8 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	int16_t dx, dy;
 	int n;
 
+	assert_pixmap_contains_box(pixmap, RegionExtents(region));
+
 	if (gc->alu != GXcopy)
 		return false;
 
@@ -1486,7 +1510,6 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 				       pixmap->drawable.width,
 				       pixmap->drawable.height);
 		} else {
-			assert_pixmap_contains_box(pixmap, RegionExtents(region));
 			sna_damage_subtract(&priv->cpu_damage, region);
 			sna_damage_add(&priv->gpu_damage, region);
 		}
@@ -1494,8 +1517,46 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 		return true;
 	}
 
-	if (priv->cpu_bo)
-		kgem_bo_sync(&sna->kgem, priv->cpu_bo, true);
+	if (priv->cpu_bo) {
+		/* If the GPU is currently accessing the CPU pixmap, then
+		 * we will need to wait for that to finish before we can
+		 * modify the memory.
+		 *
+		 * However, we can queue some writes to the GPU bo to avoid
+		 * the wait. Or we can try to replace the CPU bo.
+		 */
+		if (sync_will_stall(priv->cpu_bo) && priv->cpu_bo->exec == NULL)
+			kgem_retire(&sna->kgem);
+		if (sync_will_stall(priv->cpu_bo)) {
+			if (priv->cpu_bo->vmap) {
+				if (sna_put_image_upload_blt(drawable, gc, region,
+							     x, y, w, h, bits, stride)) {
+					if (region_subsumes_drawable(region, &pixmap->drawable)) {
+						sna_damage_destroy(&priv->cpu_damage);
+						sna_damage_all(&priv->gpu_damage,
+							       pixmap->drawable.width,
+							       pixmap->drawable.height);
+					} else {
+						sna_damage_subtract(&priv->cpu_damage, region);
+						sna_damage_add(&priv->gpu_damage, region);
+					}
+
+					return true;
+				}
+			} else {
+				if (!region_subsumes_drawable(region, &pixmap->drawable)) {
+					sna_damage_subtract(&priv->cpu_damage, region);
+					if (!sna_pixmap_move_to_gpu(pixmap))
+						return false;
+				}
+
+				sna_pixmap_free_cpu(sna, priv);
+			}
+		}
+
+		if (priv->cpu_bo)
+			kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
+	}
 
 	if (pixmap->devPrivate.ptr == NULL &&
 	    !sna_pixmap_alloc_cpu(sna, pixmap, priv))
@@ -1508,7 +1569,6 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 			       pixmap->drawable.height);
 		sna_pixmap_destroy_gpu_bo(sna, priv);
 	} else {
-		assert_pixmap_contains_box(pixmap, RegionExtents(region));
 		sna_damage_subtract(&priv->gpu_damage, region);
 		sna_damage_add(&priv->cpu_damage, region);
 		if (priv->gpu_bo &&
@@ -1909,7 +1969,8 @@ fallback:
 
 	if (!sna_gc_move_to_cpu(gc, drawable))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		goto out;
 
 	DBG(("%s: fbPutImage(%d, %d, %d, %d)\n",
@@ -1986,7 +2047,7 @@ sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 
 fallback:
 		DBG(("%s: fallback", __FUNCTION__));
-		if (!sna_pixmap_move_to_cpu(pixmap, true))
+		if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ | MOVE_WRITE))
 			return;
 
 		stride = pixmap->devKind;
@@ -2033,6 +2094,22 @@ fallback:
 	}
 }
 
+static bool copy_use_gpu_bo(struct sna *sna,
+			    struct sna_pixmap *priv)
+{
+	if (!priv->cpu_bo)
+	       return false;
+
+	if (priv->cpu_bo->gpu) {
+		if (priv->cpu_bo->exec)
+			return true;
+
+		kgem_retire(&sna->kgem);
+	}
+
+	return priv->cpu_bo->gpu;
+}
+
 static void
 sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	       BoxPtr box, int n,
@@ -2097,7 +2174,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 
 	/* Try to maintain the data on the GPU */
 	if (dst_priv && dst_priv->gpu_bo == NULL &&
-	    src_priv && src_priv->gpu_bo != NULL) {
+	    src_priv && (src_priv->gpu_bo != NULL || (src_priv->cpu_bo && src_priv->cpu_bo->gpu))) {
 		uint32_t tiling =
 			sna_pixmap_choose_tiling(dst_pixmap,
 						 src_priv->gpu_bo->tiling);
@@ -2118,10 +2195,9 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	}
 
 	if (dst_priv && dst_priv->gpu_bo) {
-		if (!src_priv && !dst_priv->gpu_only) {
-			DBG(("%s: fallback - src_priv=%p but dst gpu_only=%d\n",
-			     __FUNCTION__,
-			     src_priv, dst_priv->gpu_only));
+		if (!src_priv && !copy_use_gpu_bo(sna, dst_priv)) {
+			DBG(("%s: fallback - src_priv=%p and not use dst gpu bo\n",
+			     __FUNCTION__, src_priv));
 			goto fallback;
 		}
 
@@ -2158,6 +2234,31 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 							    &region);
 				RegionTranslate(&region, -dst_dx, -dst_dy);
 			}
+		} else if (src_priv && src_priv->cpu_bo) {
+			if (!sna->render.copy_boxes(sna, alu,
+						    src_pixmap, src_priv->cpu_bo, src_dx, src_dy,
+						    dst_pixmap, dst_priv->gpu_bo, dst_dx, dst_dy,
+						    box, n)) {
+				DBG(("%s: fallback - accelerated copy boxes failed\n",
+				     __FUNCTION__));
+				goto fallback;
+			}
+
+			if (replaces) {
+				sna_damage_destroy(&dst_priv->cpu_damage);
+				sna_damage_all(&dst_priv->gpu_damage,
+					       dst_pixmap->drawable.width,
+					       dst_pixmap->drawable.height);
+			} else {
+				RegionTranslate(&region, dst_dx, dst_dy);
+				assert_pixmap_contains_box(dst_pixmap,
+							   RegionExtents(&region));
+				sna_damage_add(&dst_priv->gpu_damage, &region);
+				if (alu == GXcopy)
+					sna_damage_subtract(&dst_priv->cpu_damage,
+							    &region);
+				RegionTranslate(&region, -dst_dx, -dst_dy);
+			}
 		} else if (alu != GXcopy) {
 			PixmapPtr tmp;
 			int i;
@@ -2218,7 +2319,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 			if (src_priv) {
 				RegionTranslate(&region, src_dx, src_dy);
 				if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
-								&region, false))
+								&region, MOVE_READ))
 					goto out;
 				RegionTranslate(&region, -src_dx, -src_dy);
 			}
@@ -2271,39 +2372,24 @@ fallback:
 		if (src_priv) {
 			RegionTranslate(&region, src_dx, src_dy);
 			if (!sna_drawable_move_region_to_cpu(&src_pixmap->drawable,
-							     &region, false))
+							     &region, MOVE_READ))
 				goto out;
 			RegionTranslate(&region, -src_dx, -src_dy);
 		}
 
 		RegionTranslate(&region, dst_dx, dst_dy);
 		if (dst_priv) {
-			if (alu == GXcopy) {
-				if (replaces) {
-					sna_damage_all(&dst_priv->cpu_damage,
-						       dst_pixmap->drawable.width,
-						       dst_pixmap->drawable.height);
-					sna_pixmap_destroy_gpu_bo(sna, dst_priv);
-				} else {
-					assert_pixmap_contains_box(dst_pixmap,
-								   RegionExtents(&region));
-					sna_damage_subtract(&dst_priv->gpu_damage,
-							    &region);
-					sna_damage_add(&dst_priv->cpu_damage,
-						       &region);
-					if (dst_priv->flush)
-						list_move(&dst_priv->list,
-							  &sna->dirty_pixmaps);
-				}
+			unsigned mode;
 
-				if (dst_pixmap->devPrivate.ptr == NULL &&
-				    !sna_pixmap_alloc_cpu(sna, dst_pixmap, dst_priv))
-					goto out;
-			} else {
-				if (!sna_drawable_move_region_to_cpu(&dst_pixmap->drawable,
-								     &region, true))
-					goto out;
-			}
+			assert_pixmap_contains_box(dst_pixmap,
+						   RegionExtents(&region));
+
+			mode = MOVE_WRITE;
+			if (alu != GXcopy)
+				mode |= MOVE_READ;
+			if (!sna_drawable_move_region_to_cpu(&dst_pixmap->drawable,
+							     &region, mode))
+				goto out;
 		}
 
 		dst_stride = dst_pixmap->devKind;
@@ -2404,13 +2490,13 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 		if (!sna_gc_move_to_cpu(gc, dst))
 			goto out;
 
-		if (!sna_drawable_move_region_to_cpu(dst, &region, true))
+		if (!sna_drawable_move_region_to_cpu(dst, &region, MOVE_READ | MOVE_WRITE))
 			goto out;
 
 		RegionTranslate(&region,
 				src_x - dst_x - dst->x + src->x,
 				src_y - dst_y - dst->y + src->y);
-		if (!sna_drawable_move_region_to_cpu(src, &region, false))
+		if (!sna_drawable_move_region_to_cpu(src, &region, MOVE_READ))
 			goto out;
 
 		ret = fbCopyArea(src, dst, gc,
@@ -2930,7 +3016,8 @@ fallback:
 
 	if (!sna_gc_move_to_cpu(gc, drawable))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		goto out;
 
 	fbFillSpans(drawable, gc, n, pt, width, sorted);
@@ -2958,7 +3045,8 @@ sna_set_spans(DrawablePtr drawable, GCPtr gc, char *src,
 
 	if (!sna_gc_move_to_cpu(gc, drawable))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		goto out;
 
 	fbSetSpans(drawable, gc, src, pt, width, n, sorted);
@@ -3142,7 +3230,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc,
 	if (n == 0)
 		return;
 
-	if (!sna_pixmap_move_to_cpu(src_pixmap, false))
+	if (!sna_pixmap_move_to_cpu(src_pixmap, MOVE_READ))
 		return;
 	get_drawable_deltas(source, src_pixmap, &dx, &dy);
 	sx += dx;
@@ -3381,13 +3469,14 @@ fallback:
 	if (!sna_gc_move_to_cpu(gc, dst))
 		goto out;
 
-	if (!sna_drawable_move_region_to_cpu(dst, &region, true))
+	if (!sna_drawable_move_region_to_cpu(dst, &region,
+					     MOVE_READ | MOVE_WRITE))
 		goto out;
 
 	RegionTranslate(&region,
 			src_x - dst_x - dst->x + src->x,
 			src_y - dst_y - dst->y + src->y);
-	if (!sna_drawable_move_region_to_cpu(src, &region, false))
+	if (!sna_drawable_move_region_to_cpu(src, &region, MOVE_READ))
 		goto out;
 
 	DBG(("%s: fbCopyPlane(%d, %d, %d, %d, %d,%d) %x\n",
@@ -3576,7 +3665,8 @@ fallback:
 
 	if (!sna_gc_move_to_cpu(gc, drawable))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		goto out;
 
 	DBG(("%s: fbPolyPoint\n", __FUNCTION__));
@@ -4432,7 +4522,8 @@ fallback:
 
 	if (!sna_gc_move_to_cpu(gc, drawable))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		goto out;
 
 	DBG(("%s: fbPolyLine\n", __FUNCTION__));
@@ -5104,6 +5195,7 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 {
 	PixmapPtr pixmap = get_drawable_pixmap(drawable);
 	struct sna *sna = to_sna_from_pixmap(pixmap);
+	struct sna_damage **damage;
 	RegionRec region;
 	unsigned flags;
 
@@ -5141,7 +5233,6 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 		goto spans_fallback;
 	if (gc->fillStyle == FillSolid) {
 		struct sna_pixmap *priv = sna_pixmap(pixmap);
-		struct sna_damage **damage;
 
 		DBG(("%s: trying blt solid fill [%08lx] paths\n",
 		     __FUNCTION__, gc->fgPixel));
@@ -5176,7 +5267,6 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 		}
 	} else if (flags & 4) {
 		struct sna_pixmap *priv = sna_pixmap(pixmap);
-		struct sna_damage **damage;
 
 		/* Try converting these to a set of rectangles instead */
 		if (sna_drawable_use_gpu_bo(drawable, &region.extents, &damage)) {
@@ -5241,7 +5331,7 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 	/* XXX Do we really want to base this decision on the amalgam ? */
 spans_fallback:
 	if (USE_SPANS &&
-	    sna_drawable_use_gpu_bo(drawable, &region.extents, NULL)) {
+	    sna_drawable_use_gpu_bo(drawable, &region.extents, &damage)) {
 		void (*line)(DrawablePtr, GCPtr, int, int, DDXPointPtr);
 		int i;
 
@@ -5281,7 +5371,8 @@ fallback:
 
 	if (!sna_gc_move_to_cpu(gc, drawable))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		goto out;
 
 	DBG(("%s: fbPolySegment\n", __FUNCTION__));
@@ -5830,7 +5921,8 @@ fallback:
 
 	if (!sna_gc_move_to_cpu(gc, drawable))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		goto out;
 
 	DBG(("%s: fbPolyRectangle\n", __FUNCTION__));
@@ -5954,7 +6046,8 @@ fallback:
 
 	if (!sna_gc_move_to_cpu(gc, drawable))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		goto out;
 
 	/* XXX may still fallthrough to miZeroPolyArc */
@@ -6141,7 +6234,7 @@ static uint32_t
 get_pixel(PixmapPtr pixmap)
 {
 	DBG(("%s\n", __FUNCTION__));
-	if (!sna_pixmap_move_to_cpu(pixmap, false))
+	if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
 		return 0;
 
 	switch (pixmap->drawable.bitsPerPixel) {
@@ -7015,7 +7108,7 @@ sna_poly_fill_rect_stippled_blt(DrawablePtr drawable,
 		bo = sna_pixmap(pixmap)->gpu_bo;
 	}
 
-	if (!sna_drawable_move_to_cpu(&stipple->drawable, false))
+	if (!sna_drawable_move_to_cpu(&stipple->drawable, MOVE_READ))
 		return false;
 
 	DBG(("%s: origin (%d, %d), extents (stipple): (%d, %d), stipple size %dx%d\n",
@@ -7184,7 +7277,12 @@ fallback:
 
 	if (!sna_gc_move_to_cpu(gc, draw))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(draw, &region, true))
+
+	flags = MOVE_WRITE;
+	if (gc->fillStyle == FillStippled  ||
+	    !(gc->alu == GXcopy || gc->alu == GXclear || gc->alu == GXset))
+		flags |= MOVE_READ;
+	if (!sna_drawable_move_region_to_cpu(draw, &region, flags))
 		goto out;
 
 	DBG(("%s: fallback - fbPolyFillRect\n", __FUNCTION__));
@@ -7519,7 +7617,7 @@ static bool sna_set_glyph(CharInfoPtr in, CharInfoPtr out)
 	w = (w + 7) >> 3;
 
 	out->metrics = in->metrics;
-	out->bits = malloc(w*h);
+	out->bits = malloc((w*h + 7) & ~7);
 	if (out->bits == NULL)
 		return false;
 
@@ -7630,7 +7728,8 @@ sna_poly_text8(DrawablePtr drawable, GCPtr gc,
 		if (!sna_gc_move_to_cpu(gc, drawable))
 			goto out;
 
-		if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+		if (!sna_drawable_move_region_to_cpu(drawable, &region,
+						     MOVE_READ | MOVE_WRITE))
 			goto out;
 
 		DBG(("%s: fallback -- fbPolyGlyphBlt\n", __FUNCTION__));
@@ -7703,7 +7802,8 @@ sna_poly_text16(DrawablePtr drawable, GCPtr gc,
 
 		if (!sna_gc_move_to_cpu(gc, drawable))
 			goto out;
-		if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+		if (!sna_drawable_move_region_to_cpu(drawable, &region,
+						     MOVE_READ | MOVE_WRITE))
 			goto out;
 
 		DBG(("%s: fallback -- fbPolyGlyphBlt\n", __FUNCTION__));
@@ -7776,7 +7876,8 @@ sna_image_text8(DrawablePtr drawable, GCPtr gc,
 
 		if (!sna_gc_move_to_cpu(gc, drawable))
 			goto out;
-		if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+		if (!sna_drawable_move_region_to_cpu(drawable, &region,
+						     MOVE_READ | MOVE_WRITE))
 			goto out;
 
 		DBG(("%s: fallback -- fbImageGlyphBlt\n", __FUNCTION__));
@@ -7842,7 +7943,8 @@ sna_image_text16(DrawablePtr drawable, GCPtr gc,
 
 		if (!sna_gc_move_to_cpu(gc, drawable))
 			goto out;
-		if(!sna_drawable_move_region_to_cpu(drawable, &region, true))
+		if(!sna_drawable_move_region_to_cpu(drawable, &region,
+						    MOVE_READ | MOVE_WRITE))
 			goto out;
 
 		DBG(("%s: fallback -- fbImageGlyphBlt\n", __FUNCTION__));
@@ -8151,7 +8253,8 @@ fallback:
 	DBG(("%s: fallback\n", __FUNCTION__));
 	if (!sna_gc_move_to_cpu(gc, drawable))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		goto out;
 	DBG(("%s: fallback -- fbPolyGlyphBlt\n", __FUNCTION__));
 	fbPolyGlyphBlt(drawable, gc, x, y, n, info, base);
@@ -8324,9 +8427,10 @@ sna_push_pixels(GCPtr gc, PixmapPtr bitmap, DrawablePtr drawable,
 	DBG(("%s: fallback\n", __FUNCTION__));
 	if (!sna_gc_move_to_cpu(gc, drawable))
 		goto out;
-	if (!sna_pixmap_move_to_cpu(bitmap, false))
+	if (!sna_pixmap_move_to_cpu(bitmap, MOVE_READ))
 		goto out;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		goto out;
 
 	DBG(("%s: fallback, fbPushPixels(%d, %d, %d %d)\n",
@@ -8408,7 +8512,7 @@ sna_get_image(DrawablePtr drawable,
 	region.extents.y2 = region.extents.y1 + h;
 	region.data = NULL;
 
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, false))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region, MOVE_READ))
 		return;
 
 	fbGetImage(drawable, x, y, w, h, format, mask, dst);
@@ -8424,7 +8528,7 @@ sna_get_spans(DrawablePtr drawable, int wMax,
 		return;
 
 	region.data = NULL;
-	if (!sna_drawable_move_region_to_cpu(drawable, &region, false))
+	if (!sna_drawable_move_region_to_cpu(drawable, &region, MOVE_READ))
 		return;
 
 	fbGetSpans(drawable, wMax, pt, width, n, start);
@@ -8442,7 +8546,7 @@ sna_copy_window(WindowPtr win, DDXPointRec origin, RegionPtr src)
 
 	if (wedged(sna)) {
 		DBG(("%s: fallback -- wedged\n", __FUNCTION__));
-		if (sna_pixmap_move_to_cpu(pixmap, true))
+		if (sna_pixmap_move_to_cpu(pixmap, MOVE_READ | MOVE_WRITE))
 			fbCopyWindow(win, origin, src);
 		return;
 	}
@@ -8734,6 +8838,56 @@ static void sna_accel_expire(struct sna *sna)
 		_sna_accel_disarm_timer(sna, EXPIRE_TIMER);
 }
 
+static bool
+sna_pixmap_free_gpu(struct sna *sna, struct sna_pixmap *priv)
+{
+	PixmapPtr pixmap = priv->pixmap;
+
+	assert (!priv->flush);
+
+	if (pixmap->devPrivate.ptr == NULL &&
+	    !sna_pixmap_alloc_cpu(sna, pixmap, priv))
+		return false;
+
+	if (priv->gpu_damage) {
+		BoxPtr box;
+		int n;
+
+		DBG(("%s: flushing GPU damage\n", __FUNCTION__));
+
+		n = sna_damage_get_boxes(priv->gpu_damage, &box);
+		if (n) {
+			struct kgem_bo *dst_bo;
+			Bool ok = FALSE;
+
+			dst_bo = NULL;
+			if (sna->kgem.gen >= 30)
+				dst_bo = pixmap_vmap(&sna->kgem, pixmap);
+			if (dst_bo)
+				ok = sna->render.copy_boxes(sna, GXcopy,
+							    pixmap, priv->gpu_bo, 0, 0,
+							    pixmap, dst_bo, 0, 0,
+							    box, n);
+			if (!ok)
+				sna_read_boxes(sna,
+					       priv->gpu_bo, 0, 0,
+					       pixmap, 0, 0,
+					       box, n);
+		}
+
+		__sna_damage_destroy(priv->gpu_damage);
+		priv->gpu_damage = NULL;
+	}
+
+	sna_damage_all(&priv->cpu_damage,
+		       pixmap->drawable.width,
+		       pixmap->drawable.height);
+	sna_pixmap_destroy_gpu_bo(sna, priv);
+
+	priv->gpu = false;
+	return true;
+}
+
 static void sna_accel_inactive(struct sna *sna)
 {
 	struct sna_pixmap *priv, *next;
@@ -8792,7 +8946,7 @@ static void sna_accel_inactive(struct sna *sna)
 		if (!priv->pinned) {
 			DBG(("%s: discarding inactive GPU bo handle=%d\n",
 			     __FUNCTION__, priv->gpu_bo->handle));
-			if (!sna_pixmap_move_to_cpu(priv->pixmap, true))
+			if (!sna_pixmap_free_gpu(sna, priv))
 				list_add(&priv->inactive, &preserve);
 		}
 	}
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index a42b190..a488102 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -559,7 +559,7 @@ get_pixel(PicturePtr picture)
 
 	DBG(("%s: %p\n", __FUNCTION__, pixmap));
 
-	if (!sna_pixmap_move_to_cpu(pixmap, false))
+	if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
 		return 0;
 
 	switch (pixmap->drawable.bitsPerPixel) {
@@ -981,11 +981,8 @@ static void blt_vmap_done(struct sna *sna, const struct sna_composite_op *op)
 	struct kgem_bo *bo = (struct kgem_bo *)op->u.blt.src_pixmap;
 
 	blt_done(sna, op);
-	if (bo) {
-		struct kgem *kgem = &sna->kgem;
-		kgem_bo_sync(kgem, bo, true);
-		kgem_bo_destroy(kgem, bo);
-	}
+	if (bo)
+		kgem_bo_destroy(&sna->kgem, bo);
 }
 
 fastcall static void
@@ -1113,11 +1110,9 @@ prepare_blt_put(struct sna *sna,
 	DBG(("%s\n", __FUNCTION__));
 
 	if (priv) {
-		if (!priv->gpu_only) {
-			src_bo = priv->cpu_bo;
-			if (!src_bo)
-				src_bo = pixmap_vmap(&sna->kgem, src);
-		}
+		src_bo = priv->cpu_bo;
+		if (!src_bo)
+			src_bo = pixmap_vmap(&sna->kgem, src);
 	} else {
 		src_bo = kgem_create_map(&sna->kgem,
 					 src->devPrivate.ptr,
@@ -1140,7 +1135,7 @@ prepare_blt_put(struct sna *sna,
 				       GXcopy))
 			return FALSE;
 	} else {
-		if (!sna_pixmap_move_to_cpu(src, false))
+		if (!sna_pixmap_move_to_cpu(src, MOVE_READ))
 			return FALSE;
 
 		op->blt   = blt_put_composite;
@@ -1185,8 +1180,6 @@ has_cpu_area(PixmapPtr pixmap, int x, int y, int w, int h)
 		return TRUE;
 	if (!priv->gpu_bo)
 		return TRUE;
-	if (priv->gpu_only)
-		return FALSE;
 
 	if (priv->gpu_damage == NULL)
 		return TRUE;
diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c
index b54bd33..fcc0070 100644
--- a/src/sna/sna_composite.c
+++ b/src/sna/sna_composite.c
@@ -401,6 +401,7 @@ sna_composite(CARD8 op,
 {
 	struct sna *sna = to_sna_from_drawable(dst->pDrawable);
 	struct sna_composite_op tmp;
+	unsigned flags;
 	RegionRec region;
 	int dx, dy;
 
@@ -486,24 +487,34 @@ fallback:
 	     dst_x, dst_y,
 	     dst->pDrawable->x, dst->pDrawable->y,
 	     width, height));
-
-	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, true))
+	if (op == PictOpSrc || op == PictOpClear)
+		flags = MOVE_WRITE;
+	else
+		flags = MOVE_WRITE | MOVE_READ;
+	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, flags))
 		goto out;
 	if (dst->alphaMap &&
-	    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, true))
+	    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable,
+				      MOVE_WRITE | MOVE_READ))
 		goto out;
 	if (src->pDrawable) {
-		if (!sna_drawable_move_to_cpu(src->pDrawable, false))
+		if (!sna_drawable_move_to_cpu(src->pDrawable,
+					      MOVE_READ))
 			goto out;
+
 		if (src->alphaMap &&
-		    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable, false))
+		    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
+					      MOVE_READ))
 			goto out;
 	}
 	if (mask && mask->pDrawable) {
-		if (!sna_drawable_move_to_cpu(mask->pDrawable, false))
+		if (!sna_drawable_move_to_cpu(mask->pDrawable,
+					      MOVE_READ))
 			goto out;
+
 		if (mask->alphaMap &&
-		    !sna_drawable_move_to_cpu(mask->alphaMap->pDrawable, false))
+		    !sna_drawable_move_to_cpu(mask->alphaMap->pDrawable,
+					      MOVE_READ))
 			goto out;
 	}
 
@@ -708,7 +719,7 @@ sna_composite_rectangles(CARD8		 op,
 	 */
 	if (op == PictOpSrc || op == PictOpClear) {
 		priv = sna_pixmap_attach(pixmap);
-		if (priv && !priv->gpu_only)
+		if (priv)
 			sna_damage_subtract(&priv->cpu_damage, &region);
 	}
 
@@ -730,19 +741,22 @@ sna_composite_rectangles(CARD8		 op,
 		goto fallback;
 	}
 
-	if (!priv->gpu_only) {
-		assert_pixmap_contains_box(pixmap, RegionExtents(&region));
-		sna_damage_add(&priv->gpu_damage, &region);
-	}
+	assert_pixmap_contains_box(pixmap, RegionExtents(&region));
+	sna_damage_add(&priv->gpu_damage, &region);
 
 	goto done;
 
 fallback:
 	DBG(("%s: fallback\n", __FUNCTION__));
-	if (!sna_drawable_move_region_to_cpu(&pixmap->drawable, &region, true))
+	if (op <= PictOpSrc)
+		error = MOVE_WRITE;
+	else
+		error = MOVE_WRITE | MOVE_READ;
+	if (!sna_drawable_move_region_to_cpu(&pixmap->drawable, &region, error))
 		goto done;
+
 	if (dst->alphaMap &&
-	    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, true))
+	    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, error))
 		goto done;
 
 	if (op == PictOpSrc || op == PictOpClear) {
diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index b1550a2..3b2cd7b 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -1704,6 +1704,7 @@ sna_crtc_resize(ScrnInfoPtr scrn, int width, int height)
 
 	if (old_fb_id)
 		drmModeRmFB(sna->kgem.fd, old_fb_id);
+	sna_pixmap_get_bo(old_front)->needs_flush = true;
 	scrn->pScreen->DestroyPixmap(old_front);
 
 	return TRUE;
@@ -1834,7 +1835,6 @@ sna_page_flip(struct sna *sna,
 	count = do_page_flip(sna, data, ref_crtc_hw_id);
 	DBG(("%s: page flipped %d crtcs\n", __FUNCTION__, count));
 	if (count) {
-		bo->cpu_read = bo->cpu_write = false;
 		bo->gpu = true;
 
 		/* Although the kernel performs an implicit flush upon
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index e572a6b..76a1d70 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -319,9 +319,6 @@ static void damage(PixmapPtr pixmap, RegionPtr region)
 	struct sna_pixmap *priv;
 
 	priv = sna_pixmap(pixmap);
-	if (priv->gpu_only)
-		return;
-
 	if (region == NULL) {
 damage_all:
 		sna_damage_all(&priv->gpu_damage,
@@ -346,12 +343,11 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo)
 	struct sna_pixmap *priv;
 
 	priv = sna_pixmap(pixmap);
-	if (!priv->gpu_only) {
-		sna_damage_all(&priv->gpu_damage,
-				pixmap->drawable.width,
-				pixmap->drawable.height);
-		sna_damage_destroy(&priv->cpu_damage);
-	}
+	sna_damage_all(&priv->gpu_damage,
+		       pixmap->drawable.width,
+		       pixmap->drawable.height);
+	sna_damage_destroy(&priv->cpu_damage);
+
 	assert(priv->gpu_bo->refcnt > 1);
 	priv->gpu_bo->refcnt--;
 	priv->gpu_bo = ref(bo);
diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c
index 82d091a..4fd4752 100644
--- a/src/sna/sna_glyphs.c
+++ b/src/sna/sna_glyphs.c
@@ -79,6 +79,7 @@
 #define NO_GLYPH_CACHE 0
 #define NO_GLYPHS_TO_DST 0
 #define NO_GLYPHS_VIA_MASK 0
+#define NO_SMALL_MASK 0
 #define NO_GLYPHS_SLOW 0
 
 #define CACHE_PICTURE_SIZE 1024
@@ -642,7 +643,6 @@ static bool
 clear_pixmap(struct sna *sna, PixmapPtr pixmap)
 {
 	struct sna_pixmap *priv = sna_pixmap(pixmap);
-	assert(priv->gpu_only);
 	return sna->render.fill_one(sna, pixmap, priv->gpu_bo, 0,
 				    0, 0,
 				    pixmap->drawable.width,
@@ -711,7 +711,8 @@ glyphs_via_mask(struct sna *sna,
 	}
 
 	component_alpha = NeedsComponent(format->format);
-	if ((uint32_t)width * height * format->depth < 8 * 4096) {
+	if (!NO_SMALL_MASK &&
+	    (uint32_t)width * height * format->depth < 8 * 4096) {
 		pixman_image_t *mask_image;
 		int s;
 
@@ -1023,16 +1024,22 @@ glyphs_fallback(CARD8 op,
 	if (!RegionNotEmpty(&region))
 		return;
 
-	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		return;
 	if (dst->alphaMap &&
-	    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, true))
+	    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable,
+				      MOVE_READ | MOVE_WRITE))
 		return;
+
 	if (src->pDrawable) {
-		if (!sna_drawable_move_to_cpu(src->pDrawable, false))
+		if (!sna_drawable_move_to_cpu(src->pDrawable,
+					      MOVE_READ))
 			return;
+
 		if (src->alphaMap &&
-		    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable, false))
+		    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
+					      MOVE_READ))
 			return;
 	}
 	RegionTranslate(&region, -dst->pDrawable->x, -dst->pDrawable->y);
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 19385af..c92ecb6 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -347,9 +347,6 @@ _texture_is_cpu(PixmapPtr pixmap, const BoxRec *box)
 	if (priv == NULL)
 		return TRUE;
 
-	if (priv->gpu_only)
-		return FALSE;
-
 	if (priv->gpu_bo == NULL)
 		return TRUE;
 
@@ -955,7 +952,7 @@ sna_render_picture_fixup(struct sna *sna,
 	}
 
 	if (picture->pDrawable &&
-	    !sna_drawable_move_to_cpu(picture->pDrawable, false))
+	    !sna_drawable_move_to_cpu(picture->pDrawable, MOVE_READ))
 		return 0;
 
 	channel->bo = kgem_create_buffer(&sna->kgem,
@@ -1099,7 +1096,7 @@ sna_render_picture_convert(struct sna *sna,
 		return 0;
 	}
 
-	if (!sna_pixmap_move_to_cpu(pixmap, false))
+	if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
 		return 0;
 
 	src = pixman_image_create_bits(picture->format,
diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h
index 9e3676a..497c0fd 100644
--- a/src/sna/sna_render_inline.h
+++ b/src/sna/sna_render_inline.h
@@ -74,6 +74,13 @@ is_gpu(DrawablePtr drawable)
 }
 
 static inline Bool
+is_busy_cpu(DrawablePtr drawable)
+{
+	struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
+	return priv && priv->cpu_bo && priv->cpu_bo->gpu;
+}
+
+static inline Bool
 is_cpu(DrawablePtr drawable)
 {
 	struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
@@ -91,7 +98,7 @@ static inline Bool
 too_small(DrawablePtr drawable)
 {
 	return ((uint32_t)drawable->width * drawable->height * drawable->bitsPerPixel <= 8*4096) &&
-		!is_dirty_gpu(drawable);
+		!(is_dirty_gpu(drawable) || is_busy_cpu(drawable));
 }
 
 static inline Bool
@@ -99,7 +106,7 @@ picture_is_gpu(PicturePtr picture)
 {
 	if (!picture || !picture->pDrawable)
 		return FALSE;
-	return is_gpu(picture->pDrawable);
+	return is_gpu(picture->pDrawable) || is_busy_cpu(picture->pDrawable);
 }
 
 static inline Bool sna_blt_compare_depth(DrawablePtr src, DrawablePtr dst)
diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c
index 83dbd8c..6e68130 100644
--- a/src/sna/sna_tiling.c
+++ b/src/sna/sna_tiling.c
@@ -199,17 +199,38 @@ sna_tiling_composite_done(struct sna *sna,
 				}
 				tmp.done(sna, &tmp);
 			} else {
+				unsigned int flags;
 				DBG(("%s -- falback\n", __FUNCTION__));
 
-				if (!sna_drawable_move_to_cpu(tile->dst->pDrawable, true))
+				if (tile->op <= PictOpSrc)
+					flags = MOVE_WRITE;
+				else
+					flags = MOVE_WRITE | MOVE_READ;
+				if (!sna_drawable_move_to_cpu(tile->dst->pDrawable,
+							      flags))
+					goto done;
+				if (tile->dst->alphaMap &&
+				    !sna_drawable_move_to_cpu(tile->dst->alphaMap->pDrawable,
+							      flags))
 					goto done;
 
 				if (tile->src->pDrawable &&
-				    !sna_drawable_move_to_cpu(tile->src->pDrawable, false))
+				    !sna_drawable_move_to_cpu(tile->src->pDrawable,
+							      MOVE_READ))
+					goto done;
+				if (tile->src->alphaMap &&
+				    !sna_drawable_move_to_cpu(tile->src->alphaMap->pDrawable,
+							      MOVE_READ))
 					goto done;
 
 				if (tile->mask && tile->mask->pDrawable &&
-				    !sna_drawable_move_to_cpu(tile->mask->pDrawable, false))
+				    !sna_drawable_move_to_cpu(tile->mask->pDrawable,
+							      MOVE_READ))
+					goto done;
+
+				if (tile->mask && tile->mask->alphaMap &&
+				    !sna_drawable_move_to_cpu(tile->mask->alphaMap->pDrawable,
+							      MOVE_READ))
 					goto done;
 
 				fbComposite(tile->op,
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 27987d8..287f883 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -3176,7 +3176,8 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 	}
 
 	region.data = NULL;
-	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, true))
+	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
+					     MOVE_READ | MOVE_WRITE))
 		return true;
 
 	pixmap = get_drawable_pixmap(dst->pDrawable);
@@ -3313,16 +3314,20 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 		region.extents.y2 = dst_y + extents.y2;
 		region.data = NULL;
 
-		if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, true))
+		if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region,
+						     MOVE_READ | MOVE_WRITE))
 			goto done;
 		if (dst->alphaMap  &&
-		    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, true))
+		    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable,
+					      MOVE_READ | MOVE_WRITE))
 			goto done;
 		if (src->pDrawable) {
-			if (!sna_drawable_move_to_cpu(src->pDrawable, false))
+			if (!sna_drawable_move_to_cpu(src->pDrawable,
+						      MOVE_READ))
 				goto done;
 			if (src->alphaMap &&
-			    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable, false))
+			    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
+						      MOVE_READ))
 				goto done;
 		}
 
@@ -3661,20 +3666,18 @@ skip:
 static void mark_damaged(PixmapPtr pixmap, struct sna_pixmap *priv,
 			 BoxPtr box, int16_t x, int16_t y)
 {
-	if (!priv->gpu_only) {
-		box->x1 += x; box->x2 += x;
-		box->y1 += y; box->y2 += y;
-		if (box->x1 <= 0 && box->y1 <= 0 &&
-		    box->x2 >= pixmap->drawable.width &&
-		    box->y2 >= pixmap->drawable.height) {
-			sna_damage_destroy(&priv->cpu_damage);
-			sna_damage_all(&priv->gpu_damage,
-				       pixmap->drawable.width,
-				       pixmap->drawable.height);
-		} else {
-			sna_damage_add_box(&priv->gpu_damage, box);
-			sna_damage_subtract_box(&priv->cpu_damage, box);
-		}
+	box->x1 += x; box->x2 += x;
+	box->y1 += y; box->y2 += y;
+	if (box->x1 <= 0 && box->y1 <= 0 &&
+	    box->x2 >= pixmap->drawable.width &&
+	    box->y2 >= pixmap->drawable.height) {
+		sna_damage_destroy(&priv->cpu_damage);
+		sna_damage_all(&priv->gpu_damage,
+			       pixmap->drawable.width,
+			       pixmap->drawable.height);
+	} else {
+		sna_damage_add_box(&priv->gpu_damage, box);
+		sna_damage_subtract_box(&priv->cpu_damage, box);
 	}
 }
 
@@ -3887,7 +3890,8 @@ sna_add_traps(PicturePtr picture, INT16 x, INT16 y, int n, xTrap *t)
 	}
 
 	DBG(("%s -- fallback\n", __FUNCTION__));
-	if (sna_drawable_move_to_cpu(picture->pDrawable, true))
+	if (sna_drawable_move_to_cpu(picture->pDrawable,
+				     MOVE_READ | MOVE_WRITE))
 		fbAddTraps(picture, x, y, n, t);
 }
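
The recurring change in this commit is the switch from a boolean
`write` argument to an explicit bitmask of intent (MOVE_READ,
MOVE_WRITE), so that pure overwrites can discard stale damage rather
than migrate it. A condensed sketch of the convention, mirroring the
sna_copy_boxes fallback above:

    /* MOVE_READ | MOVE_WRITE: a read-modify-write fallback, so the
     * current contents must be pulled back before the CPU touches
     * them.  MOVE_WRITE alone (GXcopy fills, PictOpSrc): the region
     * is fully overwritten, so GPU damage is subtracted instead of
     * read back and a busy CPU bo may simply be thrown away. */
    mode = MOVE_WRITE;
    if (alu != GXcopy)
        mode |= MOVE_READ;
    if (!sna_drawable_move_region_to_cpu(&dst_pixmap->drawable,
                                         &region, mode))
        goto out;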
 
commit 6e47f283711d122d96384a1a82854c11644e6d68
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Dec 16 23:49:18 2011 +0000

    sna/gen3: Enforce a minimum width of 2 elements for the render target
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
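
In outline: in single-stream mode gen3 imposes no alignment on the
render target beyond a width of at least two elements, so an
undersized bo is swapped for a wider one via kgem_replace_bo() before
binding (see the diff below). A hypothetical helper, only to make the
minimum byte pitch explicit:

    #include <stdint.h>

    /* Hypothetical helper, not in the driver: the smallest byte pitch
     * that still holds the two-element row gen3 will accept. */
    static inline uint32_t min_render_pitch(int bits_per_pixel)
    {
        return 2 * (uint32_t)bits_per_pixel / 8;
    }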

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 56e4b66..3fb98bc 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2201,6 +2201,26 @@ gen3_composite_set_target(struct sna *sna,
 	if (priv == NULL)
 		return FALSE;
 
+	/* For single-stream mode there should be no minimum alignment
+	 * required, except that the width must be at least 2 elements.
+	 */
+	if (priv->gpu_bo->pitch < 2*op->dst.pixmap->drawable.bitsPerPixel) {
+		struct kgem_bo *bo;
+
+		if (priv->pinned)
+			return FALSE;
+
+		bo = kgem_replace_bo(&sna->kgem, priv->gpu_bo,
+				     op->dst.width, op->dst.height,
+				     2*op->dst.pixmap->drawable.bitsPerPixel,
+				     op->dst.pixmap->drawable.bitsPerPixel);
+		if (bo == NULL)
+			return FALSE;
+
+		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
+		priv->gpu_bo = bo;
+	}
+
 	op->dst.bo = priv->gpu_bo;
 	op->damage = &priv->gpu_damage;
 	if (sna_damage_is_all(op->damage, op->dst.width, op->dst.height))
commit 2ff0826f94ca16e95cd662385f7091be750dec30
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Dec 16 23:33:25 2011 +0000

    sna: Discard GPU damage first before choosing where to fill_boxes()
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
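
In outline, abridged from the diff below: the subtraction of
soon-to-be-overwritten CPU damage now happens before the too_small()
heuristic, so the fallback path no longer uploads data that the fill
is about to clobber:

    if (op == PictOpSrc || op == PictOpClear) {
        priv = sna_pixmap_attach(pixmap);
        if (priv)
            sna_damage_subtract(&priv->cpu_damage, &region);
    }

    if (too_small(dst->pDrawable))
        goto fallback;    /* falls back without stale CPU damage */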

diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c
index 09d3827..b54bd33 100644
--- a/src/sna/sna_composite.c
+++ b/src/sna/sna_composite.c
@@ -702,11 +702,6 @@ sna_composite_rectangles(CARD8		 op,
 
 	boxes = pixman_region_rectangles(&region, &num_boxes);
 
-	if (too_small(dst->pDrawable)) {
-		DBG(("%s: fallback, dst is too small\n", __FUNCTION__));
-		goto fallback;
-	}
-
 	/* If we going to be overwriting any CPU damage with a subsequent
 	 * operation, then we may as well delete it without moving it
 	 * first to the GPU.
@@ -717,6 +712,11 @@ sna_composite_rectangles(CARD8		 op,
 			sna_damage_subtract(&priv->cpu_damage, &region);
 	}
 
+	if (too_small(dst->pDrawable)) {
+		DBG(("%s: fallback, dst is too small\n", __FUNCTION__));
+		goto fallback;
+	}
+
 	priv = sna_pixmap_move_to_gpu(pixmap);
 	if (priv == NULL) {
 		DBG(("%s: fallback due to no GPU bo\n", __FUNCTION__));
commit 55520bab578865f878965aa362ec4933f4b26050
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Dec 16 23:22:38 2011 +0000

    sna/gen3: Initialise missing value of need ca pass for fill_boxes()
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 7151e12..56e4b66 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -3870,6 +3870,7 @@ gen3_render_fill_boxes(struct sna *sna,
 	tmp.floats_per_rect = 6;
 	tmp.rb_reversed = 0;
 	tmp.has_component_alpha = 0;
+	tmp.need_magic_ca_pass = false;
 
 	gen3_init_solid(&tmp.src, pixel);
 	tmp.mask.u.gen3.type = SHADER_NONE;
commit e56d5081ea2da930bc82036ac7c4c78b433117ed
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Dec 16 23:18:02 2011 +0000

    sna: Wrap I915_GEM_GET_PARAM with valgrind markup
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
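
The point of the markup: the kernel writes the getparam result behind
valgrind's back, so without VG_CLEAR()/VALGRIND_MAKE_MEM_DEFINED()
every later read of the value triggers an uninitialised-value report.
With the helper factored out (below), each query also collapses to a
single line:

    /* One query via the new helper; under valgrind the result is
     * marked defined in exactly one place. */
    kgem->has_relaxed_fencing =
        gem_param(kgem, I915_PARAM_HAS_RELAXED_FENCING);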

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index a95ba65..9de55ac 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -354,12 +354,24 @@ agp_aperture_size(struct pci_device *dev, int gen)
 	return dev->regions[gen < 30 ? 0 : 2].size;
 }
 
-void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
+static int gem_param(struct kgem *kgem, int name)
 {
 	drm_i915_getparam_t gp;
+	int v = 0;
+
+	VG_CLEAR(gp);
+	gp.param = name;
+	gp.value = &v;
+	drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+
+	VG(VALGRIND_MAKE_MEM_DEFINED(&v, sizeof(v)));
+	return v;
+}
+
+void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
+{
 	struct drm_i915_gem_get_aperture aperture;
 	unsigned int i;
-	int v;
 
 	memset(kgem, 0, sizeof(*kgem));
 
@@ -380,27 +392,15 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	kgem->next_request = __kgem_request_alloc();
 
 #if defined(USE_VMAP) && defined(I915_PARAM_HAS_VMAP)
-	if (!DBG_NO_VMAP) {
-		drm_i915_getparam_t gp;
-
-		v = 0;
-		VG_CLEAR(gp);
-		gp.param = I915_PARAM_HAS_VMAP;
-		gp.value = &v;
-		drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
-		kgem->has_vmap = v > 0;
-	}
+	if (!DBG_NO_VMAP)
+		kgem->has_vmap = gem_param(kgem, I915_PARAM_HAS_VMAP) > 0;
 #endif
 	DBG(("%s: using vmap=%d\n", __FUNCTION__, kgem->has_vmap));
 
 	if (gen < 40) {
 		if (!DBG_NO_RELAXED_FENCING) {
-			v = 0;
-			VG_CLEAR(gp);
-			gp.param = I915_PARAM_HAS_RELAXED_FENCING;
-			gp.value = &v;
-			drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
-			kgem->has_relaxed_fencing = v > 0;
+			kgem->has_relaxed_fencing =
+				gem_param(kgem, I915_PARAM_HAS_RELAXED_FENCING);
 		}
 	} else
 		kgem->has_relaxed_fencing = 1;
@@ -429,13 +429,9 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 		kgem->max_object_size = kgem->aperture_low;
 	DBG(("%s: max object size %d\n", __FUNCTION__, kgem->max_object_size));
 
-	v = 8;
-	VG_CLEAR(gp);
-	gp.param = I915_PARAM_NUM_FENCES_AVAIL;
-	gp.value = &v;
-	(void)drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
-	kgem->fence_max = v - 2;
-
+	kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
+	if (kgem->fence_max < 0)
+		kgem->fence_max = 5;
 	DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max));
 }
 
commit e0399ec1619fe4f87f0578791e697a7e2a8c86dc
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Dec 16 23:07:37 2011 +0000

    sna: Suppress an overwritten XY_SRC_COPY
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
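
The idea, in outline: a solid fill with a simple alu (GXcopy, GXclear
or GXset) covering exactly the box written by the immediately
preceding XY_SRC_COPY makes that copy dead, so the batch is rewritten
in place instead of growing. A hedged sketch, with a hypothetical
predicate standing in for the raw dword comparisons in the diff:

    /* An XY_SRC_COPY is 8 dwords, an XY_COLOR_BLT only 6: rewrite the
     * opcode and colour in place, trim the batch by two dwords, and
     * drop the stale source relocation (the source bo stays on the
     * execlist regardless). */
    if (overwrites && last_is_src_copy_to(kgem, bo, box)) { /* hypothetical check */
        kgem->batch[kgem->nbatch-8] = cmd;
        kgem->batch[kgem->nbatch-7] = br13;
        kgem->batch[kgem->nbatch-3] = color;
        kgem->nreloc--;
        kgem->nbatch -= 2;
        return TRUE;
    }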

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index a4af20d..a42b190 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1492,6 +1492,7 @@ static Bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
 {
 	struct kgem *kgem = &sna->kgem;
 	uint32_t br13, cmd, *b;
+	bool overwrites;
 
 	DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
 	     box->x1, box->y1, box->x2, box->y2));
@@ -1526,8 +1527,8 @@ static Bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
 	}
 
 	/* All too frequently one blt completely overwrites the previous */
-	if (kgem->nbatch >= 6 &&
-	    (alu == GXcopy || alu == GXclear || alu == GXset) &&
+	overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
+	if (overwrites && kgem->nbatch >= 6 &&
 	    kgem->batch[kgem->nbatch-6] == cmd &&
 	    *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(uint64_t *)box &&
 	    kgem->reloc[kgem->nreloc-1].target_handle == bo->handle) {
@@ -1536,6 +1537,21 @@ static Bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
 		kgem->batch[kgem->nbatch-1] = color;
 		return TRUE;
 	}
+	if (overwrites && kgem->nbatch >= 8 &&
+	    (kgem->batch[kgem->nbatch-8] & 0xffc0000f) == XY_SRC_COPY_BLT_CMD &&
+	    *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(uint64_t *)box &&
+	    kgem->reloc[kgem->nreloc-2].target_handle == bo->handle) {
+		DBG(("%s: replacing last copy\n", __FUNCTION__));
+		kgem->batch[kgem->nbatch-8] = cmd;
+		kgem->batch[kgem->nbatch-7] = br13;
+		kgem->batch[kgem->nbatch-3] = color;
+		/* Keep the src bo as part of the execlist, just remove
+		 * its relocation entry.
+		 */
+		kgem->nreloc--;
+		kgem->nbatch -= 2;
+		return TRUE;
+	}
 
 	kgem_set_mode(kgem, KGEM_BLT);
 	if (!kgem_check_batch(kgem, 6) ||
commit 1684ed6a5e8c26ecb48cc1a5025107466526fe94
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Dec 16 19:35:35 2011 +0000

    sna: Clean up compiler warnings for shadowed variables
    
    No outright bug, just plenty of unwanted noise.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
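
For reference, the warning class being silenced: a self-contained
example (not driver code) that gcc flags with -Wshadow:

    #include <stdio.h>

    int main(void)
    {
        int n = 4;
        for (int i = 0; i < n; i++) {
            int n = i;  /* -Wshadow: declaration shadows the outer n */
            printf("%d\n", n);
        }
        return 0;
    }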

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index da61d01..8957a66 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -841,11 +841,11 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box)
 
 	region_set(&r, box);
 	if (sna_damage_intersect(priv->cpu_damage, &r, &i)) {
-		BoxPtr box = REGION_RECTS(&i);
 		int n = REGION_NUM_RECTS(&i);
 		struct kgem_bo *src_bo;
 		Bool ok = FALSE;
 
+		box = REGION_RECTS(&i);
 		src_bo = pixmap_vmap(&sna->kgem, pixmap);
 		if (src_bo)
 			ok = sna->render.copy_boxes(sna, GXcopy,
@@ -1774,10 +1774,10 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 			src = (uint8_t*)bits + (box->y1 - y) * src_stride + bx1/8;
 			src_stride -= bw;
 			do {
-				int i = bw;
+				int j = bw;
 				do {
 					*dst++ = byte_reverse(*src++);
-				} while (--i);
+				} while (--j);
 				dst += bstride;
 				src += src_stride;
 			} while (--bh);
@@ -6270,13 +6270,15 @@ sna_poly_fill_rect_tiled_blt(DrawablePtr drawable,
 								 w, h,
 								 dst_x + dx, dst_y + dy);
 							if (damage) {
-								BoxRec box;
-								box.x1 = dst_x + dx;
-								box.y1 = dst_y + dy;
-								box.x2 = box.x1 + w;
-								box.y2 = box.y1 + h;
-								assert_pixmap_contains_box(pixmap, &box);
-								sna_damage_add_box(damage, &box);
+								BoxRec b;
+
+								b.x1 = dst_x + dx;
+								b.y1 = dst_y + dy;
+								b.x2 = b.x1 + w;
+								b.y2 = b.y1 + h;
+
+								assert_pixmap_contains_box(pixmap, &b);
+								sna_damage_add_box(damage, &b);
 							}
 
 							dst_x += w;
@@ -6328,15 +6330,15 @@ sna_poly_fill_rect_tiled_blt(DrawablePtr drawable,
 								 w, h,
 								 dst_x + dx, dst_y + dy);
 							if (damage) {
-								BoxRec box;
+								BoxRec b;
 
-								box.x1 = dst_x + dx;
-								box.y1 = dst_y + dy;
-								box.x2 = box.x1 + w;
-								box.y2 = box.y1 + h;
+								b.x1 = dst_x + dx;
+								b.y1 = dst_y + dy;
+								b.x2 = b.x1 + w;
+								b.y2 = b.y1 + h;
 
-								assert_pixmap_contains_box(pixmap, &box);
-								sna_damage_add_box(damage, &box);
+								assert_pixmap_contains_box(pixmap, &b);
+								sna_damage_add_box(damage, &b);
 							}
 
 							dst_x += w;
@@ -6401,11 +6403,11 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
 		uint8_t *dst = (uint8_t *)pat;
 		const uint8_t *src = gc->stipple->devPrivate.ptr;
 		int stride = gc->stipple->devKind;
-		int n = gc->stipple->drawable.height;
+		int j = gc->stipple->drawable.height;
 		do {
 			*dst++ = byte_reverse(*src);
 			src += stride;
-		} while (--n);
+		} while (--j);
 	}
 
 	kgem_set_mode(&sna->kgem, KGEM_BLT);
diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c
index e065c6a..82d091a 100644
--- a/src/sna/sna_glyphs.c
+++ b/src/sna/sna_glyphs.c
@@ -1093,7 +1093,6 @@ glyphs_fallback(CARD8 op,
 			GlyphPtr g = *glyphs++;
 			PicturePtr picture;
 			pixman_image_t *glyph_image;
-			int dx, dy;
 
 			if (g->info.width == 0 || g->info.height == 0)
 				goto next_glyph;
@@ -1109,7 +1108,7 @@ glyphs_fallback(CARD8 op,
 			if (mask_format) {
 				DBG(("%s: glyph+(%d,%d) to mask (%d, %d)x(%d, %d)\n",
 				     __FUNCTION__,
-				     dx,dy,
+				     dx, dy,
 				     x - g->info.x,
 				     y - g->info.y,
 				     g->info.width,
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 8899f38..27987d8 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -2095,7 +2095,7 @@ free_boxes:
 	return ret;
 }
 
-static inline int coverage(int samples, pixman_fixed_t f)
+static inline int grid_coverage(int samples, pixman_fixed_t f)
 {
 	return (samples * pixman_fixed_frac(f) + pixman_fixed_1/2) / pixman_fixed_1;
 }
@@ -2155,7 +2155,7 @@ composite_unaligned_trap_row(struct sna *sna,
 		box.x2 = x2 + 1;
 
 		opacity = covered;
-		opacity *= coverage(SAMPLES_X, trap->right.p1.x) - coverage(SAMPLES_X, trap->left.p1.x);
+		opacity *= grid_coverage(SAMPLES_X, trap->right.p1.x) - grid_coverage(SAMPLES_X, trap->left.p1.x);
 
 		if (opacity)
 			composite_unaligned_box(sna, tmp, &box,
@@ -2166,7 +2166,7 @@ composite_unaligned_trap_row(struct sna *sna,
 			box.x2 = x1++;
 
 			opacity = covered;
-			opacity *= SAMPLES_X - coverage(SAMPLES_X, trap->left.p1.x);
+			opacity *= SAMPLES_X - grid_coverage(SAMPLES_X, trap->left.p1.x);
 
 			if (opacity)
 				composite_unaligned_box(sna, tmp, &box,
@@ -2186,7 +2186,7 @@ composite_unaligned_trap_row(struct sna *sna,
 			box.x2 = x2 + 1;
 
 			opacity = covered;
-			opacity *= coverage(SAMPLES_X, trap->right.p1.x);
+			opacity *= grid_coverage(SAMPLES_X, trap->right.p1.x);
 
 			if (opacity)
 				composite_unaligned_box(sna, tmp, &box,
@@ -2210,13 +2210,13 @@ composite_unaligned_trap(struct sna *sna,
 	if (y1 == y2) {
 		composite_unaligned_trap_row(sna, tmp, trap, dx,
 					     y1, y1 + 1,
-					     coverage(SAMPLES_Y, trap->bottom) - coverage(SAMPLES_Y, trap->top),
+					     grid_coverage(SAMPLES_Y, trap->bottom) - grid_coverage(SAMPLES_Y, trap->top),
 					     clip);
 	} else {
 		if (pixman_fixed_frac(trap->top)) {
 			composite_unaligned_trap_row(sna, tmp, trap, dx,
 						     y1, y1 + 1,
-						     SAMPLES_Y - coverage(SAMPLES_Y, trap->top),
+						     SAMPLES_Y - grid_coverage(SAMPLES_Y, trap->top),
 						     clip);
 			y1++;
 		}
@@ -2230,7 +2230,7 @@ composite_unaligned_trap(struct sna *sna,
 		if (pixman_fixed_frac(trap->bottom))
 			composite_unaligned_trap_row(sna, tmp, trap, dx,
 						     y2, y2 + 1,
-						     coverage(SAMPLES_Y, trap->bottom),
+						     grid_coverage(SAMPLES_Y, trap->bottom),
 						     clip);
 	}
 }
@@ -2295,13 +2295,13 @@ blt_unaligned_box_row(PixmapPtr scratch,
 		blt_opacity(scratch,
 			    x1, x1+1,
 			    y1, y2,
-			    covered * (coverage(SAMPLES_X, trap->right.p1.x) - coverage(SAMPLES_X, trap->left.p1.x)));
+			    covered * (grid_coverage(SAMPLES_X, trap->right.p1.x) - grid_coverage(SAMPLES_X, trap->left.p1.x)));
 	} else {
 		if (pixman_fixed_frac(trap->left.p1.x))
 			blt_opacity(scratch,
 				    x1, x1+1,
 				    y1, y2,
-				    covered * (SAMPLES_X - coverage(SAMPLES_X, trap->left.p1.x)));
+				    covered * (SAMPLES_X - grid_coverage(SAMPLES_X, trap->left.p1.x)));
 
 		if (x2 > x1 + 1) {
 			blt_opacity(scratch,
@@ -2314,7 +2314,7 @@ blt_unaligned_box_row(PixmapPtr scratch,
 			blt_opacity(scratch,
 				    x2, x2 + 1,
 				    y1, y2,
-				    covered * coverage(SAMPLES_X, trap->right.p1.x));
+				    covered * grid_coverage(SAMPLES_X, trap->right.p1.x));
 	}
 }
 
@@ -2374,11 +2374,11 @@ composite_unaligned_boxes_fallback(CARD8 op,
 
 		if (y1 == y2) {
 			blt_unaligned_box_row(scratch, &extents, t, y1, y1 + 1,
-					      coverage(SAMPLES_Y, t->bottom) - coverage(SAMPLES_Y, t->top));
+					      grid_coverage(SAMPLES_Y, t->bottom) - grid_coverage(SAMPLES_Y, t->top));
 		} else {
 			if (pixman_fixed_frac(t->top))
 				blt_unaligned_box_row(scratch, &extents, t, y1, y1 + 1,
-						      SAMPLES_Y - coverage(SAMPLES_Y, t->top));
+						      SAMPLES_Y - grid_coverage(SAMPLES_Y, t->top));
 
 			if (y2 > y1 + 1)
 				blt_unaligned_box_row(scratch, &extents, t, y1+1, y2,
@@ -2386,7 +2386,7 @@ composite_unaligned_boxes_fallback(CARD8 op,
 
 			if (pixman_fixed_frac(t->bottom))
 				blt_unaligned_box_row(scratch, &extents, t, y2, y2+1,
-						      coverage(SAMPLES_Y, t->bottom));
+						      grid_coverage(SAMPLES_Y, t->bottom));
 		}
 
 		mask = CreatePicture(0, &scratch->drawable,
commit 300586b229ae941ac31850af00d120d8c441c583
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Dec 16 17:56:53 2011 +0000

    sna/gen4+: disable the blend unit for PictOpSrc
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
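
The reasoning: PictOpSrc blends with source factor ONE and destination
factor ZERO, i.e. dst' = src, so the blend stage is an identity and
can be switched off outright, sparing the read of the destination.
The pattern as applied on gen6 below:

    /* A ONE/ZERO factor pair is an identity blend; skip the unit. */
    blend->blend0.blend_enable =
        !(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);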

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 9ea98da..ebae915 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2947,7 +2947,8 @@ static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream)
 			struct gen4_cc_unit_state *state =
 				(struct gen4_cc_unit_state *)ptr;
 
-			state->cc3.blend_enable = 1;	/* enable color blend */
+			state->cc3.blend_enable =
+				!(j == GEN4_BLENDFACTOR_ZERO && i == GEN4_BLENDFACTOR_ONE);
 			state->cc4.cc_viewport_state_offset = vp >> 5;
 
 			state->cc5.logicop_func = 0xc;	/* COPY */
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 99a8f7c..89b7bef 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -3358,7 +3358,8 @@ static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream)
 			struct gen5_cc_unit_state *state =
 				(struct gen5_cc_unit_state *)ptr;
 
-			state->cc3.blend_enable = 1;	/* enable color blend */
+			state->cc3.blend_enable =
+				!(j == GEN5_BLENDFACTOR_ZERO && i == GEN5_BLENDFACTOR_ONE);
 			state->cc4.cc_viewport_state_offset = vp >> 5;
 
 			state->cc5.logicop_func = 0xc;	/* COPY */
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 117a6c3..ea82b20 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1680,7 +1680,8 @@ gen6_composite_create_blend_state(struct sna_static_stream *stream)
 			blend->blend0.dest_blend_factor = dst;
 			blend->blend0.source_blend_factor = src;
 			blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD;
-			blend->blend0.blend_enable = 1;
+			blend->blend0.blend_enable =
+				!(dst == GEN6_BLENDFACTOR_ZERO && src == GEN6_BLENDFACTOR_ONE);
 
 			blend->blend1.post_blend_clamp_enable = 1;
 			blend->blend1.pre_blend_clamp_enable = 1;
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 25851da..121f137 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -1794,7 +1794,8 @@ gen7_composite_create_blend_state(struct sna_static_stream *stream)
 			blend->blend0.dest_blend_factor = dst;
 			blend->blend0.source_blend_factor = src;
 			blend->blend0.blend_func = GEN7_BLENDFUNCTION_ADD;
-			blend->blend0.blend_enable = 1;
+			blend->blend0.blend_enable =
+				!(dst == GEN7_BLENDFACTOR_ZERO && src == GEN7_BLENDFACTOR_ONE);
 
 			blend->blend1.post_blend_clamp_enable = 1;
 			blend->blend1.pre_blend_clamp_enable = 1;
commit 0de7604d8ecfc73ef9e92059340d00ce5cfcdd75
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Dec 16 17:40:01 2011 +0000

    sna/gen4+: Add support for depth 15 render copies/fills
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
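
Depth 15 pixels occupy 16-bit words laid out as x1r5g5b5, top bit
unused and five bits per channel, hence the new B5G5R5X1 entries
below. A hypothetical packing helper, only to fix the layout in mind:

    #include <stdint.h>

    /* Hypothetical helper, not in the driver: pack 8-bit channels
     * into an x1r5g5b5 word (bit 15 unused, 5 bits per channel). */
    static inline uint16_t pack_x1r5g5b5(uint8_t r, uint8_t g, uint8_t b)
    {
        return (uint16_t)(((r >> 3) << 10) | ((g >> 3) << 5) | (b >> 3));
    }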

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 73f659f..9ea98da 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -267,6 +267,7 @@ static const struct formatinfo {
 	{PICT_r8g8b8, GEN4_SURFACEFORMAT_R8G8B8_UNORM},
 	{PICT_r5g6b5, GEN4_SURFACEFORMAT_B5G6R5_UNORM},
 	{PICT_a1r5g5b5, GEN4_SURFACEFORMAT_B5G5R5A1_UNORM},
+	{PICT_x1r5g5b5, GEN4_SURFACEFORMAT_B5G5R5X1_UNORM},
 	{PICT_a2r10g10b10, GEN4_SURFACEFORMAT_B10G10R10A2_UNORM},
 	{PICT_x2r10g10b10, GEN4_SURFACEFORMAT_B10G10R10X2_UNORM},
 	{PICT_a2b10g10r10, GEN4_SURFACEFORMAT_R10G10B10A2_UNORM},
@@ -2174,6 +2175,7 @@ static uint32_t gen4_get_dest_format_for_depth(int depth)
 	default: return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
 	case 30: return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
 	case 16: return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
+	case 15: return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM;
 	case 8:  return GEN4_SURFACEFORMAT_A8_UNORM;
 	}
 }
@@ -2183,9 +2185,10 @@ static uint32_t gen4_get_card_format_for_depth(int depth)
 	switch (depth) {
 	case 32:
 	default: return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM;
-	case 30: return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM;
+	case 30: return GEN4_SURFACEFORMAT_B10G10R10X2_UNORM;
 	case 24: return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM;
 	case 16: return GEN4_SURFACEFORMAT_B5G6R5_UNORM;
+	case 15: return GEN4_SURFACEFORMAT_B5G5R5X1_UNORM;
 	case 8:  return GEN4_SURFACEFORMAT_A8_UNORM;
 	}
 }
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index d09d6bc..99a8f7c 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -255,6 +255,7 @@ static const struct formatinfo {
 	{PICT_r8g8b8, GEN5_SURFACEFORMAT_R8G8B8_UNORM},
 	{PICT_r5g6b5, GEN5_SURFACEFORMAT_B5G6R5_UNORM},
 	{PICT_a1r5g5b5, GEN5_SURFACEFORMAT_B5G5R5A1_UNORM},
+	{PICT_x1r5g5b5, GEN5_SURFACEFORMAT_B5G5R5X1_UNORM},
 	{PICT_a2r10g10b10, GEN5_SURFACEFORMAT_B10G10R10A2_UNORM},
 	{PICT_x2r10g10b10, GEN5_SURFACEFORMAT_B10G10R10X2_UNORM},
 	{PICT_a2b10g10r10, GEN5_SURFACEFORMAT_R10G10B10A2_UNORM},
@@ -493,6 +494,7 @@ static uint32_t gen5_get_dest_format_for_depth(int depth)
 	default: return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
 	case 30: return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
 	case 16: return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
+	case 15: return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM;
 	case 8:  return GEN5_SURFACEFORMAT_A8_UNORM;
 	}
 }
@@ -502,9 +504,10 @@ static uint32_t gen5_get_card_format_for_depth(int depth)
 	switch (depth) {
 	case 32:
 	default: return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM;
-	case 30: return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM;
+	case 30: return GEN5_SURFACEFORMAT_B10G10R10X2_UNORM;
 	case 24: return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM;
 	case 16: return GEN5_SURFACEFORMAT_B5G6R5_UNORM;
+	case 15: return GEN5_SURFACEFORMAT_B5G5R5X1_UNORM;
 	case 8:  return GEN5_SURFACEFORMAT_A8_UNORM;
 	}
 }
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 3ef1c42..117a6c3 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -207,6 +207,7 @@ static const struct formatinfo {
 	{PICT_r8g8b8, GEN6_SURFACEFORMAT_R8G8B8_UNORM},
 	{PICT_r5g6b5, GEN6_SURFACEFORMAT_B5G6R5_UNORM},
 	{PICT_a1r5g5b5, GEN6_SURFACEFORMAT_B5G5R5A1_UNORM},
+	{PICT_x1r5g5b5, GEN6_SURFACEFORMAT_B5G5R5X1_UNORM},
 	{PICT_a2r10g10b10, GEN6_SURFACEFORMAT_B10G10R10A2_UNORM},
 	{PICT_x2r10g10b10, GEN6_SURFACEFORMAT_B10G10R10X2_UNORM},
 	{PICT_a2b10g10r10, GEN6_SURFACEFORMAT_R10G10B10A2_UNORM},
@@ -317,6 +318,7 @@ static uint32_t gen6_get_dest_format_for_depth(int depth)
 	case 24: return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
 	case 30: return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
 	case 16: return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
+	case 15: return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM;
 	case 8:  return GEN6_SURFACEFORMAT_A8_UNORM;
 	}
 }
@@ -326,9 +328,10 @@ static uint32_t gen6_get_card_format_for_depth(int depth)
 	switch (depth) {
 	default: assert(0);
 	case 32: return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM;
-	case 30: return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM;
+	case 30: return GEN6_SURFACEFORMAT_B10G10R10X2_UNORM;
 	case 24: return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM;
 	case 16: return GEN6_SURFACEFORMAT_B5G6R5_UNORM;
+	case 15: return GEN6_SURFACEFORMAT_B5G5R5X1_UNORM;
 	case 8:  return GEN6_SURFACEFORMAT_A8_UNORM;
 	}
 }
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index a45814e..25851da 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -215,6 +215,7 @@ static const struct formatinfo {
 	{PICT_r8g8b8, GEN7_SURFACEFORMAT_R8G8B8_UNORM},
 	{PICT_r5g6b5, GEN7_SURFACEFORMAT_B5G6R5_UNORM},
 	{PICT_a1r5g5b5, GEN7_SURFACEFORMAT_B5G5R5A1_UNORM},
+	{PICT_x1r5g5b5, GEN7_SURFACEFORMAT_B5G5R5X1_UNORM},
 	{PICT_a2r10g10b10, GEN7_SURFACEFORMAT_B10G10R10A2_UNORM},
 	{PICT_x2r10g10b10, GEN7_SURFACEFORMAT_B10G10R10X2_UNORM},
 	{PICT_a2b10g10r10, GEN7_SURFACEFORMAT_R10G10B10A2_UNORM},
@@ -325,6 +326,7 @@ static uint32_t gen7_get_dest_format_for_depth(int depth)
 	case 24: return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
 	case 30: return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
 	case 16: return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
+	case 15: return GEN7_SURFACEFORMAT_B5G5R5A1_UNORM;
 	case 8:  return GEN7_SURFACEFORMAT_A8_UNORM;
 	}
 }
@@ -334,9 +336,10 @@ static uint32_t gen7_get_card_format_for_depth(int depth)
 	switch (depth) {
 	default: assert(0);
 	case 32: return GEN7_SURFACEFORMAT_B8G8R8A8_UNORM;
-	case 30: return GEN7_SURFACEFORMAT_B10G10R10A2_UNORM;
+	case 30: return GEN7_SURFACEFORMAT_B10G10R10X2_UNORM;
 	case 24: return GEN7_SURFACEFORMAT_B8G8R8X8_UNORM;
 	case 16: return GEN7_SURFACEFORMAT_B5G6R5_UNORM;
+	case 15: return GEN7_SURFACEFORMAT_B5G5R5X1_UNORM;
 	case 8:  return GEN7_SURFACEFORMAT_A8_UNORM;
 	}
 }
commit a8fe50ab6503d9cb63931771318dc3e84d002092
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 17 11:15:36 2011 +0000

    uxa: Explicitly check for libdrm_intel in configure
    
    Also remove the excess dependencies from the common files.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
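
The shape of the change: only the optional UXA backend links against libdrm_intel, so its pkg-config check moves inside the UXA conditional, while the unconditional requirement relaxes to plain libdrm >= 2.4.24. Schematically (a configure.ac excerpt reconstructed from the hunks below, not verbatim):

    PKG_CHECK_MODULES(DRM, [libdrm >= 2.4.24])  # core driver needs only libdrm
    if test "x$UXA" != "xno"; then
            AC_DEFINE(USE_UXA, 1, [Enable UXA support])
            PKG_CHECK_MODULES(DRMINTEL, [libdrm_intel >= 2.4.29])
    fi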

diff --git a/configure.ac b/configure.ac
index da5fd77..1ac72ec 100644
--- a/configure.ac
+++ b/configure.ac
@@ -142,11 +142,12 @@ AC_ARG_ENABLE(uxa,
 			     [Enable Unified Acceleration Architecture (UXA) [default=yes]]),
 	      [UXA="$enableval"],
 	      [UXA=yes])
+AC_MSG_RESULT([$UXA])
 AM_CONDITIONAL(UXA, test x$UXA != xno)
 if test "x$UXA" != "xno"; then
 	AC_DEFINE(USE_UXA, 1, [Enable UXA support])
+	PKG_CHECK_MODULES(DRMINTEL, [libdrm_intel >= 2.4.29])
 fi
-AC_MSG_RESULT([$UXA])
 
 AC_MSG_CHECKING([whether to include GLAMOR support])
 AC_ARG_ENABLE(glamor,
@@ -187,7 +188,7 @@ XORG_DRIVER_CHECK_EXT(DPMSExtension, xextproto)
 
 # Obtain compiler/linker options for the driver dependencies
 PKG_CHECK_MODULES(XORG, [xorg-server >= $required_xorg_xserver_version xproto fontsproto pixman-1 >= $required_pixman_version $REQUIRED_MODULES])
-PKG_CHECK_MODULES(DRM, [libdrm >= 2.4.29])
+PKG_CHECK_MODULES(DRM, [libdrm >= 2.4.24]) # libdrm_intel is checked separately
 PKG_CHECK_MODULES(DRI, [xf86driproto], , DRI=no)
 PKG_CHECK_MODULES(DRI2, [dri2proto >= 2.6],, DRI2=no)
 PKG_CHECK_MODULES(PCIACCESS, [pciaccess >= 0.10])
diff --git a/src/Makefile.am b/src/Makefile.am
index afaed36..62ac04f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -26,8 +26,7 @@ SUBDIRS = xvmc render_program legacy
 # _ladir passes a dummy rpath to libtool so the thing will actually link
 # TODO: -nostdlib/-Bstatic/-lgcc platform magic, not installing the .a, etc.
 
-AM_CFLAGS = @CWARNFLAGS@ @XORG_CFLAGS@ @UDEV_CFLAGS@ @DRM_CFLAGS@ @DRI_CFLAGS@ \
-	@PCIACCESS_CFLAGS@
+AM_CFLAGS = @CWARNFLAGS@ @XORG_CFLAGS@ @DRM_CFLAGS@ @PCIACCESS_CFLAGS@
 
 intel_drv_la_LTLIBRARIES = intel_drv.la
 intel_drv_la_LDFLAGS = -module -avoid-version
@@ -46,8 +45,9 @@ intel_drv_la_SOURCES = \
 	$(NULL)
 
 if UXA
+AM_CFLAGS += @UDEV_CFLAGS@ @DRM_CFLAGS@ @DRMINTEL_CFLAGS@
 AM_CFLAGS += -I$(top_srcdir)/uxa -I$(top_srcdir)/src/render_program
-intel_drv_la_LIBADD += @UDEV_LIBS@ @DRM_LIBS@ -ldrm_intel ../uxa/libuxa.la
+intel_drv_la_LIBADD += @UDEV_LIBS@ @DRMINTEL_LIBS@ @DRM_LIBS@ ../uxa/libuxa.la
 intel_drv_la_SOURCES += \
          brw_defines.h \
          brw_structs.h \
diff --git a/src/intel.h b/src/intel.h
index 5423c20..b497bdf 100644
--- a/src/intel.h
+++ b/src/intel.h
@@ -63,7 +63,6 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "xorg-server.h"
 #include <pciaccess.h>
 
-#include "xf86drm.h"
 #define _XF86DRI_SERVER_
 #include "dri2.h"
 #include "intel_bufmgr.h"
diff --git a/src/intel_display.c b/src/intel_display.c
index 5ad8625..d525ffa 100644
--- a/src/intel_display.c
+++ b/src/intel_display.c
@@ -40,6 +40,7 @@
 
 #include "intel.h"
 #include "intel_bufmgr.h"
+#include "xf86drm.h"
 #include "xf86drmMode.h"
 #include "X11/Xatom.h"
 #include "X11/extensions/dpmsconst.h"
diff --git a/src/intel_dri.c b/src/intel_dri.c
index 135ba4e..152313a 100644
--- a/src/intel_dri.c
+++ b/src/intel_dri.c
@@ -53,6 +53,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 
 #include "xf86PciInfo.h"
 #include "xf86Pci.h"
+#include "xf86drm.h"
 
 #include "windowstr.h"
 #include "shadow.h"
diff --git a/src/intel_driver.c b/src/intel_driver.c
index 9094fd1..9d1c4e8 100644
--- a/src/intel_driver.c
+++ b/src/intel_driver.c
@@ -48,6 +48,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "xf86.h"
 #include "xf86_OSproc.h"
 #include "xf86cmap.h"
+#include "xf86drm.h"
 #include "compiler.h"
 #include "mibstore.h"
 #include "mipointer.h"
diff --git a/src/intel_module.c b/src/intel_module.c
index f452e1b..fb98a04 100644
--- a/src/intel_module.c
+++ b/src/intel_module.c
@@ -31,15 +31,13 @@
 #include "xf86.h"
 #include "xf86_OSproc.h"
 #include "xf86cmap.h"
+#include "xf86drmMode.h"
 
 #include "common.h"
-#include "intel.h"
 #include "intel_driver.h"
 #include "legacy/legacy.h"
 #include "sna/sna_module.h"
 
-#include <xf86drmMode.h>
-
 static struct intel_device_info *chipset_info;
 
 static const struct intel_device_info intel_i81x_info = {
@@ -140,6 +138,8 @@ static const SymTabRec _intel_chipsets[] = {
 	{PCI_CHIP_IVYBRIDGE_S_GT1,		"Ivybridge Server" },
 	{-1,					NULL}
 };
+#define NUM_CHIPSETS (sizeof(_intel_chipsets) / sizeof(_intel_chipsets[0]))
+
 SymTabRec *intel_chipsets = (SymTabRec *) _intel_chipsets;
 
 #define INTEL_DEVICE_MATCH(d,i) \
@@ -298,7 +298,7 @@ static Bool intel_pci_probe(DriverPtr		driver,
 			    intptr_t		match_data)
 {
 	ScrnInfoPtr scrn;
-	PciChipsets intel_pci_chipsets[ARRAY_SIZE(_intel_chipsets)];
+	PciChipsets intel_pci_chipsets[NUM_CHIPSETS];
 	unsigned i;
 
 	chipset_info = (void *)match_data;
@@ -319,7 +319,7 @@ static Bool intel_pci_probe(DriverPtr		driver,
 #endif
 	}
 
-	for (i = 0; i < ARRAY_SIZE(_intel_chipsets); i++) {
+	for (i = 0; i < NUM_CHIPSETS; i++) {
 		intel_pci_chipsets[i].numChipset = intel_chipsets[i].token;
 		intel_pci_chipsets[i].PCIid = intel_chipsets[i].token;
 		intel_pci_chipsets[i].dummy = NULL;
diff --git a/src/intel_video.c b/src/intel_video.c
index d1d6126..6d74d51 100644
--- a/src/intel_video.c
+++ b/src/intel_video.c
@@ -61,6 +61,7 @@
 #include "xf86PciInfo.h"
 #include "xf86Pci.h"
 #include "xf86fbman.h"
+#include "xf86drm.h"
 #include "regionstr.h"
 #include "randrstr.h"
 #include "windowstr.h"
commit 3320b459d6fe90d1145e3874c840b95279fb16c8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 17 12:08:24 2011 +0000

    sna: Clean up caches stored on the batch bo
    
    Until the advent of the VMA cache, it was safe to reap the batch bo by
    hand. However, as we continue to attach additional data to the bo, it
    is wiser to use the common free function.
    
    Reported-by: Clemens Eisserer <linuxhippy at gmail.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=43899
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
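
Put differently, gem_close() plus free() releases only the kernel handle and the host struct; once a bo can carry a cached mmap (the VMA cache) and other bookkeeping, the common kgem_bo_free() path is the only safe teardown. A rough sketch of what hand-reaping would now leak (the map/size/vma fields are assumptions for illustration, not kgem.h's actual layout):

    #include <sys/mman.h>
    #include <stdlib.h>

    /* Hypothetical teardown mirroring the common free path. */
    static void
    bo_free_sketch(struct kgem *kgem, struct kgem_bo *bo)
    {
            if (bo->map) {                  /* cached CPU mapping */
                    munmap(bo->map, bo->size);
                    list_del(&bo->vma);     /* unlink from the VMA cache */
            }
            gem_close(kgem->fd, bo->handle);
            free(bo);
    }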

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index a661041..a95ba65 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -796,8 +796,7 @@ bool kgem_retire(struct kgem *kgem)
 			retired = true;
 		} else {
 			kgem->need_purge = 1;
-			gem_close(kgem->fd, rq->bo->handle);
-			free(rq->bo);
+			kgem_bo_free(kgem, rq->bo);
 		}
 
 		list_del(&rq->list);

