xf86-video-intel: 5 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_dri.c src/sna/sna_io.c src/sna/sna_render.c src/sna/sna_trapezoids.c src/sna/sna_video_sprite.c src/sna/sna_video_textured.c

Chris Wilson ickle at kemper.freedesktop.org
Sun Dec 9 04:48:53 PST 2012


 src/sna/gen2_render.c        |   16 +++++------
 src/sna/gen3_render.c        |   14 +++++-----
 src/sna/gen4_render.c        |   22 +++++++++++-----
 src/sna/gen5_render.c        |   28 +++++++++++---------
 src/sna/gen6_render.c        |   44 ++++++++++++++++----------------
 src/sna/gen7_render.c        |   22 ++++++++--------
 src/sna/kgem.c               |   58 +++++--------------------------------------
 src/sna/kgem.h               |   19 ++++----------
 src/sna/sna_accel.c          |   29 +++++++++++----------
 src/sna/sna_blt.c            |   12 ++++----
 src/sna/sna_dri.c            |   26 +++++++++++--------
 src/sna/sna_io.c             |   14 +++++-----
 src/sna/sna_render.c         |    2 -
 src/sna/sna_trapezoids.c     |   35 ++++++++++++++++++++++++-
 src/sna/sna_video_sprite.c   |    1 
 src/sna/sna_video_textured.c |    5 ++-
 16 files changed, 174 insertions(+), 173 deletions(-)

New commits:
commit aa8c5d8201006397bb32ed6bc28618f9aa77a68a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Dec 6 22:08:08 2012 +0000

    sna/sprite: Add a DBG to report whether the kernel supports sprites
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_video_sprite.c b/src/sna/sna_video_sprite.c
index b51bf16..79b4af2 100644
--- a/src/sna/sna_video_sprite.c
+++ b/src/sna/sna_video_sprite.c
@@ -380,6 +380,7 @@ XF86VideoAdaptorPtr sna_video_sprite_setup(struct sna *sna,
 	memset(&r, 0, sizeof(struct drm_mode_get_plane_res));
 	if (drmIoctl(sna->kgem.fd, DRM_IOCTL_MODE_GETPLANERESOURCES, &r))
 		return NULL;
+	DBG(("%s: %d sprite planes\n", __FUNCTION__, r.count_planes));
 	if (r.count_planes == 0)
 		return NULL;
 
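For context, a minimal standalone sketch (not part of the commit) of the
probe whose result the new DBG reports. The helper name is hypothetical;
the ioctl, struct and field come straight from the hunk above, and the
header assumes libdrm's usual layout:

#include <string.h>
#include <xf86drm.h>	/* drmIoctl(); pulls in drm.h / drm_mode.h */

static int count_sprite_planes(int fd)
{
	struct drm_mode_get_plane_res r;

	/* With the counts zeroed, the kernel only reports how many
	 * planes exist instead of filling in an id array. */
	memset(&r, 0, sizeof(r));
	if (drmIoctl(fd, DRM_IOCTL_MODE_GETPLANERESOURCES, &r))
		return 0;	/* ioctl unsupported: no sprite support */

	return r.count_planes;	/* 0 planes also means no sprites */
}
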
commit 25c0d440dee45c03f5e45b8e0e45071c0c32f507
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 9 12:11:53 2012 +0000

    sna: Move source to CPU prior to referencing for inplace trapezoids
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56825
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index c7edad6..e996cdb 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4736,6 +4736,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 					goto fallback;
 			}
 
+			assert(dst_priv->clear == false);
 			dst_priv->cpu = false;
 			if (damage) {
 				assert(dst_priv->gpu_bo->proxy == NULL);
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 8f2ea34..289780b 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -4855,6 +4855,20 @@ unbounded_pass:
 
 		op = 0;
 	} else {
+		if (src->pDrawable) {
+			if (!sna_drawable_move_to_cpu(src->pDrawable,
+						      MOVE_READ)) {
+				mono_fini(&mono);
+				return false;
+			}
+			if (src->alphaMap &&
+			    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
+						      MOVE_READ)) {
+				mono_fini(&mono);
+				return false;
+			}
+		}
+
 		inplace.composite.dst = image_from_pict(dst, false,
 							&inplace.composite.dx,
 							&inplace.composite.dy);
@@ -5091,6 +5105,20 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 		} else {
 			struct pixman_inplace pi;
 
+			if (src->pDrawable) {
+				if (!sna_drawable_move_to_cpu(src->pDrawable,
+							MOVE_READ)) {
+					tor_fini(&tor);
+					return false;
+				}
+				if (src->alphaMap &&
+				    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable,
+							      MOVE_READ)) {
+					tor_fini(&tor);
+					return false;
+				}
+			}
+
 			pi.image = image_from_pict(dst, false, &pi.dx, &pi.dy);
 			pi.source = image_from_pict(src, false, &pi.sx, &pi.sy);
 			pi.sx += src_x - pixman_fixed_to_int(traps[0].left.p1.x);
@@ -5519,10 +5547,13 @@ sna_composite_trapezoids(CARD8 op,
 	force_fallback = FORCE_FALLBACK > 0;
 	if ((too_small(priv) || DAMAGE_IS_ALL(priv->cpu_damage)) &&
 	    !picture_is_gpu(src)) {
-		DBG(("%s: force fallbacks -- dst is too small, %dx%d\n",
+		DBG(("%s: force fallbacks --too small, %dx%d? %d, all-cpu? %d, src-is-cpu? %d\n",
 		     __FUNCTION__,
 		     dst->pDrawable->width,
-		     dst->pDrawable->height));
+		     dst->pDrawable->height,
+		     too_small(priv),
+		     DAMAGE_IS_ALL(priv->cpu_damage),
+		     !picture_is_gpu(src)));
 		force_fallback = true;
 	}
 	if (FORCE_FALLBACK < 0)
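
Both hunks above add the same guard in front of image_from_pict(), since
compositing inplace reads the source through pixman on the CPU. A
condensed sketch of the pattern (hypothetical helper name; the types and
flags are the driver's own):

static bool source_readable_on_cpu(PicturePtr src)
{
	if (src->pDrawable == NULL)
		return true;	/* solid/gradient sources have no drawable */

	/* Migrate the source pixmap (and any alpha map) so that the
	 * following CPU reads see coherent data. */
	if (!sna_drawable_move_to_cpu(src->pDrawable, MOVE_READ))
		return false;

	if (src->alphaMap &&
	    !sna_drawable_move_to_cpu(src->alphaMap->pDrawable, MOVE_READ))
		return false;

	return true;
}

On failure the callers tear down their span state (mono_fini()/tor_fini())
and return false so the operation takes the fallback path.
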
commit 3e82fcc8d243b7ffe1a3d3249a5cdb5fd068093f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 9 11:19:13 2012 +0000

    sna/gen4+: Refine test for preferring GPU spans
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 14ac123..873e1a4 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2680,7 +2680,17 @@ gen4_check_composite_spans(struct sna *sna,
 		return FORCE_SPANS > 0;
 
 	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
-		DBG(("%s: fallback, non-rectilinear spans\n",
+		if ((flags & COMPOSITE_SPANS_INPLACE_HINT) == 0) {
+			struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
+			assert(priv);
+
+			if ((priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) ||
+			    (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))) {
+				return true;
+			}
+		}
+
+		DBG(("%s: fallback, non-rectilinear spans to idle bo\n",
 		     __FUNCTION__));
 		return false;
 	}
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index f8cea61..bff5066 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2692,13 +2692,15 @@ gen5_check_composite_spans(struct sna *sna,
 		return false;
 	}
 
-	if ((flags & (COMPOSITE_SPANS_RECTILINEAR | COMPOSITE_SPANS_INPLACE_HINT)) == 0) {
-		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
-		assert(priv);
-
-		if ((priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) ||
-		    (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))) {
-			return true;
+	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
+		if ((flags & COMPOSITE_SPANS_INPLACE_HINT) == 0) {
+			struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
+			assert(priv);
+
+			if ((priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) ||
+			    (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))) {
+				return true;
+			}
 		}
 
 		DBG(("%s: fallback, non-rectilinear spans to idle bo\n",
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 1677a47..6483092 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -3123,13 +3123,15 @@ gen6_check_composite_spans(struct sna *sna,
 		return false;
 	}
 
-	if ((flags & (COMPOSITE_SPANS_RECTILINEAR | COMPOSITE_SPANS_INPLACE_HINT)) == 0) {
-		struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
-		assert(priv);
-
-		if ((priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) ||
-		    (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))) {
-			return true;
+	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0) {
+		if ((flags & COMPOSITE_SPANS_INPLACE_HINT) == 0) {
+			struct sna_pixmap *priv = sna_pixmap_from_drawable(dst->pDrawable);
+			assert(priv);
+
+			if ((priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) ||
+			    (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo))) {
+				return true;
+			}
 		}
 
 		DBG(("%s: fallback, non-rectilinear spans to idle bo\n",
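
The same refinement lands in gen4, gen5 and gen6: the busy-bo escape
hatch for non-rectilinear spans now only applies when the caller did not
pass COMPOSITE_SPANS_INPLACE_HINT. Condensed into one hypothetical
helper (priv comes from sna_pixmap_from_drawable(dst->pDrawable)):

static bool gpu_spans_worthwhile(struct sna_pixmap *priv, unsigned flags)
{
	if (flags & COMPOSITE_SPANS_INPLACE_HINT)
		return false;	/* caller supplied the inplace hint */

	/* If either bo backing the destination is already busy, keeping
	 * the non-rectilinear spans on the GPU avoids stalling for a
	 * CPU fallback; an idle destination still falls back. */
	return (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) ||
	       (priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo));
}
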
commit c8f622726a4463b419d032b379576cfb3bc492df
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Dec 9 09:26:03 2012 +0000

    sna: Replace remaining kgem_is_idle() with kgem_ring_is_idle()
    
    Further experimentation...
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 2123cb9..eeb0af4 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -3191,7 +3191,7 @@ gen2_render_context_switch(struct kgem *kgem,
 	/* Reload BLT registers following a lost context */
 	sna->blt_state.fill_bo = 0;
 
-	if (kgem_is_idle(kgem)) {
+	if (kgem_ring_is_idle(kgem, kgem->ring)) {
 		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
 		_kgem_submit(kgem);
 	}
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index d0e570e..f8cea61 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -3585,7 +3585,7 @@ gen5_render_context_switch(struct kgem *kgem,
 		sna->render_state.gen5.drawrect_limit = -1;
 	}
 
-	if (kgem_is_idle(kgem)) {
+	if (kgem_ring_is_idle(kgem, kgem->ring)) {
 		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
 		_kgem_submit(kgem);
 	}
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 47dd7d5..921fdf8 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1733,6 +1733,8 @@ static bool kgem_retire__flushing(struct kgem *kgem)
 	}
 #endif
 
+	kgem->need_retire |= !list_is_empty(&kgem->flushing);
+
 	return retired;
 }
 
@@ -1824,7 +1826,6 @@ static bool kgem_retire__requests_ring(struct kgem *kgem, int ring)
 		}
 
 		__kgem_request_free(rq);
-		kgem->num_requests--;
 	}
 
 #if HAS_DEBUG_FULL
@@ -1854,20 +1855,10 @@ static bool kgem_retire__requests(struct kgem *kgem)
 	bool retired = false;
 	int n;
 
-	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++)
+	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
 		retired |= kgem_retire__requests_ring(kgem, n);
-
-#if HAS_DEBUG_FULL
-	{
-		int count = 0;
-
-		for (n = 0; n < ARRAY_SIZE(kgem->requests); n++)
-			list_for_each_entry(bo, &kgem->requests[n], request)
-				count++;
-
-		assert(count == kgem->num_requests);
+		kgem->need_retire |= !list_is_empty(&kgem->requests[n]);
 	}
-#endif
 
 	return retired;
 }
@@ -1878,14 +1869,12 @@ bool kgem_retire(struct kgem *kgem)
 
 	DBG(("%s\n", __FUNCTION__));
 
+	kgem->need_retire = false;
+
 	retired |= kgem_retire__flushing(kgem);
-	if (kgem->num_requests)
-		retired |= kgem_retire__requests(kgem);
+	retired |= kgem_retire__requests(kgem);
 	retired |= kgem_retire__buffers(kgem);
 
-	kgem->need_retire =
-		kgem->num_requests ||
-		!list_is_empty(&kgem->flushing);
 	DBG(("%s -- retired=%d, need_retire=%d\n",
 	     __FUNCTION__, retired, kgem->need_retire));
 
@@ -1894,39 +1883,10 @@ bool kgem_retire(struct kgem *kgem)
 	return retired;
 }
 
-bool __kgem_is_idle(struct kgem *kgem)
-{
-	int n;
-
-	assert(kgem->num_requests);
-
-	for (n = 0; n < ARRAY_SIZE(kgem->requests); n++) {
-		struct kgem_request *rq;
-
-		if (list_is_empty(&kgem->requests[n]))
-			continue;
-
-		rq = list_last_entry(&kgem->requests[n],
-				     struct kgem_request, list);
-		if (kgem_busy(kgem, rq->bo->handle)) {
-			DBG(("%s: last requests handle=%d still busy\n",
-			     __FUNCTION__, rq->bo->handle));
-			return false;
-		}
-
-		DBG(("%s: ring=%d idle (handle=%d)\n",
-		     __FUNCTION__, n, rq->bo->handle));
-	}
-	kgem_retire__requests(kgem);
-	assert(kgem->num_requests == 0);
-	return true;
-}
-
 bool __kgem_ring_is_idle(struct kgem *kgem, int ring)
 {
 	struct kgem_request *rq;
 
-	assert(kgem->num_requests);
 	assert(!list_is_empty(&kgem->requests[ring]));
 
 	rq = list_last_entry(&kgem->requests[ring],
@@ -2005,7 +1965,6 @@ static void kgem_commit(struct kgem *kgem)
 	} else {
 		list_add_tail(&rq->list, &kgem->requests[rq->ring]);
 		kgem->need_throttle = kgem->need_retire = 1;
-		kgem->num_requests++;
 	}
 
 	kgem->next_request = NULL;
@@ -2180,7 +2139,6 @@ static void kgem_cleanup(struct kgem *kgem)
 		}
 	}
 
-	kgem->num_requests = 0;
 	kgem_close_inactive(kgem);
 }
 
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 5cd22cc..21398bf 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -132,7 +132,6 @@ struct kgem {
 
 	struct list requests[2];
 	struct kgem_request *next_request;
-	uint32_t num_requests;
 
 	struct {
 		struct list inactive[NUM_CACHE_BUCKETS];
@@ -275,17 +274,6 @@ int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo);
 void kgem_bo_retire(struct kgem *kgem, struct kgem_bo *bo);
 bool kgem_retire(struct kgem *kgem);
 
-bool __kgem_is_idle(struct kgem *kgem);
-static inline bool kgem_is_idle(struct kgem *kgem)
-{
-	if (kgem->num_requests == 0) {
-		DBG(("%s: no outstanding requests\n", __FUNCTION__));
-		return true;
-	}
-
-	return __kgem_is_idle(kgem);
-}
-
 bool __kgem_ring_is_idle(struct kgem *kgem, int ring);
 static inline bool kgem_ring_is_idle(struct kgem *kgem, int ring)
 {
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 886bff8..c7edad6 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -14376,7 +14376,7 @@ void sna_accel_block_handler(struct sna *sna, struct timeval **tv)
 	if (sna->timer_active)
 		UpdateCurrentTimeIf();
 
-	if (sna->kgem.nbatch && kgem_is_idle(&sna->kgem)) {
+	if (sna->kgem.nbatch && kgem_ring_is_idle(&sna->kgem, sna->kgem.ring)) {
 		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
 		_kgem_submit(&sna->kgem);
 	}
@@ -14436,7 +14436,7 @@ void sna_accel_wakeup_handler(struct sna *sna)
 
 	if (sna->kgem.need_retire)
 		kgem_retire(&sna->kgem);
-	if (sna->kgem.nbatch && kgem_is_idle(&sna->kgem)) {
+	if (sna->kgem.nbatch && !sna->kgem.need_retire) {
 		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
 		_kgem_submit(&sna->kgem);
 	}
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 27168ac..994f0a6 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -249,7 +249,7 @@ no_render_context_switch(struct kgem *kgem,
 	if (!kgem->mode)
 		return;
 
-	if (kgem_is_idle(kgem)) {
+	if (kgem_ring_is_idle(kgem, kgem->ring)) {
 		DBG(("%s: GPU idle, flushing\n", __FUNCTION__));
 		_kgem_submit(kgem);
 	}
commit 4e4e10935d2815fb62aeaedbfffe10aad115ec0b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Dec 8 22:39:32 2012 +0000

    sna: Flush upon change of target if GPU is idle
    
    The aim is to improve GPU concurrency by keeping it busy. The possible
    complication is that we incur more overhead due to small batches.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index b37af9b..2123cb9 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -536,9 +536,9 @@ static void gen2_emit_invariant(struct sna *sna)
 }
 
 static void
-gen2_get_batch(struct sna *sna)
+gen2_get_batch(struct sna *sna, const struct sna_composite_op *op)
 {
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
 	if (!kgem_check_batch(&sna->kgem, INVARIANT_SIZE+40)) {
 		DBG(("%s: flushing batch: size %d > %d\n",
@@ -662,7 +662,7 @@ static void gen2_emit_composite_state(struct sna *sna,
 	uint32_t cblend, ablend;
 	int tex;
 
-	gen2_get_batch(sna);
+	gen2_get_batch(sna, op);
 
 	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
 		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
@@ -2146,7 +2146,7 @@ static void gen2_emit_composite_spans_state(struct sna *sna,
 {
 	uint32_t unwind;
 
-	gen2_get_batch(sna);
+	gen2_get_batch(sna, &op->base);
 	gen2_emit_target(sna, &op->base);
 
 	unwind = sna->kgem.nbatch;
@@ -2404,7 +2404,7 @@ static void gen2_emit_fill_composite_state(struct sna *sna,
 {
 	uint32_t ls1;
 
-	gen2_get_batch(sna);
+	gen2_get_batch(sna, op);
 	gen2_emit_target(sna, op);
 
 	ls1 = sna->kgem.nbatch;
@@ -2589,7 +2589,7 @@ static void gen2_emit_fill_state(struct sna *sna,
 {
 	uint32_t ls1;
 
-	gen2_get_batch(sna);
+	gen2_get_batch(sna, op);
 	gen2_emit_target(sna, op);
 
 	ls1 = sna->kgem.nbatch;
@@ -2882,7 +2882,7 @@ static void gen2_emit_copy_state(struct sna *sna, const struct sna_composite_op
 {
 	uint32_t ls1, v;
 
-	gen2_get_batch(sna);
+	gen2_get_batch(sna, op);
 
 	if (kgem_bo_is_dirty(op->src.bo)) {
 		if (op->src.bo == op->dst.bo)
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 9dcdfcd..b092976 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -1298,9 +1298,9 @@ static void gen3_emit_invariant(struct sna *sna)
 #define MAX_OBJECTS 3 /* worst case: dst + src + mask  */
 
 static void
-gen3_get_batch(struct sna *sna)
+gen3_get_batch(struct sna *sna, const struct sna_composite_op *op)
 {
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
 	if (!kgem_check_batch(&sna->kgem, 200)) {
 		DBG(("%s: flushing batch: size %d > %d\n",
@@ -1389,7 +1389,7 @@ static void gen3_emit_composite_state(struct sna *sna,
 	unsigned int tex_count, n;
 	uint32_t ss2;
 
-	gen3_get_batch(sna);
+	gen3_get_batch(sna, op);
 
 	if (kgem_bo_is_dirty(op->src.bo) || kgem_bo_is_dirty(op->mask.bo)) {
 		if (op->src.bo == op->dst.bo || op->mask.bo == op->dst.bo)
@@ -3841,9 +3841,9 @@ gen3_emit_video_state(struct sna *sna,
 }
 
 static void
-gen3_video_get_batch(struct sna *sna)
+gen3_video_get_batch(struct sna *sna, struct kgem_bo *bo)
 {
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, bo);
 
 	if (!kgem_check_batch(&sna->kgem, 120) ||
 	    !kgem_check_reloc(&sna->kgem, 4) ||
@@ -3934,13 +3934,13 @@ gen3_render_video(struct sna *sna,
 	     __FUNCTION__,
 	     dxo, dyo, src_scale_x, src_scale_y, pix_xoff, pix_yoff));
 
-	gen3_video_get_batch(sna);
+	gen3_video_get_batch(sna, dst_bo);
 	gen3_emit_video_state(sna, video, frame, pixmap,
 			      dst_bo, width, height);
 	do {
 		int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
 		if (nbox_this_time == 0) {
-			gen3_video_get_batch(sna);
+			gen3_video_get_batch(sna, dst_bo);
 			gen3_emit_video_state(sna, video, frame, pixmap,
 					      dst_bo, width, height);
 			nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4);
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 18a6bf6..14ac123 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1222,9 +1222,9 @@ gen4_emit_invariant(struct sna *sna)
 }
 
 static void
-gen4_get_batch(struct sna *sna)
+gen4_get_batch(struct sna *sna, const struct sna_composite_op *op)
 {
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
 	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
 		DBG(("%s: flushing batch: %d < %d+%d\n",
@@ -1446,7 +1446,7 @@ gen4_bind_surfaces(struct sna *sna,
 	uint32_t *binding_table;
 	uint16_t offset;
 
-	gen4_get_batch(sna);
+	gen4_get_batch(sna, op);
 
 	binding_table = gen4_composite_get_binding_table(sna, &offset);
 
@@ -1637,7 +1637,7 @@ static void gen4_video_bind_surfaces(struct sna *sna,
 		n_src = 1;
 	}
 
-	gen4_get_batch(sna);
+	gen4_get_batch(sna, op);
 
 	binding_table = gen4_composite_get_binding_table(sna, &offset);
 
@@ -2798,7 +2798,7 @@ gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op)
 	uint32_t *binding_table;
 	uint16_t offset;
 
-	gen4_get_batch(sna);
+	gen4_get_batch(sna, op);
 
 	binding_table = gen4_composite_get_binding_table(sna, &offset);
 
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 284e8da..d0e570e 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1227,9 +1227,9 @@ gen5_emit_invariant(struct sna *sna)
 }
 
 static void
-gen5_get_batch(struct sna *sna)
+gen5_get_batch(struct sna *sna, const struct sna_composite_op *op)
 {
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
 	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
 		DBG(("%s: flushing batch: %d < %d+%d\n",
@@ -1447,7 +1447,7 @@ static void gen5_bind_surfaces(struct sna *sna,
 	uint32_t *binding_table;
 	uint16_t offset;
 
-	gen5_get_batch(sna);
+	gen5_get_batch(sna, op);
 
 	binding_table = gen5_composite_get_binding_table(sna, &offset);
 
@@ -1636,7 +1636,7 @@ static void gen5_video_bind_surfaces(struct sna *sna,
 		n_src = 1;
 	}
 
-	gen5_get_batch(sna);
+	gen5_get_batch(sna, op);
 	binding_table = gen5_composite_get_binding_table(sna, &offset);
 
 	binding_table[0] =
@@ -2808,7 +2808,7 @@ gen5_copy_bind_surfaces(struct sna *sna,
 	uint32_t *binding_table;
 	uint16_t offset;
 
-	gen5_get_batch(sna);
+	gen5_get_batch(sna, op);
 
 	binding_table = gen5_composite_get_binding_table(sna, &offset);
 
@@ -3137,7 +3137,7 @@ gen5_fill_bind_surfaces(struct sna *sna,
 	uint32_t *binding_table;
 	uint16_t offset;
 
-	gen5_get_batch(sna);
+	gen5_get_batch(sna, op);
 
 	binding_table = gen5_composite_get_binding_table(sna, &offset);
 
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 50df557..1677a47 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1694,10 +1694,10 @@ gen6_choose_composite_vertex_buffer(const struct sna_composite_op *op)
 	return id;
 }
 
-static void
-gen6_get_batch(struct sna *sna)
+static bool
+gen6_get_batch(struct sna *sna, const struct sna_composite_op *op)
 {
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
 	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
 		DBG(("%s: flushing batch: %d < %d+%d\n",
@@ -1709,6 +1709,8 @@ gen6_get_batch(struct sna *sna)
 
 	if (sna->render_state.gen6.needs_invariant)
 		gen6_emit_invariant(sna);
+
+	return kgem_bo_is_dirty(op->dst.bo);
 }
 
 static void gen6_emit_composite_state(struct sna *sna,
@@ -1718,8 +1720,7 @@ static void gen6_emit_composite_state(struct sna *sna,
 	uint16_t offset;
 	bool dirty;
 
-	gen6_get_batch(sna);
-	dirty = kgem_bo_is_dirty(op->dst.bo);
+	dirty = gen6_get_batch(sna, op);
 
 	binding_table = gen6_composite_get_binding_table(sna, &offset);
 
@@ -1918,8 +1919,7 @@ static void gen6_emit_video_state(struct sna *sna,
 	bool dirty;
 	int n_src, n;
 
-	gen6_get_batch(sna);
-	dirty = kgem_bo_is_dirty(op->dst.bo);
+	dirty = gen6_get_batch(sna, op);
 
 	src_surf_base[0] = 0;
 	src_surf_base[1] = 0;
@@ -2022,7 +2022,7 @@ gen6_render_video(struct sna *sna,
 			       2);
 	tmp.priv = frame;
 
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
 	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
@@ -2847,7 +2847,7 @@ gen6_render_composite(struct sna *sna,
 	tmp->boxes = gen6_render_composite_boxes;
 	tmp->done  = gen6_render_composite_done;
 
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
 	if (!kgem_check_bo(&sna->kgem,
 			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
 			   NULL)) {
@@ -3219,7 +3219,7 @@ gen6_render_composite_spans(struct sna *sna,
 	tmp->boxes = gen6_render_composite_spans_boxes;
 	tmp->done  = gen6_render_composite_spans_done;
 
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
 	if (!kgem_check_bo(&sna->kgem,
 			   tmp->base.dst.bo, tmp->base.src.bo,
 			   NULL)) {
@@ -3253,8 +3253,7 @@ gen6_emit_copy_state(struct sna *sna,
 	uint16_t offset;
 	bool dirty;
 
-	gen6_get_batch(sna);
-	dirty = kgem_bo_is_dirty(op->dst.bo);
+	dirty = gen6_get_batch(sna, op);
 
 	binding_table = gen6_composite_get_binding_table(sna, &offset);
 
@@ -3482,7 +3481,7 @@ fallback_blt:
 	assert(GEN6_SAMPLER(tmp.u.gen6.flags) == COPY_SAMPLER);
 	assert(GEN6_VERTEX(tmp.u.gen6.flags) == COPY_VERTEX);
 
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
 	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
@@ -3638,7 +3637,7 @@ fallback:
 	assert(GEN6_SAMPLER(op->base.u.gen6.flags) == COPY_SAMPLER);
 	assert(GEN6_VERTEX(op->base.u.gen6.flags) == COPY_VERTEX);
 
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
@@ -3661,8 +3660,7 @@ gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
 	uint16_t offset;
 	bool dirty;
 
-	gen6_get_batch(sna);
-	dirty = kgem_bo_is_dirty(op->dst.bo);
+	dirty = gen6_get_batch(sna, op);
 
 	binding_table = gen6_composite_get_binding_table(sna, &offset);
 
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index eb34ff2..fea5a10 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -1810,9 +1810,9 @@ gen7_choose_composite_vertex_buffer(const struct sna_composite_op *op)
 }
 
 static void
-gen7_get_batch(struct sna *sna)
+gen7_get_batch(struct sna *sna, const struct sna_composite_op *op)
 {
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, op->dst.bo);
 
 	if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) {
 		DBG(("%s: flushing batch: %d < %d+%d\n",
@@ -1835,7 +1835,7 @@ static void gen7_emit_composite_state(struct sna *sna,
 	uint32_t *binding_table;
 	uint16_t offset;
 
-	gen7_get_batch(sna);
+	gen7_get_batch(sna, op);
 
 	binding_table = gen7_composite_get_binding_table(sna, &offset);
 
@@ -2031,7 +2031,7 @@ static void gen7_emit_video_state(struct sna *sna,
 	uint16_t offset;
 	int n_src, n;
 
-	gen7_get_batch(sna);
+	gen7_get_batch(sna, op);
 
 	src_surf_base[0] = 0;
 	src_surf_base[1] = 0;
@@ -2134,7 +2134,7 @@ gen7_render_video(struct sna *sna,
 			       2);
 	tmp.priv = frame;
 
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
 	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		assert(kgem_check_bo(&sna->kgem, tmp.dst.bo, frame->bo, NULL));
@@ -2955,7 +2955,7 @@ gen7_render_composite(struct sna *sna,
 	tmp->boxes = gen7_render_composite_boxes;
 	tmp->done  = gen7_render_composite_done;
 
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->dst.bo);
 	if (!kgem_check_bo(&sna->kgem,
 			   tmp->dst.bo, tmp->src.bo, tmp->mask.bo,
 			   NULL)) {
@@ -3309,7 +3309,7 @@ gen7_render_composite_spans(struct sna *sna,
 	tmp->boxes = gen7_render_composite_spans_boxes;
 	tmp->done  = gen7_render_composite_spans_done;
 
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp->base.dst.bo);
 	if (!kgem_check_bo(&sna->kgem,
 			   tmp->base.dst.bo, tmp->base.src.bo,
 			   NULL)) {
@@ -3342,7 +3342,7 @@ gen7_emit_copy_state(struct sna *sna,
 	uint32_t *binding_table;
 	uint16_t offset;
 
-	gen7_get_batch(sna);
+	gen7_get_batch(sna, op);
 
 	binding_table = gen7_composite_get_binding_table(sna, &offset);
 
@@ -3563,7 +3563,7 @@ fallback_blt:
 
 	tmp.u.gen7.flags = COPY_FLAGS(alu);
 
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, tmp.dst.bo);
 	if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_bo(&sna->kgem, tmp.dst.bo, tmp.src.bo, NULL))
@@ -3711,7 +3711,7 @@ fallback:
 
 	op->base.u.gen7.flags = COPY_FLAGS(alu);
 
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	kgem_set_mode(&sna->kgem, KGEM_RENDER, dst_bo);
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
@@ -3739,7 +3739,7 @@ gen7_emit_fill_state(struct sna *sna, const struct sna_composite_op *op)
 	 * specific kernel.
 	 */
 
-	gen7_get_batch(sna);
+	gen7_get_batch(sna, op);
 
 	binding_table = gen7_composite_get_binding_table(sna, &offset);
 
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index c497ce7..47dd7d5 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -5228,7 +5228,7 @@ kgem_replace_bo(struct kgem *kgem,
 	dst->unique_id = kgem_get_unique_id(kgem);
 	dst->refcnt = 1;
 
-	kgem_set_mode(kgem, KGEM_BLT);
+	kgem_set_mode(kgem, KGEM_BLT, dst);
 	if (!kgem_check_batch(kgem, 8) ||
 	    !kgem_check_reloc(kgem, 2) ||
 	    !kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index e7e53af..5cd22cc 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -347,7 +347,9 @@ static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 
 void kgem_clear_dirty(struct kgem *kgem);
 
-static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode)
+static inline void kgem_set_mode(struct kgem *kgem,
+				 enum kgem_mode mode,
+				 struct kgem_bo *bo)
 {
 	assert(!kgem->wedged);
 
@@ -355,6 +357,9 @@ static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode)
 	kgem_submit(kgem);
 #endif
 
+	if (kgem->mode && bo->exec == NULL && kgem_ring_is_idle(kgem, kgem->ring))
+		kgem_submit(kgem);
+
 	if (kgem->mode == mode)
 		return;
 
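With the extra bo argument, kgem_set_mode() can flush an accumulated
batch as soon as the render target changes while the current ring is
idle (bo->exec == NULL means the new destination is not yet part of the
batch). A condensed, hypothetical caller sketch in the style the rest of
this commit converts to:

static bool begin_blt(struct sna *sna, struct kgem_bo *dst)
{
	/* Passing dst lets kgem submit the previous batch early when
	 * the GPU has nothing queued and dst is not already in it. */
	kgem_set_mode(&sna->kgem, KGEM_BLT, dst);

	if (!kgem_check_batch(&sna->kgem, 8) ||
	    !kgem_check_reloc(&sna->kgem, 2) ||
	    !kgem_check_bo_fenced(&sna->kgem, dst)) {
		_kgem_submit(&sna->kgem);
		if (!kgem_check_bo_fenced(&sna->kgem, dst))
			return false;	/* caller falls back */
		_kgem_set_mode(&sna->kgem, KGEM_BLT);
	}

	return true;
}
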
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 51ea1a8..886bff8 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3800,7 +3800,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	x += dx + drawable->x;
 	y += dy + drawable->y;
 
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
 
 	/* Region is pre-clipped and translated into pixmap space */
 	box = REGION_RECTS(region);
@@ -3922,7 +3922,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	x += dx + drawable->x;
 	y += dy + drawable->y;
 
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
 
 	skip = h * BitmapBytePad(w + left);
 	for (i = 1 << (gc->depth-1); i; i >>= 1, bits += skip) {
@@ -6137,7 +6137,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
 	br13 |= blt_depth(drawable->depth) << 24;
 	br13 |= copy_ROP[gc->alu] << 16;
 
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, arg->bo);
 	do {
 		int bx1 = (box->x1 + sx) & ~7;
 		int bx2 = (box->x2 + sx + 7) & ~7;
@@ -6301,7 +6301,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc,
 	br13 |= blt_depth(drawable->depth) << 24;
 	br13 |= copy_ROP[gc->alu] << 16;
 
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, arg->bo);
 	do {
 		int bx1 = (box->x1 + sx) & ~7;
 		int bx2 = (box->x2 + sx + 7) & ~7;
@@ -9892,7 +9892,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable,
 	DBG(("%s x %d [(%d, %d)x(%d, %d)...], clipped=%x\n",
 	     __FUNCTION__, n, r->x, r->y, r->width, r->height, clipped));
 
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
 	if (!kgem_check_batch(&sna->kgem, 8+2*3) ||
 	    !kgem_check_reloc(&sna->kgem, 2) ||
 	    !kgem_check_bo_fenced(&sna->kgem, bo)) {
@@ -10526,7 +10526,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
 		} while (--j);
 	}
 
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
 	if (!kgem_check_batch(&sna->kgem, 9 + 2*3) ||
 	    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 	    !kgem_check_reloc(&sna->kgem, 1)) {
@@ -10802,7 +10802,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
 	     origin->x, origin->y));
 
 	get_drawable_deltas(drawable, pixmap, &dx, &dy);
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
 
 	br00 = 3 << 20;
 	br13 = bo->pitch;
@@ -11498,7 +11498,7 @@ sna_poly_fill_rect_stippled_n_blt__imm(DrawablePtr drawable,
 	     clipped, gc->alu, gc->fillStyle == FillOpaqueStippled));
 
 	get_drawable_deltas(drawable, pixmap, &dx, &dy);
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
 
 	br00 = XY_MONO_SRC_COPY_IMM | 3 << 20;
 	br13 = bo->pitch;
@@ -11643,7 +11643,7 @@ sna_poly_fill_rect_stippled_n_blt(DrawablePtr drawable,
 							      extents, clipped);
 
 	get_drawable_deltas(drawable, pixmap, &dx, &dy);
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
 
 	br00 = XY_MONO_SRC_COPY | 3 << 20;
 	br13 = bo->pitch;
@@ -12284,7 +12284,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
 				   bo, drawable->bitsPerPixel,
 				   bg, extents, REGION_NUM_RECTS(clip));
 
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
 	if (!kgem_check_batch(&sna->kgem, 16) ||
 	    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 	    !kgem_check_reloc(&sna->kgem, 1)) {
@@ -12929,7 +12929,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc,
 				   bo, drawable->bitsPerPixel,
 				   bg, extents, REGION_NUM_RECTS(clip));
 
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
 	if (!kgem_check_batch(&sna->kgem, 16) ||
 	    !kgem_check_bo_fenced(&sna->kgem, bo) ||
 	    !kgem_check_reloc(&sna->kgem, 1)) {
@@ -13309,7 +13309,7 @@ sna_push_pixels_solid_blt(GCPtr gc,
 	     region->extents.x1, region->extents.y1,
 	     region->extents.x2, region->extents.y2));
 
-	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
 
 	/* Region is pre-clipped and translated into pixmap space */
 	box = REGION_RECTS(region);
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 2248c4d..e9ca0dd 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -145,7 +145,7 @@ static bool sna_blt_fill_init(struct sna *sna,
 	blt->pixel = pixel;
 	blt->bpp = bpp;
 
-	kgem_set_mode(kgem, KGEM_BLT);
+	kgem_set_mode(kgem, KGEM_BLT, bo);
 	if (!kgem_check_batch(kgem, 12) ||
 	    !kgem_check_bo_fenced(kgem, bo)) {
 		_kgem_submit(kgem);
@@ -289,7 +289,7 @@ static bool sna_blt_copy_init(struct sna *sna,
 	case 8: break;
 	}
 
-	kgem_set_mode(kgem, KGEM_BLT);
+	kgem_set_mode(kgem, KGEM_BLT, dst);
 	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
 		_kgem_submit(kgem);
 		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
@@ -341,7 +341,7 @@ static bool sna_blt_alpha_fixup_init(struct sna *sna,
 	}
 	blt->pixel = alpha;
 
-	kgem_set_mode(kgem, KGEM_BLT);
+	kgem_set_mode(kgem, KGEM_BLT, dst);
 	if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
 		_kgem_submit(kgem);
 		if (!kgem_check_many_bo_fenced(kgem, src, dst, NULL))
@@ -2263,7 +2263,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
 		return false;
 	}
 
-	kgem_set_mode(kgem, KGEM_BLT);
+	kgem_set_mode(kgem, KGEM_BLT, bo);
 	if (!kgem_check_batch(kgem, 6) ||
 	    !kgem_check_reloc(kgem, 1) ||
 	    !kgem_check_bo_fenced(kgem, bo)) {
@@ -2339,7 +2339,7 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
 	case 8: break;
 	}
 
-	kgem_set_mode(kgem, KGEM_BLT);
+	kgem_set_mode(kgem, KGEM_BLT, bo);
 	if (!kgem_check_batch(kgem, 12) ||
 	    !kgem_check_bo_fenced(kgem, bo)) {
 		_kgem_submit(kgem);
@@ -2512,7 +2512,7 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 		kgem->nreloc--;
 	}
 
-	kgem_set_mode(kgem, KGEM_BLT);
+	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
 	if (!kgem_check_batch(kgem, 8) ||
 	    !kgem_check_reloc(kgem, 2) ||
 	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index 6976143..4453997 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -492,19 +492,17 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo)
 	DamageRegionProcessPending(&pixmap->drawable);
 }
 
-static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *src, bool sync)
+static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *dst, struct kgem_bo *src, bool sync)
 {
 	struct drm_i915_gem_busy busy;
 	int mode;
 
-	if (sna->kgem.gen < 060) {
-		kgem_set_mode(&sna->kgem, KGEM_BLT);
+	if (sna->kgem.gen < 060)
 		return;
-	}
 
 	if (sync) {
 		DBG(("%s: sync, force RENDER ring\n", __FUNCTION__));
-		kgem_set_mode(&sna->kgem, KGEM_RENDER);
+		kgem_set_mode(&sna->kgem, KGEM_RENDER, dst);
 		return;
 	}
 
@@ -515,7 +513,7 @@ static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *src, bool sync)
 
 	if (sna->kgem.has_semaphores) {
 		DBG(("%s: have sempahores, prefering RENDER\n", __FUNCTION__));
-		kgem_set_mode(&sna->kgem, KGEM_RENDER);
+		kgem_set_mode(&sna->kgem, KGEM_RENDER, dst);
 		return;
 	}
 
@@ -526,8 +524,14 @@ static void sna_dri_select_mode(struct sna *sna, struct kgem_bo *src, bool sync)
 
 	DBG(("%s: src busy?=%x\n", __FUNCTION__, busy.busy));
 	if (busy.busy == 0) {
-		DBG(("%s: src is idle, using defaults\n", __FUNCTION__));
-		return;
+		busy.handle = dst->handle;
+		if (drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
+			return;
+		DBG(("%s: dst busy?=%x\n", __FUNCTION__, busy.busy));
+		if (busy.busy == 0) {
+			DBG(("%s: src/dst is idle, using defaults\n", __FUNCTION__));
+			return;
+		}
 	}
 
 	/* Sandybridge introduced a separate ring which it uses to
@@ -611,7 +615,7 @@ sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
 		if (sync)
 			sync = sna_pixmap_is_scanout(sna, pixmap);
 
-		sna_dri_select_mode(sna, src_bo, sync);
+		sna_dri_select_mode(sna, dst_bo, src_bo, sync);
 	} else
 		sync = false;
 
@@ -755,7 +759,7 @@ sna_dri_copy_from_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
 				      dst_bo, -draw->x, -draw->y,
 				      boxes, n);
 	} else {
-		sna_dri_select_mode(sna, src_bo, false);
+		sna_dri_select_mode(sna, dst_bo, src_bo, false);
 		sna->render.copy_boxes(sna, GXcopy,
 				       pixmap, src_bo, dx, dy,
 				       (PixmapPtr)draw, dst_bo, -draw->x, -draw->y,
@@ -804,7 +808,7 @@ sna_dri_copy(struct sna *sna, DrawablePtr draw, RegionPtr region,
 				      dst_bo, 0, 0,
 				      boxes, n);
 	} else {
-		sna_dri_select_mode(sna, src_bo, false);
+		sna_dri_select_mode(sna, dst_bo, src_bo, false);
 		sna->render.copy_boxes(sna, GXcopy,
 				       (PixmapPtr)draw, src_bo, 0, 0,
 				       (PixmapPtr)draw, dst_bo, 0, 0,
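
sna_dri_select_mode() now probes the destination as well as the source
before settling on the default ring. A minimal sketch of the busy probe
it relies on (hypothetical helper; the ioctl and struct are i915's):

static uint32_t gem_bo_busy(int fd, uint32_t handle)
{
	struct drm_i915_gem_busy busy;

	memset(&busy, 0, sizeof(busy));
	busy.handle = handle;
	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
		return 0;	/* treat an ioctl error as idle */

	/* Non-zero means the GPU still references the bo;
	 * sna_dri_select_mode then picks a ring accordingly. */
	return busy.busy;
}

If both src and dst report idle, the defaults are kept and no ring
switch is forced.
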
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index bb7f9f9..ec254fc 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -378,9 +378,9 @@ fallback:
 	case 1: break;
 	}
 
-	kgem_set_mode(kgem, KGEM_BLT);
-	if (!kgem_check_reloc_and_exec(kgem, 2) ||
-	    !kgem_check_batch(kgem, 8) ||
+	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
+	if (!kgem_check_batch(kgem, 8) ||
+	    !kgem_check_reloc_and_exec(kgem, 2) ||
 	    !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
 		_kgem_submit(kgem);
 		_kgem_set_mode(kgem, KGEM_BLT);
@@ -824,7 +824,7 @@ tile:
 	case 8: break;
 	}
 
-	kgem_set_mode(kgem, KGEM_BLT);
+	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
 	if (!kgem_check_batch(kgem, 8) ||
 	    !kgem_check_reloc_and_exec(kgem, 2) ||
 	    !kgem_check_bo_fenced(kgem, dst_bo)) {
@@ -1193,9 +1193,9 @@ tile:
 	case 8: break;
 	}
 
-	kgem_set_mode(kgem, KGEM_BLT);
-	if (!kgem_check_reloc_and_exec(kgem, 2) ||
-	    !kgem_check_batch(kgem, 8) ||
+	kgem_set_mode(kgem, KGEM_BLT, dst_bo);
+	if (!kgem_check_batch(kgem, 8) ||
+	    !kgem_check_reloc_and_exec(kgem, 2) ||
 	    !kgem_check_bo_fenced(kgem, dst_bo)) {
 		_kgem_submit(kgem);
 		_kgem_set_mode(kgem, KGEM_BLT);
diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c
index f420769..8db8b6f 100644
--- a/src/sna/sna_video_textured.c
+++ b/src/sna/sna_video_textured.c
@@ -275,11 +275,12 @@ sna_video_textured_put_image(ScrnInfoPtr scrn,
 		}
 	}
 
-	kgem_set_mode(&sna->kgem, KGEM_RENDER);
 	if (crtc && video->SyncToVblank != 0 &&
-	    sna_pixmap_is_scanout(sna, pixmap))
+	    sna_pixmap_is_scanout(sna, pixmap)) {
+		kgem_set_mode(&sna->kgem, KGEM_RENDER, sna_pixmap(pixmap)->gpu_bo);
 		flush = sna_wait_for_scanline(sna, pixmap, crtc,
 					      &clip->extents);
+	}
 
 	ret = Success;
 	if (!sna->render.video(sna, video, &frame, clip,

