xf86-video-intel: 3 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_io.c src/sna/sna_render.c src/sna/sna_render.h src/sna/sna_tiling.c

Chris Wilson ickle at kemper.freedesktop.org
Sat Feb 4 07:29:53 PST 2012


 src/sna/gen2_render.c |   40 ++++++++++++-
 src/sna/gen3_render.c |   44 +++++++++++++--
 src/sna/gen4_render.c |  130 ++++++++++++++++++++++++++++++++++----------
 src/sna/gen5_render.c |  136 ++++++++++++++++++++++++++++++----------------
 src/sna/gen6_render.c |  145 ++++++++++++++++++++++++++++++++++++--------------
 src/sna/gen7_render.c |  140 +++++++++++++++++++++++++++++++++++++-----------
 src/sna/kgem.c        |    6 +-
 src/sna/kgem.h        |   26 +++++---
 src/sna/sna_accel.c   |   16 ++++-
 src/sna/sna_blt.c     |   17 +++--
 src/sna/sna_io.c      |    2 
 src/sna/sna_render.c  |  118 +++++++++++++++++++++++++++++++++++-----
 src/sna/sna_render.h  |   27 ++++++++-
 src/sna/sna_tiling.c  |  140 ++++++++++++++++++++++++++++++++++++++++++++++--
 14 files changed, 785 insertions(+), 202 deletions(-)

New commits:
commit 4baa2806bc0f51e7576b769ca6750deb3821c4d3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Feb 4 12:06:22 2012 +0000

    sna: Check if the damage reduces to all before performing the migration
    
    An assert exposed a situation where we had accumulated an unreduced
    damage-all and so we were taking the slow path only to discover later
    that it was a damage-all and that we had performed needless checks.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 1d2b999..b6f6772 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -416,6 +416,8 @@ static inline uint32_t default_tiling(PixmapPtr pixmap)
 	if (sna_damage_is_all(&priv->cpu_damage,
 			      pixmap->drawable.width,
 			      pixmap->drawable.height)) {
+		DBG(("%s: entire source is damaged, using Y-tiling\n",
+		     __FUNCTION__));
 		sna_damage_destroy(&priv->gpu_damage);
 		priv->undamaged = false;
 		return I915_TILING_Y;
@@ -1221,7 +1223,9 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		return true;
 	}
 
-	if (DAMAGE_IS_ALL(priv->cpu_damage))
+	if (sna_damage_is_all(&priv->cpu_damage,
+			      pixmap->drawable.width,
+			      pixmap->drawable.height))
 		goto out;
 
 	if (priv->clear)
@@ -1245,6 +1249,8 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 	}
 
 	if ((flags & MOVE_READ) == 0) {
+		DBG(("%s: no read, checking to see if we can stream the write into the GPU bo\n",
+		     __FUNCTION__));
 		assert(flags & MOVE_WRITE);
 
 		if (priv->stride && priv->gpu_bo &&
@@ -1611,7 +1617,9 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, BoxPtr box, unsigned int flags)
 
 	assert_pixmap_contains_box(pixmap, box);
 
-	if (DAMAGE_IS_ALL(priv->gpu_damage))
+	if (sna_damage_is_all(&priv->gpu_damage,
+			      pixmap->drawable.width,
+			      pixmap->drawable.height))
 		goto done;
 
 	if (priv->gpu_bo == NULL) {
@@ -2081,7 +2089,9 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
 		return NULL;
 	}
 
-	if (DAMAGE_IS_ALL(priv->gpu_damage)) {
+	if (sna_damage_is_all(&priv->gpu_damage,
+			      pixmap->drawable.width,
+			      pixmap->drawable.height)) {
 		DBG(("%s: already all-damaged\n", __FUNCTION__));
 		goto active;
 	}
commit 2653524dffc1fe0dbff7d74bfc9be535d9ececb1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Feb 3 20:06:43 2012 +0000

    sna: Reduce the downsample tile size to accommodate alignment
    
    If we need to enlarge the sampled tile due to tiling alignments, the
    resulting sample can become larger than we can accommodate through the 3D
    pipeline, causing the downsampling operation to fail.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index bc8b2de..c2b9e79 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -622,7 +622,7 @@ static int sna_render_picture_downsample(struct sna *sna,
 	struct sna_pixmap *priv;
 	pixman_transform_t t;
 	PixmapPtr tmp;
-	int width, height;
+	int width, height, size;
 	int sx, sy, ox, oy, ow, oh;
 	int error, ret = 0;
 	BoxRec box, b;
@@ -743,8 +743,13 @@ static int sna_render_picture_downsample(struct sna *sna,
 	ValidatePicture(tmp_dst);
 	ValidatePicture(tmp_src);
 
-	w = sna->render.max_3d_size / sx - 2 * sx;
-	h = sna->render.max_3d_size / sy - 2 * sy;
+	/* Use a small size to accommodate enlargement through tile alignment */
+	size = sna->render.max_3d_size - 4096 / pixmap->drawable.bitsPerPixel;
+	while (size * size * 4 > sna->kgem.max_copy_tile_size)
+		size /= 2;
+
+	w = size / sx - 2 * sx;
+	h = size / sy - 2 * sy;
 	DBG(("%s %d:%d downsampling using %dx%d GPU tiles\n",
 	     __FUNCTION__, (width + w-1)/w, (height + h-1)/h, w, h));
 
diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c
index 00e111c..c6e898b 100644
--- a/src/sna/sna_tiling.c
+++ b/src/sna/sna_tiling.c
@@ -142,7 +142,8 @@ sna_tiling_composite_done(struct sna *sna,
 
 	/* Use a small step to accommodate enlargement through tile alignment */
 	step = sna->render.max_3d_size;
-	if (tile->dst_x & (8*512 / tile->dst->pDrawable->bitsPerPixel - 1))
+	if (tile->dst_x & (8*512 / tile->dst->pDrawable->bitsPerPixel - 1) ||
+	    tile->dst_y & 63)
 		step /= 2;
 	while (step * step * 4 > sna->kgem.max_copy_tile_size)
 		step /= 2;
@@ -330,7 +331,11 @@ sna_tiling_fill_boxes(struct sna *sna,
 
 	pixman_region_init_rects(&region, box, n);
 
+	/* Use a small step to accommodate enlargement through tile alignment */
 	step = sna->render.max_3d_size;
+	if (region.extents.x1 & (8*512 / dst->drawable.bitsPerPixel - 1) ||
+	    region.extents.y1 & 63)
+		step /= 2;
 	while (step * step * 4 > sna->kgem.max_copy_tile_size)
 		step /= 2;
 
@@ -443,7 +448,10 @@ Bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
 
 	pixman_region_init_rects(&region, box, nbox);
 
+	/* Use a small step to accommodate enlargement through tile alignment */
 	step = sna->render.max_3d_size;
+	if (region.extents.x1 & (8*512 / bpp - 1) || region.extents.y1 & 63)
+		step /= 2;
 	while (step * step * 4 > sna->kgem.max_copy_tile_size)
 		step /= 2;
 
commit 93a0b10f163ee79b6a6a7ea46b0a33b622b1f86e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Feb 3 19:30:24 2012 +0000

    sna: Apply redirection for the render copy into large pixmaps
    
    If the pixmap is larger than the pipeline, but the operation extents fit
    within the pipeline, we may be able to create a proxy target to
    transform the operation into one that fits within the constraints of the
    render pipeline.
    
    This fixes the infinite recursion hit with partially displayed extremely
    large images.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 7250d66..97b558d 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -2852,9 +2852,7 @@ gen2_render_copy_boxes(struct sna *sna, uint8_t alu,
 
 	if (src_bo == dst_bo || /* XXX handle overlap using 3D ? */
 	    too_large(src->drawable.width, src->drawable.height) ||
-	    src_bo->pitch > MAX_3D_PITCH ||
-	    too_large(dst->drawable.width, dst->drawable.height) ||
-	    dst_bo->pitch < 8 || dst_bo->pitch > MAX_3D_PITCH) {
+	    src_bo->pitch > MAX_3D_PITCH || dst_bo->pitch < 8) {
 fallback:
 		return sna_blt_copy_boxes_fallback(sna, alu,
 						   src, src_bo, src_dx, src_dy,
@@ -2876,10 +2874,39 @@ fallback:
 	tmp.dst.height = dst->drawable.height;
 	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
 	tmp.dst.bo = dst_bo;
+	tmp.dst.x = tmp.dst.y = 0;
+
+	sna_render_composite_redirect_init(&tmp);
+	if (too_large(tmp.dst.width, tmp.dst.height) ||
+	    dst_bo->pitch > MAX_3D_PITCH) {
+		BoxRec extents = box[0];
+		int i;
+
+		for (i = 1; i < n; i++) {
+			if (extents.x1 < box[i].x1)
+				extents.x1 = box[i].x1;
+			if (extents.y1 < box[i].y1)
+				extents.y1 = box[i].y1;
+
+			if (extents.x2 > box[i].x2)
+				extents.x2 = box[i].x2;
+			if (extents.y2 > box[i].y2)
+				extents.y2 = box[i].y2;
+		}
+		if (!sna_render_composite_redirect(sna, &tmp,
+						   extents.x1, extents.y1,
+						   extents.x2 - extents.x1,
+						   extents.y2 - extents.y1))
+			goto fallback_tiled;
+	}
 
 	tmp.floats_per_vertex = 4;
 	tmp.floats_per_rect = 12;
 
+	dst_dx += tmp.dst.x;
+	dst_dy += tmp.dst.y;
+	tmp.dst.x = tmp.dst.y = 0;
+
 	gen2_render_copy_setup_source(&tmp.src, src, src_bo);
 	gen2_emit_copy_state(sna, &tmp);
 	do {
@@ -2917,7 +2944,14 @@ fallback:
 	} while (n);
 
 	gen2_vertex_flush(sna, &tmp);
+	sna_render_composite_redirect_done(sna, &tmp);
 	return TRUE;
+
+fallback_tiled:
+	return sna_tiling_copy_boxes(sna, alu,
+				     src, src_bo, src_dx, src_dy,
+				     dst, dst_bo, dst_dx, dst_dy,
+				     box, n);
 }
 
 static void
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 784d399..d5f5617 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -3841,10 +3841,8 @@ gen3_render_copy_boxes(struct sna *sna, uint8_t alu,
 	if (!(alu == GXcopy || alu == GXclear) ||
 	    src_bo == dst_bo || /* XXX handle overlap using 3D ? */
 	    src_bo->pitch > MAX_3D_PITCH ||
-	    too_large(src->drawable.width, src->drawable.height) ||
-	    dst_bo->pitch > MAX_3D_PITCH ||
-	    too_large(dst->drawable.width, dst->drawable.height)) {
-fallback:
+	    too_large(src->drawable.width, src->drawable.height)) {
+fallback_blt:
 		return sna_blt_copy_boxes_fallback(sna, alu,
 						   src, src_bo, src_dx, src_dy,
 						   dst, dst_bo, dst_dx, dst_dy,
@@ -3854,7 +3852,7 @@ fallback:
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
-			goto fallback;
+			goto fallback_blt;
 	}
 
 	memset(&tmp, 0, sizeof(tmp));
@@ -3865,6 +3863,31 @@ fallback:
 	tmp.dst.height = dst->drawable.height;
 	tmp.dst.format = sna_format_for_depth(dst->drawable.depth);
 	tmp.dst.bo = dst_bo;
+	tmp.dst.x = tmp.dst.y = 0;
+
+	sna_render_composite_redirect_init(&tmp);
+	if (too_large(tmp.dst.width, tmp.dst.height) ||
+	    dst_bo->pitch > MAX_3D_PITCH) {
+		BoxRec extents = box[0];
+		int i;
+
+		for (i = 1; i < n; i++) {
+			if (extents.x1 < box[i].x1)
+				extents.x1 = box[i].x1;
+			if (extents.y1 < box[i].y1)
+				extents.y1 = box[i].y1;
+
+			if (extents.x2 > box[i].x2)
+				extents.x2 = box[i].x2;
+			if (extents.y2 > box[i].y2)
+				extents.y2 = box[i].y2;
+		}
+		if (!sna_render_composite_redirect(sna, &tmp,
+						   extents.x1, extents.y1,
+						   extents.x2 - extents.x1,
+						   extents.y2 - extents.y1))
+			goto fallback_tiled;
+	}
 
 	gen3_render_copy_setup_source(&tmp.src, src, src_bo);
 
@@ -3873,6 +3896,10 @@ fallback:
 	tmp.mask.bo = NULL;
 	tmp.mask.u.gen3.type = SHADER_NONE;
 
+	dst_dx += tmp.dst.x;
+	dst_dy += tmp.dst.y;
+	tmp.dst.x = tmp.dst.y = 0;
+
 	gen3_emit_composite_state(sna, &tmp);
 	gen3_align_vertex(sna, &tmp);
 
@@ -3911,7 +3938,14 @@ fallback:
 	} while (n);
 
 	gen3_vertex_flush(sna);
+	sna_render_composite_redirect_done(sna, &tmp);
 	return TRUE;
+
+fallback_tiled:
+	return sna_tiling_copy_boxes(sna, alu,
+				     src, src_bo, src_dx, src_dy,
+				     dst, dst_bo, dst_dx, dst_dy,
+				     box, n);
 }
 
 static void
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index ffdcbb7..b3a64d9 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -289,6 +289,13 @@ gen4_emit_pipelined_pointers(struct sna *sna,
 #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
 #define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
+#define GEN4_MAX_3D_SIZE 8192
+
+static inline bool too_large(int width, int height)
+{
+	return width > GEN4_MAX_3D_SIZE || height > GEN4_MAX_3D_SIZE;
+}
+
 static int
 gen4_choose_composite_kernel(int op, Bool has_mask, Bool is_ca, Bool is_affine)
 {
@@ -1884,7 +1891,7 @@ gen4_composite_picture(struct sna *sna,
 		return sna_render_picture_convert(sna, picture, channel, pixmap,
 						  x, y, w, h, dst_x, dst_y);
 
-	if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192)
+	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
 		return sna_render_picture_extract(sna, picture, channel,
 						  x, y, w, h, dst_x, dst_y);
 
@@ -1983,7 +1990,7 @@ try_blt(struct sna *sna,
 		return TRUE;
 	}
 
-	if (width > 8192 || height > 8192) {
+	if (too_large(width, height)) {
 		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
 		     __FUNCTION__, width, height));
 		return TRUE;
@@ -2221,11 +2228,10 @@ gen4_render_composite(struct sna *sna,
 		return FALSE;
 	sna_render_reduce_damage(tmp, dst_x, dst_y, width, height);
 
-	if (tmp->dst.width > 8192 || tmp->dst.height > 8192) {
-		if (!sna_render_composite_redirect(sna, tmp,
-						   dst_x, dst_y, width, height))
+	if (too_large(tmp->dst.width, tmp->dst.height) &&
+	    !sna_render_composite_redirect(sna, tmp,
+					   dst_x, dst_y, width, height))
 			return FALSE;
-	}
 
 	switch (gen4_composite_picture(sna, src, &tmp->src,
 				       src_x, src_y,
@@ -2432,10 +2438,8 @@ gen4_render_copy_boxes(struct sna *sna, uint8_t alu,
 			       box, n))
 		return TRUE;
 
-	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
-	    src->drawable.width > 8192 || src->drawable.height > 8192 ||
-	    dst->drawable.width > 8192 || dst->drawable.height > 8192) {
-fallback:
+	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
+fallback_blt:
 		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
 			return FALSE;
 
@@ -2458,24 +2462,73 @@ fallback:
 		tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
 	}
 	if (!gen4_check_format(tmp.src.pict_format))
-		goto fallback;
+		goto fallback_blt;
 
 	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
 
 	tmp.dst.pixmap = dst;
 	tmp.dst.width  = dst->drawable.width;
 	tmp.dst.height = dst->drawable.height;
+	tmp.dst.x = tmp.dst.y = 0;
 	tmp.dst.bo = dst_bo;
-	tmp.dst.x = dst_dx;
-	tmp.dst.y = dst_dy;
 
-	tmp.src.bo = src_bo;
+	sna_render_composite_redirect_init(&tmp);
+	if (too_large(tmp.dst.width, tmp.dst.height)) {
+		BoxRec extents = box[0];
+		int i;
+
+		for (i = 1; i < n; i++) {
+			if (extents.x1 < box[i].x1)
+				extents.x1 = box[i].x1;
+			if (extents.y1 < box[i].y1)
+				extents.y1 = box[i].y1;
+
+			if (extents.x2 > box[i].x2)
+				extents.x2 = box[i].x2;
+			if (extents.y2 > box[i].y2)
+				extents.y2 = box[i].y2;
+		}
+		if (!sna_render_composite_redirect(sna, &tmp,
+						   extents.x1, extents.y1,
+						   extents.x2 - extents.x1,
+						   extents.y2 - extents.y1))
+			goto fallback_tiled;
+	}
+
 	tmp.src.filter = SAMPLER_FILTER_NEAREST;
 	tmp.src.repeat = SAMPLER_EXTEND_NONE;
-	tmp.src.card_format =
-		gen4_get_card_format(tmp.src.pict_format),
-	tmp.src.width  = src->drawable.width;
-	tmp.src.height = src->drawable.height;
+	tmp.src.card_format = gen4_get_card_format(tmp.src.pict_format);
+	if (too_large(src->drawable.width, src->drawable.height)) {
+		BoxRec extents = box[0];
+		int i;
+
+		for (i = 1; i < n; i++) {
+			if (extents.x1 < box[i].x1)
+				extents.x1 = box[i].x1;
+			if (extents.y1 < box[i].y1)
+				extents.y1 = box[i].y1;
+
+			if (extents.x2 > box[i].x2)
+				extents.x2 = box[i].x2;
+			if (extents.y2 > box[i].y2)
+				extents.y2 = box[i].y2;
+		}
+
+		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
+					       extents.x1 + src_dx,
+					       extents.y1 + src_dy,
+					       extents.x2 - extents.x1,
+					       extents.y2 - extents.y1)) {
+			goto fallback_tiled_dst;
+		}
+	} else {
+		tmp.src.bo = kgem_bo_reference(src_bo);
+		tmp.src.width  = src->drawable.width;
+		tmp.src.height = src->drawable.height;
+		tmp.src.offset[0] = tmp.src.offset[1] = 0;
+		tmp.src.scale[0] = 1.f/src->drawable.width;
+		tmp.src.scale[1] = 1.f/src->drawable.height;
+	}
 
 	tmp.mask.bo = NULL;
 
@@ -2487,9 +2540,16 @@ fallback:
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
-			goto fallback;
+			goto fallback_tiled_src;
 	}
 
+	dst_dx += tmp.dst.x;
+	dst_dy += tmp.dst.y;
+	tmp.dst.x = tmp.dst.y = 0;
+
+	src_dx += tmp.src.offset[0];
+	src_dy += tmp.src.offset[1];
+
 	gen4_copy_bind_surfaces(sna, &tmp);
 	gen4_align_vertex(sna, &tmp);
 
@@ -2499,10 +2559,23 @@ fallback:
 		gen4_render_copy_one(sna, &tmp,
 				     box->x1 + src_dx, box->y1 + src_dy,
 				     box->x2 - box->x1, box->y2 - box->y1,
-				     box->x1, box->y1);
+				     box->x1 + dst_dx, box->y1 + dst_dy);
 		box++;
 	} while (--n);
+	sna_render_composite_redirect_done(sna, &tmp);
+	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 	return TRUE;
+
+fallback_tiled_src:
+	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+fallback_tiled_dst:
+	if (tmp.redirect.real_bo)
+		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
+fallback_tiled:
+	return sna_tiling_copy_boxes(sna, alu,
+				     src, src_bo, src_dx, src_dy,
+				     dst, dst_bo, dst_dx, dst_dy,
+				     box, n);
 }
 
 static void
@@ -2552,8 +2625,8 @@ gen4_render_copy(struct sna *sna, uint8_t alu,
 		return TRUE;
 
 	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
-	    src->drawable.width > 8192 || src->drawable.height > 8192 ||
-	    dst->drawable.width > 8192 || dst->drawable.height > 8192) {
+	    too_large(src->drawable.width, src->drawable.height) ||
+	    too_large(dst->drawable.width, dst->drawable.height)) {
 fallback:
 		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
 			return FALSE;
@@ -2683,10 +2756,7 @@ gen4_render_fill_boxes(struct sna *sna,
 		return FALSE;
 	}
 
-	if (prefer_blt(sna) ||
-	    dst->drawable.width > 8192 ||
-	    dst->drawable.height > 8192 ||
-	    !gen4_check_dst_format(format)) {
+	if (prefer_blt(sna) || too_large(dst->drawable.width, dst->drawable.height)) {
 		uint8_t alu = -1;
 
 		if (op == PictOpClear || (op == PictOpOutReverse && color->alpha >= 0xff00))
@@ -2715,7 +2785,7 @@ gen4_render_fill_boxes(struct sna *sna,
 		if (!gen4_check_dst_format(format))
 			return FALSE;
 
-		if (dst->drawable.width > 8192 || dst->drawable.height > 8192)
+		if (too_large(dst->drawable.width, dst->drawable.height))
 			return sna_tiling_fill_boxes(sna, op, format, color,
 						     dst, dst_bo, box, n);
 	}
@@ -2834,7 +2904,7 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
 		return TRUE;
 
 	if (!(alu == GXcopy || alu == GXclear) ||
-	    dst->drawable.width > 8192 || dst->drawable.height > 8192)
+	    too_large(dst->drawable.width, dst->drawable.height))
 		return sna_blt_fill(sna, alu,
 				    dst_bo, dst->drawable.bitsPerPixel,
 				    color,
@@ -2925,7 +2995,7 @@ gen4_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 
 	/* Must use the BLT if we can't RENDER... */
 	if (!(alu == GXcopy || alu == GXclear) ||
-	    dst->drawable.width > 8192 || dst->drawable.height > 8192)
+	    too_large(dst->drawable.width, dst->drawable.height))
 		return FALSE;
 
 	if (alu == GXclear)
@@ -3251,7 +3321,7 @@ Bool gen4_render_init(struct sna *sna)
 	sna->render.reset = gen4_render_reset;
 	sna->render.fini = gen4_render_fini;
 
-	sna->render.max_3d_size = 8192;
+	sna->render.max_3d_size = GEN4_MAX_3D_SIZE;
 	sna->render.max_3d_pitch = 1 << 18;
 	return TRUE;
 }
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 03dc8c9..933c51f 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -1378,6 +1378,9 @@ gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op)
 	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
 	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
 
+	assert(!too_large(op->dst.x, op->dst.y));
+	assert(!too_large(op->dst.width, op->dst.height));
+
 	if (!DBG_NO_STATE_CACHE &&
 	    sna->render_state.gen5.drawrect_limit == limit &&
 	    sna->render_state.gen5.drawrect_offset == offset)
@@ -2731,20 +2734,6 @@ gen5_copy_bind_surfaces(struct sna *sna,
 	gen5_emit_state(sna, op, offset);
 }
 
-static inline bool untiled_tlb_miss(struct kgem_bo *bo)
-{
-	return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
-}
-
-static inline bool prefer_blt_copy(struct sna *sna,
-				   struct kgem_bo *src_bo,
-				   struct kgem_bo *dst_bo)
-{
-	return (sna->kgem.ring != KGEM_RENDER ||
-		untiled_tlb_miss(src_bo) ||
-		untiled_tlb_miss(dst_bo));
-}
-
 static Bool
 gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
 		       PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
@@ -2753,8 +2742,7 @@ gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
 {
 	struct sna_composite_op tmp;
 
-	if (prefer_blt_copy(sna, src_bo, dst_bo) &&
-	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy_boxes(sna, alu,
 			       src_bo, src_dx, src_dy,
 			       dst_bo, dst_dx, dst_dy,
@@ -2762,12 +2750,10 @@ gen5_render_copy_boxes(struct sna *sna, uint8_t alu,
 			       box, n))
 		return TRUE;
 
-	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo ||
-	    too_large(src->drawable.width, src->drawable.height) ||
-	    too_large(dst->drawable.width, dst->drawable.height)) {
-fallback:
-	    if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
-		    return FALSE;
+	if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo) {
+fallback_blt:
+		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
+			return FALSE;
 
 		return sna_blt_copy_boxes_fallback(sna, alu,
 						   src, src_bo, src_dx, src_dy,
@@ -2787,7 +2773,7 @@ fallback:
 	if (!gen5_check_format(tmp.src.pict_format)) {
 		DBG(("%s: unsupported source format, %x, use BLT\n",
 		     __FUNCTION__, tmp.src.pict_format));
-		goto fallback;
+		goto fallback_blt;
 	}
 
 	DBG(("%s (%d, %d)->(%d, %d) x %d\n",
@@ -2798,17 +2784,66 @@ fallback:
 	tmp.dst.pixmap = dst;
 	tmp.dst.width  = dst->drawable.width;
 	tmp.dst.height = dst->drawable.height;
+	tmp.dst.x = tmp.dst.y = 0;
 	tmp.dst.bo = dst_bo;
-	tmp.dst.x = dst_dx;
-	tmp.dst.y = dst_dy;
 
-	tmp.src.bo = src_bo;
+	sna_render_composite_redirect_init(&tmp);
+	if (too_large(tmp.dst.width, tmp.dst.height)) {
+		BoxRec extents = box[0];
+		int i;
+
+		for (i = 1; i < n; i++) {
+			if (extents.x1 < box[i].x1)
+				extents.x1 = box[i].x1;
+			if (extents.y1 < box[i].y1)
+				extents.y1 = box[i].y1;
+
+			if (extents.x2 > box[i].x2)
+				extents.x2 = box[i].x2;
+			if (extents.y2 > box[i].y2)
+				extents.y2 = box[i].y2;
+		}
+
+		if (!sna_render_composite_redirect(sna, &tmp,
+						   extents.x1, extents.y1,
+						   extents.x2 - extents.x1,
+						   extents.y2 - extents.y1))
+			goto fallback_tiled;
+	}
+
 	tmp.src.filter = SAMPLER_FILTER_NEAREST;
 	tmp.src.repeat = SAMPLER_EXTEND_NONE;
-	tmp.src.card_format =
-		gen5_get_card_format(tmp.src.pict_format);
-	tmp.src.width  = src->drawable.width;
-	tmp.src.height = src->drawable.height;
+	tmp.src.card_format = gen5_get_card_format(tmp.src.pict_format);
+	if (too_large(src->drawable.width, src->drawable.height)) {
+		BoxRec extents = box[0];
+		int i;
+
+		for (i = 1; i < n; i++) {
+			if (extents.x1 < box[i].x1)
+				extents.x1 = box[i].x1;
+			if (extents.y1 < box[i].y1)
+				extents.y1 = box[i].y1;
+
+			if (extents.x2 > box[i].x2)
+				extents.x2 = box[i].x2;
+			if (extents.y2 > box[i].y2)
+				extents.y2 = box[i].y2;
+		}
+
+		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
+					       extents.x1 + src_dx,
+					       extents.y1 + src_dy,
+					       extents.x2 - extents.x1,
+					       extents.y2 - extents.y1))
+			goto fallback_tiled_dst;
+	} else {
+		tmp.src.bo = kgem_bo_reference(src_bo);
+		tmp.src.width  = src->drawable.width;
+		tmp.src.height = src->drawable.height;
+		tmp.src.offset[0] = tmp.src.offset[1] = 0;
+		tmp.src.scale[0] = 1.f/src->drawable.width;
+		tmp.src.scale[1] = 1.f/src->drawable.height;
+	}
 
 	tmp.is_affine = TRUE;
 	tmp.floats_per_vertex = 3;
@@ -2819,24 +2854,19 @@ fallback:
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
-			goto fallback;
+			goto fallback_tiled_src;
 	}
 
-	if (kgem_bo_is_dirty(src_bo)) {
-		if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
-		    sna_blt_copy_boxes(sna, alu,
-				       src_bo, src_dx, src_dy,
-				       dst_bo, dst_dx, dst_dy,
-				       dst->drawable.bitsPerPixel,
-				       box, n))
-			return TRUE;
-	}
+	dst_dx += tmp.dst.x;
+	dst_dy += tmp.dst.y;
+	tmp.dst.x = tmp.dst.y = 0;
+
+	src_dx += tmp.src.offset[0];
+	src_dy += tmp.src.offset[1];
 
 	gen5_copy_bind_surfaces(sna, &tmp);
 	gen5_align_vertex(sna, &tmp);
 
-	tmp.src.scale[0] = 1.f/src->drawable.width;
-	tmp.src.scale[1] = 1.f/src->drawable.height;
 	do {
 		int n_this_time = gen5_get_rectangles(sna, &tmp, n);
 		if (n_this_time == 0) {
@@ -2850,15 +2880,15 @@ fallback:
 			     box->x1 + src_dx, box->y1 + src_dy,
 			     box->x1 + dst_dx, box->y1 + dst_dy,
 			     box->x2 - box->x1, box->y2 - box->y1));
-			OUT_VERTEX(box->x2, box->y2);
+			OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy);
 			OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]);
 			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
-			OUT_VERTEX(box->x1, box->y2);
+			OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy);
 			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
 			OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]);
 
-			OUT_VERTEX(box->x1, box->y1);
+			OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy);
 			OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]);
 			OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]);
 
@@ -2867,7 +2897,20 @@ fallback:
 	} while (n);
 
 	gen5_vertex_flush(sna);
+	sna_render_composite_redirect_done(sna, &tmp);
+	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 	return TRUE;
+
+fallback_tiled_src:
+	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+fallback_tiled_dst:
+	if (tmp.redirect.real_bo)
+		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
+fallback_tiled:
+	return sna_tiling_copy_boxes(sna, alu,
+				     src, src_bo, src_dx, src_dy,
+				     dst, dst_bo, dst_dx, dst_dy,
+				     box, n);
 }
 
 static void
@@ -2916,8 +2959,7 @@ gen5_render_copy(struct sna *sna, uint8_t alu,
 {
 	DBG(("%s (alu=%d)\n", __FUNCTION__, alu));
 
-	if (prefer_blt_copy(sna, src_bo, dst_bo) &&
-	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
+	if (sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy(sna, alu,
 			 src_bo, dst_bo,
 			 dst->drawable.bitsPerPixel,
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 9f799ef..08f9668 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -229,6 +229,11 @@ static const struct formatinfo {
 #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
 #define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
+static inline bool too_large(int width, int height)
+{
+	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
+}
+
 static uint32_t gen6_get_blend(int op,
 			       bool has_component_alpha,
 			       uint32_t dst_format)
@@ -708,6 +713,9 @@ gen6_emit_drawing_rectangle(struct sna *sna,
 	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
 	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
 
+	assert(!too_large(op->dst.x, op->dst.y));
+	assert(!too_large(op->dst.width, op->dst.height));
+
 	if  (sna->render_state.gen6.drawrect_limit  == limit &&
 	     sna->render_state.gen6.drawrect_offset == offset)
 		return false;
@@ -2061,11 +2069,6 @@ gen6_composite_solid_init(struct sna *sna,
 	return channel->bo != NULL;
 }
 
-static inline bool too_large(int width, int height)
-{
-	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
-}
-
 static int
 gen6_composite_picture(struct sna *sna,
 		       PicturePtr picture,
@@ -3082,13 +3085,22 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo)
 	return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
 }
 
+static bool prefer_blt_bo(struct sna *sna,
+			  PixmapPtr pixmap,
+			  struct kgem_bo *bo)
+{
+	return (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
+		untiled_tlb_miss(bo)) &&
+		kgem_bo_can_blt(&sna->kgem, bo);
+}
+
 static inline bool prefer_blt_copy(struct sna *sna,
-				   struct kgem_bo *src_bo,
-				   struct kgem_bo *dst_bo)
+				   PixmapPtr src, struct kgem_bo *src_bo,
+				   PixmapPtr dst, struct kgem_bo *dst_bo)
 {
-	return (prefer_blt_ring(sna) ||
-		untiled_tlb_miss(src_bo) ||
-		untiled_tlb_miss(dst_bo));
+	return (sna->kgem.ring != KGEM_RENDER ||
+		prefer_blt_bo(sna, src, src_bo) ||
+		prefer_blt_bo(sna, dst, dst_bo));
 }
 
 static inline bool
@@ -3148,7 +3160,7 @@ gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
 		      dst_bo, dst_dx, dst_dy,
 		      box, n)));
 
-	if (prefer_blt_copy(sna, src_bo, dst_bo) &&
+	if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo) &&
 	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy_boxes(sna, alu,
 			       src_bo, src_dx, src_dy,
@@ -3160,26 +3172,15 @@ gen6_render_copy_boxes(struct sna *sna, uint8_t alu,
 	if (!(alu == GXcopy || alu == GXclear) ||
 	    overlaps(src_bo, src_dx, src_dy,
 		     dst_bo, dst_dx, dst_dy,
-		     box, n) ||
-	    too_large(src->drawable.width, src->drawable.height) ||
-	    too_large(dst->drawable.width, dst->drawable.height)) {
-fallback:
+		     box, n)) {
+fallback_blt:
 		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
 			return false;
 
-		if (sna_blt_copy_boxes_fallback(sna, alu,
+		return sna_blt_copy_boxes_fallback(sna, alu,
 						 src, src_bo, src_dx, src_dy,
 						 dst, dst_bo, dst_dx, dst_dy,
-						 box, n))
-			return true;
-
-		return false;
-#if 0
-		return sna_tiling_copy_boxes(sna,
-					     src, src_bo, src_dx, src_dy,
-					     dst, dst_bo, dst_dx, dst_dy,
-					     box, n);
-#endif
+						 box, n);
 	}
 
 	if (dst->drawable.depth == src->drawable.depth) {
@@ -3190,25 +3191,73 @@ fallback:
 		tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
 	}
 	if (!gen6_check_format(tmp.src.pict_format))
-		goto fallback;
+		goto fallback_blt;
 
 	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
 
 	tmp.dst.pixmap = dst;
-	tmp.dst.x = tmp.dst.y = 0;
 	tmp.dst.width  = dst->drawable.width;
 	tmp.dst.height = dst->drawable.height;
 	tmp.dst.bo = dst_bo;
-	tmp.dst.x = dst_dx;
-	tmp.dst.y = dst_dy;
+	tmp.dst.x = tmp.dst.y = 0;
+
+	sna_render_composite_redirect_init(&tmp);
+	if (too_large(tmp.dst.width, tmp.dst.height)) {
+		BoxRec extents = box[0];
+		int i;
+
+		for (i = 1; i < n; i++) {
+			if (extents.x1 < box[i].x1)
+				extents.x1 = box[i].x1;
+			if (extents.y1 < box[i].y1)
+				extents.y1 = box[i].y1;
+
+			if (extents.x2 > box[i].x2)
+				extents.x2 = box[i].x2;
+			if (extents.y2 > box[i].y2)
+				extents.y2 = box[i].y2;
+		}
+		if (!sna_render_composite_redirect(sna, &tmp,
+						   extents.x1, extents.y1,
+						   extents.x2 - extents.x1,
+						   extents.y2 - extents.y1))
+			goto fallback_tiled;
+	}
 
-	tmp.src.bo = src_bo;
 	tmp.src.filter = SAMPLER_FILTER_NEAREST;
 	tmp.src.repeat = SAMPLER_EXTEND_NONE;
 	tmp.src.card_format =
 		gen6_get_card_format(tmp.src.pict_format);
-	tmp.src.width  = src->drawable.width;
-	tmp.src.height = src->drawable.height;
+	if (too_large(src->drawable.width, src->drawable.height)) {
+		BoxRec extents = box[0];
+		int i;
+
+		for (i = 1; i < n; i++) {
+			if (extents.x1 < box[i].x1)
+				extents.x1 = box[i].x1;
+			if (extents.y1 < box[i].y1)
+				extents.y1 = box[i].y1;
+
+			if (extents.x2 > box[i].x2)
+				extents.x2 = box[i].x2;
+			if (extents.y2 > box[i].y2)
+				extents.y2 = box[i].y2;
+		}
+
+		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
+					       extents.x1 + src_dx,
+					       extents.y1 + src_dy,
+					       extents.x2 - extents.x1,
+					       extents.y2 - extents.y1))
+			goto fallback_tiled_dst;
+	} else {
+		tmp.src.bo = kgem_bo_reference(src_bo);
+		tmp.src.width  = src->drawable.width;
+		tmp.src.height = src->drawable.height;
+		tmp.src.offset[0] = tmp.src.offset[1] = 0;
+		tmp.src.scale[0] = 1.f/src->drawable.width;
+		tmp.src.scale[1] = 1.f/src->drawable.height;
+	}
 
 	tmp.mask.bo = NULL;
 	tmp.mask.filter = SAMPLER_FILTER_NEAREST;
@@ -3229,10 +3278,17 @@ fallback:
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
-			goto fallback;
+			goto fallback_tiled_src;
 		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
 	}
 
+	dst_dx += tmp.dst.x;
+	dst_dy += tmp.dst.y;
+	tmp.dst.x = tmp.dst.y = 0;
+
+	src_dx += tmp.src.offset[0];
+	src_dy += tmp.src.offset[1];
+
 	gen6_emit_copy_state(sna, &tmp);
 	gen6_align_vertex(sna, &tmp);
 
@@ -3256,9 +3312,9 @@ fallback:
 			     box->x1 + src_dx, box->y1 + src_dy,
 			     box->x1 + dst_dx, box->y1 + dst_dy,
 			     box->x2 - box->x1, box->y2 - box->y1));
-			v[0] = pack_2s(box->x2, box->y2);
-			v[3] = pack_2s(box->x1, box->y2);
-			v[6] = pack_2s(box->x1, box->y1);
+			v[0] = pack_2s(box->x2 + dst_dx, box->y2 + dst_dy);
+			v[3] = pack_2s(box->x1 + dst_dx, box->y2 + dst_dy);
+			v[6] = pack_2s(box->x1 + dst_dx, box->y1 + dst_dy);
 
 			v[1] = (box->x2 + src_dx) * tmp.src.scale[0];
 			v[7] = v[4] = (box->x1 + src_dx) * tmp.src.scale[0];
@@ -3272,7 +3328,20 @@ fallback:
 	} while (n);
 
 	gen6_vertex_flush(sna);
+	sna_render_composite_redirect_done(sna, &tmp);
+	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 	return TRUE;
+
+fallback_tiled_src:
+	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+fallback_tiled_dst:
+	if (tmp.redirect.real_bo)
+		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
+fallback_tiled:
+	return sna_tiling_copy_boxes(sna, alu,
+				     src, src_bo, src_dx, src_dy,
+				     dst, dst_bo, dst_dx, dst_dy,
+				     box, n);
 }
 
 static void
@@ -3329,7 +3398,7 @@ gen6_render_copy(struct sna *sna, uint8_t alu,
 	     src->drawable.width, src->drawable.height,
 	     dst->drawable.width, dst->drawable.height));
 
-	if (prefer_blt_copy(sna, src_bo, dst_bo) &&
+	if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo) &&
 	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy(sna, alu,
 			 src_bo, dst_bo,
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index e2486c6..5385a47 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -235,6 +235,11 @@ static const struct formatinfo {
 #define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y)
 #define OUT_VERTEX_F(v) vertex_emit(sna, v)
 
+static inline bool too_large(int width, int height)
+{
+	return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE; /* true when either dimension is beyond GEN7_MAX_SIZE */
+}
+
 static uint32_t gen7_get_blend(int op,
 			       Bool has_component_alpha,
 			       uint32_t dst_format)
@@ -817,6 +822,9 @@ gen7_emit_drawing_rectangle(struct sna *sna,
 	uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1);
 	uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x;
 
+	assert(!too_large(op->dst.x, op->dst.y));
+	assert(!too_large(op->dst.width, op->dst.height));
+
 	if (sna->render_state.gen7.drawrect_limit == limit &&
 	    sna->render_state.gen7.drawrect_offset == offset)
 		return;
@@ -2124,11 +2132,6 @@ gen7_composite_solid_init(struct sna *sna,
 	return channel->bo != NULL;
 }
 
-static inline bool too_large(int width, int height)
-{
-	return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE;
-}
-
 static int
 gen7_composite_picture(struct sna *sna,
 		       PicturePtr picture,
@@ -3130,13 +3133,22 @@ static inline bool untiled_tlb_miss(struct kgem_bo *bo)
 	return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096;
 }
 
+static bool prefer_blt_bo(struct sna *sna,
+			  PixmapPtr pixmap,
+			  struct kgem_bo *bo)
+{ /* BLT is preferred only if kgem_bo_can_blt() allows it and the pixmap is too_large() or untiled_tlb_miss() flags the bo */
+	return (too_large(pixmap->drawable.width, pixmap->drawable.height) ||
+		untiled_tlb_miss(bo)) &&
+		kgem_bo_can_blt(&sna->kgem, bo);
+}
+
 static inline bool prefer_blt_copy(struct sna *sna,
-				   struct kgem_bo *src_bo,
-				   struct kgem_bo *dst_bo)
+				   PixmapPtr src, struct kgem_bo *src_bo,
+				   PixmapPtr dst, struct kgem_bo *dst_bo)
 {
 	return (sna->kgem.ring != KGEM_RENDER ||
-		untiled_tlb_miss(src_bo) ||
-		untiled_tlb_miss(dst_bo));
+		prefer_blt_bo(sna, src, src_bo) ||
+		prefer_blt_bo(sna, dst, dst_bo));
 }
 
 static inline bool
@@ -3196,7 +3208,7 @@ gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
 		      dst_bo, dst_dx, dst_dy,
 		      box, n)));
 
-	if (prefer_blt_copy(sna, src_bo, dst_bo) &&
+	if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo) &&
 	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy_boxes(sna, alu,
 			       src_bo, src_dx, src_dy,
@@ -3208,17 +3220,15 @@ gen7_render_copy_boxes(struct sna *sna, uint8_t alu,
 	if (!(alu == GXcopy || alu == GXclear) ||
 	    overlaps(src_bo, src_dx, src_dy,
 		     dst_bo, dst_dx, dst_dy,
-		     box, n) ||
-	    too_large(src->drawable.width, src->drawable.height) ||
-	    too_large(dst->drawable.width, dst->drawable.height)) {
-fallback:
+		     box, n)) {
+fallback_blt:
 		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
-			return FALSE;
+			return false;
 
 		return sna_blt_copy_boxes_fallback(sna, alu,
-						   src, src_bo, src_dx, src_dy,
-						   dst, dst_bo, dst_dx, dst_dy,
-						   box, n);
+						 src, src_bo, src_dx, src_dy,
+						 dst, dst_bo, dst_dx, dst_dy,
+						 box, n);
 	}
 
 	if (dst->drawable.depth == src->drawable.depth) {
@@ -3229,25 +3239,73 @@ fallback:
 		tmp.src.pict_format = sna_format_for_depth(src->drawable.depth);
 	}
 	if (!gen7_check_format(tmp.src.pict_format))
-		goto fallback;
+		goto fallback_blt;
 
 	tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear;
 
 	tmp.dst.pixmap = dst;
-	tmp.dst.x = tmp.dst.y = 0;
 	tmp.dst.width  = dst->drawable.width;
 	tmp.dst.height = dst->drawable.height;
 	tmp.dst.bo = dst_bo;
-	tmp.dst.x = dst_dx;
-	tmp.dst.y = dst_dy;
+	tmp.dst.x = tmp.dst.y = 0;
+
+	sna_render_composite_redirect_init(&tmp);
+	if (too_large(tmp.dst.width, tmp.dst.height)) {
+		BoxRec extents = box[0];
+		int i;
+
+		for (i = 1; i < n; i++) { /* grow extents to the union of all boxes */
+			if (box[i].x1 < extents.x1)
+				extents.x1 = box[i].x1;
+			if (box[i].y1 < extents.y1)
+				extents.y1 = box[i].y1;
+
+			if (box[i].x2 > extents.x2)
+				extents.x2 = box[i].x2;
+			if (box[i].y2 > extents.y2)
+				extents.y2 = box[i].y2;
+		}
+		if (!sna_render_composite_redirect(sna, &tmp,
+						   extents.x1, extents.y1,
+						   extents.x2 - extents.x1,
+						   extents.y2 - extents.y1))
+			goto fallback_tiled;
+	}
 
-	tmp.src.bo = src_bo;
 	tmp.src.filter = SAMPLER_FILTER_NEAREST;
 	tmp.src.repeat = SAMPLER_EXTEND_NONE;
 	tmp.src.card_format =
 		gen7_get_card_format(tmp.src.pict_format);
-	tmp.src.width  = src->drawable.width;
-	tmp.src.height = src->drawable.height;
+	if (too_large(src->drawable.width, src->drawable.height)) {
+		BoxRec extents = box[0];
+		int i;
+
+		for (i = 1; i < n; i++) { /* grow extents to the union of all boxes */
+			if (box[i].x1 < extents.x1)
+				extents.x1 = box[i].x1;
+			if (box[i].y1 < extents.y1)
+				extents.y1 = box[i].y1;
+
+			if (box[i].x2 > extents.x2)
+				extents.x2 = box[i].x2;
+			if (box[i].y2 > extents.y2)
+				extents.y2 = box[i].y2;
+		}
+
+		if (!sna_render_pixmap_partial(sna, src, src_bo, &tmp.src,
+					       extents.x1 + src_dx,
+					       extents.y1 + src_dy,
+					       extents.x2 - extents.x1,
+					       extents.y2 - extents.y1))
+			goto fallback_tiled_dst;
+	} else {
+		tmp.src.bo = kgem_bo_reference(src_bo);
+		tmp.src.width  = src->drawable.width;
+		tmp.src.height = src->drawable.height;
+		tmp.src.offset[0] = tmp.src.offset[1] = 0;
+		tmp.src.scale[0] = 1.f/src->drawable.width;
+		tmp.src.scale[1] = 1.f/src->drawable.height;
+	}
 
 	tmp.mask.bo = NULL;
 	tmp.mask.filter = SAMPLER_FILTER_NEAREST;
@@ -3259,7 +3317,7 @@ fallback:
 	tmp.has_component_alpha = 0;
 	tmp.need_magic_ca_pass = 0;
 
-	tmp.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK;
+	tmp.u.gen7.wm_kernel = GEN7_WM_KERNEL_NOMASK;
 	tmp.u.gen7.nr_surfaces = 2;
 	tmp.u.gen7.nr_inputs = 1;
 	tmp.u.gen7.ve_id = 1;
@@ -3268,10 +3326,17 @@ fallback:
 	if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL)) {
 		kgem_submit(&sna->kgem);
 		if (!kgem_check_bo(&sna->kgem, dst_bo, src_bo, NULL))
-			goto fallback;
+			goto fallback_tiled_src;
 		_kgem_set_mode(&sna->kgem, KGEM_RENDER);
 	}
 
+	dst_dx += tmp.dst.x;
+	dst_dy += tmp.dst.y;
+	tmp.dst.x = tmp.dst.y = 0;
+
+	src_dx += tmp.src.offset[0];
+	src_dy += tmp.src.offset[1];
+
 	gen7_emit_copy_state(sna, &tmp);
 	gen7_align_vertex(sna, &tmp);
 
@@ -3295,9 +3360,9 @@ fallback:
 			     box->x1 + src_dx, box->y1 + src_dy,
 			     box->x1 + dst_dx, box->y1 + dst_dy,
 			     box->x2 - box->x1, box->y2 - box->y1));
-			v[0] = pack_2s(box->x2, box->y2);
-			v[3] = pack_2s(box->x1, box->y2);
-			v[6] = pack_2s(box->x1, box->y1);
+			v[0] = pack_2s(box->x2 + dst_dx, box->y2 + dst_dy);
+			v[3] = pack_2s(box->x1 + dst_dx, box->y2 + dst_dy);
+			v[6] = pack_2s(box->x1 + dst_dx, box->y1 + dst_dy);
 
 			v[1] = (box->x2 + src_dx) * tmp.src.scale[0];
 			v[7] = v[4] = (box->x1 + src_dx) * tmp.src.scale[0];
@@ -3311,7 +3376,20 @@ fallback:
 	} while (n);
 
 	gen7_vertex_flush(sna);
+	sna_render_composite_redirect_done(sna, &tmp);
+	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
 	return TRUE;
+
+fallback_tiled_src:
+	kgem_bo_destroy(&sna->kgem, tmp.src.bo);
+fallback_tiled_dst:
+	if (tmp.redirect.real_bo)
+		kgem_bo_destroy(&sna->kgem, tmp.dst.bo);
+fallback_tiled:
+	return sna_tiling_copy_boxes(sna, alu,
+				     src, src_bo, src_dx, src_dy,
+				     dst, dst_bo, dst_dx, dst_dy,
+				     box, n);
 }
 
 static void
@@ -3368,7 +3446,7 @@ gen7_render_copy(struct sna *sna, uint8_t alu,
 	     src->drawable.width, src->drawable.height,
 	     dst->drawable.width, dst->drawable.height));
 
-	if (prefer_blt_copy(sna, src_bo, dst_bo) &&
+	if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo) &&
 	    sna_blt_compare_depth(&src->drawable, &dst->drawable) &&
 	    sna_blt_copy(sna, alu,
 			 src_bo, dst_bo,
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index d2580e6..757bad1 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -678,9 +678,9 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 		 * disable dual-stream mode */
 		kgem->min_alignment = 64;
 
-	kgem->max_object_size = kgem->aperture_total / 2;
-	kgem->max_cpu_size = kgem->aperture_total / 2;
-	kgem->max_gpu_size = kgem->aperture_total / 2;
+	kgem->max_object_size = 2 * kgem->aperture_total / 3;
+	kgem->max_cpu_size = kgem->max_object_size;
+	kgem->max_gpu_size = kgem->max_object_size;
 	if (!kgem->has_llc)
 		kgem->max_gpu_size = MAX_CACHE_SIZE;
 	if (gen < 40) {
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 974a716..b6930e0 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -379,18 +379,10 @@ static inline int kgem_buffer_size(struct kgem_bo *bo)
 	return bo->size.bytes;
 }
 
-static inline bool kgem_bo_can_blt(struct kgem *kgem,
-				   struct kgem_bo *bo)
+static inline bool kgem_bo_blt_pitch_is_ok(struct kgem *kgem,
+					   struct kgem_bo *bo)
 {
-	int pitch;
-
-	if (bo->tiling == I915_TILING_Y) {
-		DBG(("%s: can not blt to handle=%d, tiling=Y\n",
-		     __FUNCTION__, bo->handle));
-		return false;
-	}
-
-	pitch = bo->pitch;
+	int pitch = bo->pitch;
 	if (kgem->gen >= 40 && bo->tiling)
 		pitch /= 4;
 	if (pitch > MAXSHORT) {
@@ -402,6 +394,18 @@ static inline bool kgem_bo_can_blt(struct kgem *kgem,
 	return true;
 }
 
+static inline bool kgem_bo_can_blt(struct kgem *kgem,
+				   struct kgem_bo *bo)
+{
+	if (bo->tiling == I915_TILING_Y) {
+		DBG(("%s: can not blt to handle=%d, tiling=Y\n",
+		     __FUNCTION__, bo->handle));
+		return false;
+	}
+	/* Not Y-tiled: the answer is whatever the pitch check reports */
+	return kgem_bo_blt_pitch_is_ok(kgem, bo);
+}
+
 static inline bool kgem_bo_is_mappable(struct kgem *kgem,
 				       struct kgem_bo *bo)
 {
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 9f51028..a7ea95c 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -2140,10 +2140,10 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 	    !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
 		_kgem_submit(kgem);
 		if (!kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL))
-			return sna_tiling_copy_boxes(sna, alu,
-						     src_bo, src_dx, src_dy,
-						     dst_bo, dst_dx, dst_dy,
-						     bpp, box, nbox);
+			return sna_tiling_blt_copy_boxes(sna, alu,
+							 src_bo, src_dx, src_dy,
+							 dst_bo, dst_dx, dst_dy,
+							 bpp, box, nbox);
 		_kgem_set_mode(kgem, KGEM_BLT);
 	}
 
@@ -2244,7 +2244,8 @@ Bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
 	if (src_bo == dst_bo) {
 		DBG(("%s: dst == src\n", __FUNCTION__));
 
-		if (src_bo->tiling == I915_TILING_Y) {
+		if (src_bo->tiling == I915_TILING_Y &&
+		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
 			struct kgem_bo *bo;
 
 			DBG(("%s: src is Y-tiled\n", __FUNCTION__));
@@ -2287,7 +2288,8 @@ Bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
 				dst_bo = src_bo = bo;
 		}
 	} else {
-		if (src_bo->tiling == I915_TILING_Y) {
+		if (src_bo->tiling == I915_TILING_Y &&
+		    kgem_bo_blt_pitch_is_ok(&sna->kgem, src_bo)) {
 			DBG(("%s: src is y-tiled\n", __FUNCTION__));
 			assert(src_bo == sna_pixmap(src)->gpu_bo);
 			src_bo = sna_pixmap_change_tiling(src, I915_TILING_X);
@@ -2298,7 +2300,8 @@ Bool sna_blt_copy_boxes_fallback(struct sna *sna, uint8_t alu,
 			}
 		}
 
-		if (dst_bo->tiling == I915_TILING_Y) {
+		if (dst_bo->tiling == I915_TILING_Y &&
+		    kgem_bo_blt_pitch_is_ok(&sna->kgem, dst_bo)) {
 			DBG(("%s: dst is y-tiled\n", __FUNCTION__));
 			assert(dst_bo == sna_pixmap(dst)->gpu_bo);
 			dst_bo = sna_pixmap_change_tiling(dst, I915_TILING_X);
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index eb5df9d..62a8962 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -593,7 +593,7 @@ fallback:
 			int step;
 
 tile:
-			step = MIN(sna->render.max_3d_size,
+			step = MIN(sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel,
 				   8*(MAXSHORT&~63) / dst->drawable.bitsPerPixel);
 			while (step * step * 4 > sna->kgem.max_upload_tile_size)
 				step /= 2;
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index fc6e6df..bc8b2de 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -805,6 +805,80 @@ cleanup_tmp:
 	return ret;
 }
 
+/* Point channel at a proxy bo over the tile-aligned part of pixmap covering (x, y, w, h) clipped to the pixmap; false if that cannot be sampled. */
+bool
+sna_render_pixmap_partial(struct sna *sna,
+			  PixmapPtr pixmap,
+			  struct kgem_bo *bo,
+			  struct sna_composite_channel *channel,
+			  int16_t x, int16_t y,
+			  int16_t w, int16_t h)
+{
+	BoxRec box;
+	int tile_width, tile_height, tile_size;
+	int offset;
+
+	DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
+
+	if (bo->pitch > sna->render.max_3d_pitch)
+		return false;
+
+	box.x1 = x;
+	box.y1 = y;
+	box.x2 = x + w;
+	box.y2 = y + h;
+	if (box.x1 < 0)
+		box.x1 = 0;
+	if (box.y1 < 0)
+		box.y1 = 0;
+	if (box.x2 > pixmap->drawable.width)
+		box.x2 = pixmap->drawable.width;
+	if (box.y2 > pixmap->drawable.height)
+		box.y2 = pixmap->drawable.height;
+
+	kgem_get_tile_size(&sna->kgem, bo->tiling,
+			   &tile_width, &tile_height, &tile_size);
+
+	/* Ensure we align to an even tile row */
+	box.y1 = box.y1 & ~(2*tile_height - 1);
+	box.y2 = ALIGN(box.y2, 2*tile_height);
+	if (box.y2 > pixmap->drawable.height)
+		box.y2 = pixmap->drawable.height;
+
+	box.x1 = box.x1 & ~(tile_width * 8 / pixmap->drawable.bitsPerPixel - 1);
+	box.x2 = ALIGN(box.x2, tile_width * 8 / pixmap->drawable.bitsPerPixel);
+	if (box.x2 > pixmap->drawable.width)
+		box.x2 = pixmap->drawable.width;
+
+	w = box.x2 - box.x1;
+	h = box.y2 - box.y1;
+	DBG(("%s box=(%d, %d), (%d, %d): (%d, %d)/(%d, %d)\n", __FUNCTION__,
+	     box.x1, box.y1, box.x2, box.y2, w, h,
+	     pixmap->drawable.width, pixmap->drawable.height));
+	if (w <= 0 || h <= 0 ||
+	    w > sna->render.max_3d_size ||
+	    h > sna->render.max_3d_size)
+		return false;
+
+	/* How many tiles across are we? */
+	offset = box.x1 * pixmap->drawable.bitsPerPixel / 8 / tile_width * tile_size;
+	channel->bo = kgem_create_proxy(bo,
+					box.y1 * bo->pitch + offset,
+					h * bo->pitch);
+	if (channel->bo == NULL)
+		return false;
+
+	channel->bo->pitch = bo->pitch;
+	/* Callers add offset[] to their source delta, so it must map pixmap coords onto the proxy origin */
+	channel->offset[0] = -box.x1;
+	channel->offset[1] = -box.y1;
+	channel->scale[0] = 1.f/w;
+	channel->scale[1] = 1.f/h;
+	channel->width  = w;
+	channel->height = h;
+	return true;
+}
+
 static int
 sna_render_picture_partial(struct sna *sna,
 			   PicturePtr picture,
@@ -1068,13 +1142,25 @@ sna_render_picture_extract(struct sna *sna,
 						       I915_TILING_X, w, h,
 						       pixmap->drawable.bitsPerPixel),
 				    0);
-		if (bo && !sna_blt_copy_boxes(sna, GXcopy,
-					src_bo, 0, 0,
-					bo, -box.x1, -box.y1,
-					pixmap->drawable.bitsPerPixel,
-					&box, 1)) {
-			kgem_bo_destroy(&sna->kgem, bo);
-			bo = NULL;
+		if (bo) {
+			PixmapRec tmp;
+
+			tmp.drawable.width  = w;
+			tmp.drawable.height = h;
+			tmp.drawable.depth  = pixmap->drawable.depth;
+			tmp.drawable.bitsPerPixel = pixmap->drawable.bitsPerPixel;
+			tmp.devPrivate.ptr = NULL;
+
+			assert(tmp.drawable.width);
+			assert(tmp.drawable.height);
+
+			if (!sna->render.copy_boxes(sna, GXcopy,
+						    pixmap, src_bo, 0, 0,
+						    &tmp, bo, -box.x1, -box.y1,
+						    &box, 1)) {
+				kgem_bo_destroy(&sna->kgem, bo);
+				bo = NULL;
+			}
 		}
 	}
 
@@ -1541,7 +1627,6 @@ sna_render_composite_redirect(struct sna *sna,
 {
 	struct sna_composite_redirect *t = &op->redirect;
 	int bpp = op->dst.pixmap->drawable.bitsPerPixel;
-	struct sna_pixmap *priv;
 	struct kgem_bo *bo;
 
 #if NO_REDIRECT
@@ -1554,11 +1639,9 @@ sna_render_composite_redirect(struct sna *sna,
 	if (!width || !height)
 		return FALSE;
 
-	priv = sna_pixmap_force_to_gpu(op->dst.pixmap, MOVE_READ | MOVE_WRITE);
-	if (priv == NULL) {
-		DBG(("%s: fallback -- no GPU bo attached\n", __FUNCTION__));
+	if (width  > sna->render.max_3d_size ||
+	    height > sna->render.max_3d_size)
 		return FALSE;
-	}
 
 	if (op->dst.bo->pitch <= sna->render.max_3d_pitch) {
 		int tile_width, tile_height, tile_size;
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 94c2744..a689315 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -507,10 +507,16 @@ Bool sna_tiling_fill_boxes(struct sna *sna,
 			   const xRenderColor *color,
 			   PixmapPtr dst, struct kgem_bo *dst_bo,
 			   const BoxRec *box, int n);
+
 Bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu,
-			   struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
-			   struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
-			   int bpp, const BoxRec *box, int nbox);
+			   PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+			   PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+			   const BoxRec *box, int n);
+
+Bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
+			       struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+			       struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+			       int bpp, const BoxRec *box, int nbox);
 
 Bool sna_blt_composite(struct sna *sna,
 		       uint32_t op,
@@ -589,6 +595,14 @@ sna_render_pixmap_bo(struct sna *sna,
 		     int16_t w, int16_t h,
 		     int16_t dst_x, int16_t dst_y);
 
+bool
+sna_render_pixmap_partial(struct sna *sna,
+			  PixmapPtr pixmap,
+			  struct kgem_bo *bo,
+			  struct sna_composite_channel *channel,
+			  int16_t x, int16_t y,
+			  int16_t w, int16_t h);
+
 int
 sna_render_picture_extract(struct sna *sna,
 			   PicturePtr picture,
@@ -614,6 +628,13 @@ sna_render_picture_convert(struct sna *sna,
 			   int16_t w, int16_t h,
 			   int16_t dst_x, int16_t dst_y);
 
+inline static void sna_render_composite_redirect_init(struct sna_composite_op *op)
+{ /* clear the redirect state; the copy paths test real_bo != NULL to tell that a redirect is active */
+	struct sna_composite_redirect *t = &op->redirect;
+	t->real_bo = NULL;
+	t->damage = NULL;
+}
+
 Bool
 sna_render_composite_redirect(struct sna *sna,
 			      struct sna_composite_op *op,
diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c
index 702192a..00e111c 100644
--- a/src/sna/sna_tiling.c
+++ b/src/sna/sna_tiling.c
@@ -421,10 +421,10 @@ done:
 	return ret;
 }
 
-Bool sna_tiling_copy_boxes(struct sna *sna, uint8_t alu,
-			   struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
-			   struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
-			   int bpp, const BoxRec *box, int nbox)
+Bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
+			       struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+			       struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+			       int bpp, const BoxRec *box, int nbox)
 {
 	RegionRec region, tile, this;
 	struct kgem_bo *bo;
@@ -516,3 +516,125 @@ done:
 	pixman_region_fini(&region);
 	return ret;
 }
+
+static Bool
+box_intersect(BoxPtr a, const BoxRec *b)
+{
+	if (a->x1 < b->x1)
+		a->x1 = b->x1;
+	if (a->x2 > b->x2)
+		a->x2 = b->x2;
+	if (a->y1 < b->y1)
+		a->y1 = b->y1;
+	if (a->y2 > b->y2)
+		a->y2 = b->y2;
+	/* *a now holds a clipped to b; report whether any area is left */
+	return a->x1 < a->x2 && a->y1 < a->y2;
+}
+
+/* Copy box[] from src to dst one tile at a time through a temporary X-tiled bo; TRUE only if every tile was copied. */
+Bool
+sna_tiling_copy_boxes(struct sna *sna, uint8_t alu,
+		      PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy,
+		      PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+		      const BoxRec *box, int n)
+{
+	BoxRec extents, tile, stack[64], *clipped, *c;
+	PixmapRec p;
+	int i, step;
+	Bool ret = FALSE;
+
+	/* Bounding box (union) of all boxes: the tile walk below must cover each one */
+	extents = box[0];
+	for (i = 1; i < n; i++) {
+		if (box[i].x1 < extents.x1)
+			extents.x1 = box[i].x1;
+		if (box[i].y1 < extents.y1)
+			extents.y1 = box[i].y1;
+		if (box[i].x2 > extents.x2)
+			extents.x2 = box[i].x2;
+		if (box[i].y2 > extents.y2)
+			extents.y2 = box[i].y2;
+	}
+
+	step = sna->render.max_3d_size - 4096 / dst->drawable.bitsPerPixel;
+	while (step * step * 4 > sna->kgem.max_upload_tile_size)
+		step /= 2;
+
+	DBG(("%s: tiling copy, using %dx%d tiles\n",
+	     __FUNCTION__, step, step));
+
+	if (n > ARRAY_SIZE(stack)) {
+		clipped = malloc(sizeof(BoxRec) * n);
+		if (clipped == NULL)
+			goto tiled_error;
+	} else
+		clipped = stack;
+
+	p.drawable.depth = src->drawable.depth;
+	p.drawable.bitsPerPixel = src->drawable.bitsPerPixel;
+	p.devPrivate.ptr = NULL;
+
+	for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
+		tile.y2 = tile.y1 + step;
+		if (tile.y2 > extents.y2)
+			tile.y2 = extents.y2;
+
+		for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
+			struct kgem_bo *tmp_bo;
+
+			tile.x2 = tile.x1 + step;
+			if (tile.x2 > extents.x2)
+				tile.x2 = extents.x2;
+
+			p.drawable.width  = tile.x2 - tile.x1;
+			p.drawable.height = tile.y2 - tile.y1;
+
+			tmp_bo = kgem_create_2d(&sna->kgem,
+						p.drawable.width,
+						p.drawable.height,
+						p.drawable.bitsPerPixel,
+						I915_TILING_X, 0);
+			if (!tmp_bo)
+				goto tiled_error;
+
+			c = clipped;
+			for (i = 0; i < n; i++) {
+				*c = box[i];
+				if (!box_intersect(c, &tile))
+					continue;
+				DBG(("%s: box(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d)\n",
+				     __FUNCTION__,
+				     c->x1, c->y1,
+				     c->x2, c->y2,
+				     src_dx, src_dy,
+				     c->x1 - tile.x1,
+				     c->y1 - tile.y1));
+				c++;
+			}
+
+			/* i is reused as this tile's success flag; a tile with no boxes counts as success */
+			i = (c == clipped ||
+			     (sna->render.copy_boxes(sna, GXcopy,
+						     src, src_bo, src_dx, src_dy,
+						     &p, tmp_bo, -tile.x1, -tile.y1,
+						     clipped, c - clipped) &&
+			      sna->render.copy_boxes(sna, alu,
+						     &p, tmp_bo, -tile.x1, -tile.y1,
+						     dst, dst_bo, dst_dx, dst_dy,
+						     clipped, c - clipped)));
+
+			kgem_bo_destroy(&sna->kgem, tmp_bo);
+
+			if (!i)
+				goto tiled_error;
+		}
+	}
+
+	ret = TRUE;
+tiled_error:
+	if (clipped != stack)
+		free(clipped);
+
+	return ret;
+}


More information about the xorg-commit mailing list