xf86-video-intel: 20 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_composite.c src/sna/sna_io.c src/sna/sna_render.h

Chris Wilson ickle at kemper.freedesktop.org
Thu Oct 27 02:35:15 PDT 2011


 src/sna/gen2_render.c   |   58 +
 src/sna/gen3_render.c   |   52 +
 src/sna/gen4_render.c   |   35 -
 src/sna/gen5_render.c   |   60 +
 src/sna/gen6_render.c   |   57 +
 src/sna/gen7_render.c   |   55 +
 src/sna/kgem.c          |    6 
 src/sna/sna_accel.c     | 1638 +++++++++++++++++++++++++++++++++++++++++++-----
 src/sna/sna_blt.c       |  232 +++++-
 src/sna/sna_composite.c |    3 
 src/sna/sna_io.c        |   21 
 src/sna/sna_render.h    |   10 
 12 files changed, 1941 insertions(+), 286 deletions(-)

New commits:
commit 991582f6678581e3d92c0497e10ecee67536efc2
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Oct 27 09:27:41 2011 +0100

    sna/io: Update batch mode after submitting partial requests
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 5a6fdd9..f97b88a 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -143,8 +143,10 @@ void sna_read_boxes(struct sna *sna,
 	if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) ||
 	    kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) ||
 	    !kgem_check_batch(kgem, 8) ||
-	    !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL))
+	    !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
 		_kgem_submit(kgem);
+		_kgem_set_mode(kgem, KGEM_BLT);
+	}
 
 	tmp_nbox = nbox;
 	tmp_box = box;
@@ -196,10 +198,14 @@ void sna_read_boxes(struct sna *sna,
 
 			offset += pitch * height;
 		}
-		tmp_box += nbox_this_time;
 
 		_kgem_submit(kgem);
-	} while (tmp_nbox);
+		if (!tmp_nbox)
+			break;
+
+		_kgem_set_mode(kgem, KGEM_BLT);
+		tmp_box += nbox_this_time;
+	} while (1);
 	assert(offset == dst_bo->size);
 
 	kgem_buffer_read_sync(kgem, dst_bo);
@@ -315,8 +321,10 @@ void sna_write_boxes(struct sna *sna,
 	if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) ||
 	    kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) ||
 	    !kgem_check_batch(kgem, 8) ||
-	    !kgem_check_bo_fenced(kgem, dst_bo, NULL))
+	    !kgem_check_bo_fenced(kgem, dst_bo, NULL)) {
 		_kgem_submit(kgem);
+		_kgem_set_mode(kgem, KGEM_BLT);
+	}
 
 	do {
 		int nbox_this_time;
@@ -395,13 +403,14 @@ void sna_write_boxes(struct sna *sna,
 		} while (--nbox_this_time);
 		assert(offset == src_bo->size);
 
-		if (nbox)
+		if (nbox) {
 			_kgem_submit(kgem);
+			_kgem_set_mode(kgem, KGEM_BLT);
+		}
 
 		kgem_bo_destroy(kgem, src_bo);
 	} while (nbox);
 
-	_kgem_set_mode(kgem, KGEM_BLT);
 	sna->blt_state.fill_bo = 0;
 }
 
commit 91745de113340c8a281e8cb724b4a7e0b7a0711a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Oct 27 09:17:50 2011 +0100

    sna: Further improve use of fill->boxes for PolyRectangle
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index c8936fa..efa2013 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3869,17 +3869,13 @@ sna_poly_rectangle_blt(DrawablePtr drawable,
 	struct sna *sna = to_sna_from_drawable(drawable);
 	PixmapPtr pixmap = get_drawable_pixmap(drawable);
 	struct sna_fill_op fill;
+	BoxRec boxes[512], *b = boxes, *const last_box = boxes+ARRAY_SIZE(boxes);
 	int16_t dx, dy;
 	static void * const jump[] = {
-		&&no_damage_zero,
-		&&maybe_damage_zero_clipped,
-		&&no_damage_wide,
-		&&maybe_damage_wide_clipped,
-
-		&&damage_zero,
-		&&maybe_damage_zero_clipped,
-		&&damage_wide,
-		&&maybe_damage_wide_clipped,
+		&&zero,
+		&&zero_clipped,
+		&&wide,
+		&&wide_clipped,
 	};
 	unsigned v;
 
@@ -3893,167 +3889,125 @@ sna_poly_rectangle_blt(DrawablePtr drawable,
 
 	v = !!clipped;
 	v |= (gc->lineWidth != 0) << 1;
-	v |= (damage != NULL) << 2;
 	goto *jump[v];
 
-damage_zero:
+zero:
 	dx += drawable->x;
 	dy += drawable->y;
 
 	do {
-		BoxRec box;
+		xRectangle rr = *r++;
+		rr.x += dx;
+		rr.y += dy;
 
-		if (r->width <= 1 || r->height <= 1) {
-			box.x1 = r->x + dx;
-			box.y1 = r->y + dy;
-			box.x2 = box.x1 + r->width + 1;
-			box.y2 = box.y1 + r->height + 1;
-			DBG(("%s: blt (%d, %d), (%d, %d)\n",
-			     __FUNCTION__,
-			     box.x1, box.y1, box.x2, box.y2));
-			fill.box(sna, &fill, &box);
-			assert_pixmap_contains_box(pixmap, &box);
-			sna_damage_add_box(damage, &box);
-		} else {
-			box.x1 = r->x + dx;
-			box.y1 = r->y + dy;
-			box.x2 = box.x1 + r->width + 1;
-			box.y2 = box.y1 + 1;
-			DBG(("%s: blt (%d, %d), (%d, %d)\n",
-			     __FUNCTION__,
-			     box.x1, box.y1, box.x2, box.y2));
-			fill.box(sna, &fill, &box);
-			assert_pixmap_contains_box(pixmap, &box);
-			sna_damage_add_box(damage, &box);
-
-			box.y1 += r->height;
-			box.y2 += r->height;
-			DBG(("%s: blt (%d, %d), (%d, %d)\n",
-			     __FUNCTION__,
-			     box.x1, box.y1, box.x2, box.y2));
-			fill.box(sna, &fill, &box);
-			assert_pixmap_contains_box(pixmap, &box);
-			sna_damage_add_box(damage, &box);
-
-			box.y1 = r->y + dy + 1;
-			box.y2 = box.y1 + r->height - 1;
-			box.x1 = r->x + dx;
-			box.x2 = box.x1 + 1;
-			DBG(("%s: blt (%d, %d), (%d, %d)\n",
-			     __FUNCTION__,
-			     box.x1, box.y1, box.x2, box.y2));
-			fill.box(sna, &fill, &box);
-			assert_pixmap_contains_box(pixmap, &box);
-			sna_damage_add_box(damage, &box);
-
-			box.x1 += r->width;
-			box.x2 += r->width;
-			DBG(("%s: blt (%d, %d), (%d, %d)\n",
-			     __FUNCTION__,
-			     box.x1, box.y1, box.x2, box.y2));
-			fill.box(sna, &fill, &box);
-			assert_pixmap_contains_box(pixmap, &box);
-			sna_damage_add_box(damage, &box);
+		if (b+4 > last_box) {
+			fill.boxes(sna, &fill, boxes, b-boxes);
+			if (damage)
+				sna_damage_add_boxes(damage, boxes, b-boxes, 0, 0);
+			b = boxes;
 		}
-		r++;
-	} while (--n);
-	goto done;
-
-no_damage_zero:
-	dx += drawable->x;
-	dy += drawable->y;
 
-	do {
-		BoxRec box[4];
-
-		if (r->width <= 1 || r->height <= 1) {
-			box[0].x1 = r->x + dx;
-			box[0].y1 = r->y + dy;
-			box[0].x2 = box[0].x1 + r->width + 1;
-			box[0].y2 = box[0].y1 + r->height + 1;
+		if (rr.width <= 1 || rr.height <= 1) {
+			b->x1 = rr.x;
+			b->y1 = rr.y;
+			b->x2 = rr.x + rr.width + 1;
+			b->y2 = rr.y + rr.height + 1;
 			DBG(("%s: blt (%d, %d), (%d, %d)\n",
 			     __FUNCTION__,
-			     box[0].x1, box[0].y1, box[0].x2, box[0].y2));
-			fill.box(sna, &fill, &box[0]);
+			     b->x1, b->y1, b->x2,b->y2));
+			b++;
 		} else {
-			box[0].x1 = r->x + dx;
-			box[0].y1 = r->y + dy;
-			box[0].x2 = box[0].x1 + r->width + 1;
-			box[0].y2 = box[0].y1 + 1;
+			b[0].x1 = rr.x;
+			b[0].y1 = rr.y;
+			b[0].x2 = rr.x + rr.width + 1;
+			b[0].y2 = rr.y + 1;
 
-			box[1] = box[0];
-			box[1].y1 += r->height;
-			box[1].y2 += r->height;
+			b[1] = b[0];
+			b[1].y1 += rr.height;
+			b[1].y2 += rr.height;
 
-			box[2].y1 = r->y + dy + 1;
-			box[2].y2 = box[2].y1 + r->height - 1;
-			box[2].x1 = r->x + dx;
-			box[2].x2 = box[2].x1 + 1;
+			b[2].y1 = rr.y + 1;
+			b[2].y2 = rr.y + rr.height - 1;
+			b[2].x1 = rr.x;
+			b[2].x2 = rr.x + 1;
 
-			box[3] = box[2];
-			box[3].x1 += r->width;
-			box[3].x2 += r->width;
+			b[3] = b[2];
+			b[3].x1 += rr.width;
+			b[3].x2 += rr.width;
 
-			fill.boxes(sna, &fill, box, 4);
+			b += 4;
 		}
-		r++;
 	} while (--n);
 	goto done;
 
-maybe_damage_zero_clipped:
+zero_clipped:
 	{
 		RegionRec clip;
-		BoxRec box[4], *b, *c;
-		int count = 0, i, j;
+		const BoxRec *clip_start, *clip_end, *c;
+		BoxRec box[4];
+		int count, i;
 
 		region_set(&clip, extents);
 		region_maybe_clip(&clip, gc->pCompositeClip);
+		if (!RegionNotEmpty(&clip))
+			return true;
 
+		clip_start = RegionBoxptr(&clip);
+		clip_end = clip_start + clip.data->numRects;
 		do {
-			if (r->width <= 1 || r->height <= 1) {
-				box[0].x1 = r->x + drawable->x;
-				box[0].y1 = r->y + drawable->y;
-				box[0].x2 = box[0].x1 + r->width + 1;
-				box[0].y2 = box[0].y1 + r->height + 1;
+			xRectangle rr = *r++;
+			rr.x += drawable->x;
+			rr.y += drawable->y;
+
+			if (rr.width <= 1 || rr.height <= 1) {
+				box[0].x1 = rr.x;
+				box[0].y1 = rr.y;
+				box[0].x2 = rr.x + rr.width + 1;
+				box[0].y2 = rr.y + r->height + 1;
 				count = 1;
 			} else {
-				box[0].x1 = r->x + drawable->x;
-				box[0].y1 = r->y + drawable->y;
-				box[0].x2 = box[0].x1 + r->width + 1;
-				box[0].y2 = box[0].y1 + 1;
+				box[0].x1 = rr.x;
+				box[0].y1 = rr.y;
+				box[0].x2 = rr.x + rr.width + 1;
+				box[0].y2 = rr.y + 1;
 
 				box[1] = box[0];
-				box[1].y1 += r->height;
-				box[1].y2 += r->height;
+				box[1].y1 += rr.height;
+				box[1].y2 += rr.height;
 
-				box[2].y1 = r->y + drawable->y + 1;
-				box[2].y2 = box[2].y1 + r->height - 1;
-				box[2].x1 = r->x + drawable->x;
-				box[2].x2 = box[2].x1 + 1;
+				box[2].y1 = rr.y + 1;
+				box[2].y2 = box[2].y1 + rr.height - 1;
+				box[2].x1 = rr.x;
+				box[2].x2 = rr.x + 1;
 
 				box[3] = box[2];
-				box[3].x1 += r->width;
-				box[3].x2 += r->width;
+				box[3].x1 += rr.width;
+				box[3].x2 += rr.width;
 				count = 4;
 			}
 
-			for (i = REGION_NUM_RECTS(&clip), c = REGION_RECTS(&clip); i--; c++) {
-				for (j = count, b = box; j--; b++) {
-					BoxRec clipped = *b;
-					if (box_intersect(&clipped, c)) {
-						clipped.x1 += dx;
-						clipped.x2 += dx;
-						clipped.y1 += dy;
-						clipped.y2 += dy;
-						DBG(("%s: blt (%d, %d), (%d, %d)\n",
-						     __FUNCTION__,
-						     clipped.x1, clipped.y1, clipped.x2, clipped.y2));
-						fill.box(sna, &fill, &clipped);
-						if (damage) {
-							assert_pixmap_contains_box(pixmap, &clipped);
-							sna_damage_add_box(damage, &clipped);
+			for (i = 0; i < count; i++) {
+				c = find_clip_box_for_y(clip_start,
+							clip_end,
+							box[i].y1);
+				while (c != clip_end) {
+					if (box[i].y2 <= c->y1)
+						break;
+
+					*b = box[i];
+					if (box_intersect(b, c)) {
+						b->x1 += dx;
+						b->x2 += dx;
+						b->y1 += dy;
+						b->y2 += dy;
+						if (++b == last_box) {
+							fill.boxes(sna, &fill, boxes, last_box-boxes);
+							if (damage)
+								sna_damage_add_boxes(damage, boxes, b-boxes, 0, 0);
+							b = boxes;
 						}
 					}
+
 				}
 			}
 			r++;
@@ -4061,78 +4015,85 @@ maybe_damage_zero_clipped:
 	}
 	goto done;
 
-maybe_damage_wide_clipped:
+wide_clipped:
 	{
 		RegionRec clip;
+		BoxRec box[4];
+		const BoxRec *clip_start, *clip_end, *c;
 		int16_t offset2 = gc->lineWidth;
 		int16_t offset1 = offset2 >> 1;
 		int16_t offset3 = offset2 - offset1;
 
 		region_set(&clip, extents);
 		region_maybe_clip(&clip, gc->pCompositeClip);
+		if (!RegionNotEmpty(&clip))
+			return true;
 
+		clip_start = RegionBoxptr(&clip);
+		clip_end = clip_start + clip.data->numRects;
 		do {
-			BoxRec box[4], *c, *b;
-			int16_t x = r->x + drawable->x;
-			int16_t y = r->y + drawable->y;
-			int16_t width = r->width;
-			int16_t height = r->height;
-			int count, i, j;
-			r++;
-
-			if (height < offset2 || width < offset1) {
-				if (height == 0) {
-					box[0].x1 = x;
-					box[0].x2 = x + width + 1;
+			xRectangle rr = *r++;
+			int count;
+			rr.x += drawable->x;
+			rr.y += drawable->y;
+
+			if (rr.height < offset2 || rr.width < offset1) {
+				if (rr.height == 0) {
+					box[0].x1 = rr.x;
+					box[0].x2 = rr.x + rr.width + 1;
 				} else {
-					box[0].x1 = x - offset1;
-					box[0].x2 = box[0].x1 + width + offset2;
+					box[0].x1 = rr.x - offset1;
+					box[0].x2 = box[0].x1 + rr.width + offset2;
 				}
-				if (width == 0) {
-					box[0].y1 = y;
-					box[0].y2 = y + height + 1;
+				if (rr.width == 0) {
+					box[0].y1 = rr.y;
+					box[0].y2 = rr.y + rr.height + 1;
 				} else {
-					box[0].y1 = y - offset1;
-					box[0].y2 = box[0].y1 + height + offset2;
+					box[0].y1 = rr.y - offset1;
+					box[0].y2 = box[0].y1 + rr.height + offset2;
 				}
 				count = 1;
 			} else {
-				box[0].x1 = x - offset1;
-				box[0].x2 = box[0].x1 + width + offset2;
-				box[0].y1 = y - offset1;
+				box[0].x1 = rr.x - offset1;
+				box[0].x2 = box[0].x1 + rr.width + offset2;
+				box[0].y1 = rr.y - offset1;
 				box[0].y2 = box[0].y1 + offset2;
 
-				box[1].x1 = x - offset1;
+				box[1].x1 = rr.x - offset1;
 				box[1].x2 = box[1].x1 + offset2;
-				box[1].y1 = y + offset3;
-				box[1].y2 = y + height - offset1;
+				box[1].y1 = rr.y + offset3;
+				box[1].y2 = rr.y + rr.height - offset1;
 
-				box[2].x1 = x + width - offset1;
+				box[2].x1 = rr.x + rr.width - offset1;
 				box[2].x2 = box[2].x1 + offset2;
-				box[2].y1 = y + offset3;
-				box[2].y2 = y + height - offset1;
+				box[2].y1 = rr.y + offset3;
+				box[2].y2 = rr.y + rr.height - offset1;
 
 				box[3] = box[1];
-				box[3].y1 += height;
-				box[3].y2 += height;
+				box[3].y1 += rr.height;
+				box[3].y2 += rr.height;
 				count = 4;
 			}
 
-			for (i = REGION_NUM_RECTS(&clip), c = REGION_RECTS(&clip); i--; c++) {
-				for (j = count, b = box; j--; b++) {
-					BoxRec clipped = *b;
-					if (box_intersect(&clipped, c)) {
-						clipped.x1 += dx;
-						clipped.x2 += dx;
-						clipped.y1 += dy;
-						clipped.y2 += dy;
-						DBG(("%s: blt (%d, %d), (%d, %d)\n",
-						     __FUNCTION__,
-						     clipped.x1, clipped.y1, clipped.x2, clipped.y2));
-						fill.box(sna, &fill, &clipped);
-						if (damage) {
-							assert_pixmap_contains_box(pixmap, &clipped);
-							sna_damage_add_box(damage, &clipped);
+			while (count--) {
+				c = find_clip_box_for_y(clip_start,
+							clip_end,
+							box[count].y1);
+				while (c != clip_end) {
+					if (box[count].y2 <= c->y1)
+						break;
+
+					*b = box[count];
+					if (box_intersect(b, c)) {
+						b->x1 += dx;
+						b->x2 += dx;
+						b->y1 += dy;
+						b->y2 += dy;
+						if (++b == last_box) {
+							fill.boxes(sna, &fill, boxes, last_box-boxes);
+							if (damage)
+								sna_damage_add_boxes(damage, boxes, b-boxes, 0, 0);
+							b = boxes;
 						}
 					}
 				}
@@ -4141,7 +4102,7 @@ maybe_damage_wide_clipped:
 	}
 	goto done;
 
-no_damage_wide:
+wide:
 	{
 		int offset2 = gc->lineWidth;
 		int offset1 = offset2 >> 1;
@@ -4151,126 +4112,63 @@ no_damage_wide:
 		dy += drawable->y;
 
 		do {
-			BoxRec box[4];
-			int16_t x = r->x + dx;
-			int16_t y = r->y + dy;
-			int16_t width = r->width;
-			int16_t height = r->height;
-			r++;
-
-			if (height < offset2 || width < offset1) {
-				if (height == 0) {
-					box[0].x1 = x;
-					box[0].x2 = x + width + 1;
-				} else {
-					box[0].x1 = x - offset1;
-					box[0].x2 = box[0].x1 + width + offset2;
-				}
-				if (width == 0) {
-					box[0].y1 = y;
-					box[0].y2 = y + height + 1;
-				} else {
-					box[0].y1 = y - offset1;
-					box[0].y2 = box[0].y1 + height + offset2;
-				}
-				fill.box(sna, &fill, &box[0]);
-			} else {
-				box[0].x1 = x - offset1;
-				box[0].x2 = box[0].x1 + width + offset2;
-				box[0].y1 = y - offset1;
-				box[0].y2 = box[0].y1 + offset2;
-
-				box[1] = box[0];
-				box[1].y1 = y + height - offset1;
-				box[1].y2 = box[1].y1 + offset2;
-
-				box[2].x1 = x - offset1;
-				box[2].x2 = box[2].x1 + offset2;
-				box[2].y1 = y + offset3;
-				box[2].y2 = y + height - offset1;
-
-				box[3] = box[2];
-				box[3].x1 = x + width - offset1;
-				box[3].x2 = box[3].x1 + offset2;
-
-				fill.boxes(sna, &fill, box, 4);
+			xRectangle rr = *r++;
+			rr.x += dx;
+			rr.y += dy;
+
+			if (b+4 > last_box) {
+				fill.boxes(sna, &fill, boxes, last_box-boxes);
+				if (damage)
+					sna_damage_add_boxes(damage, boxes, b-boxes, 0, 0);
+				b = boxes;
 			}
 
-		} while (--n);
-	}
-	goto done;
-
-damage_wide:
-	{
-		int offset2 = gc->lineWidth;
-		int offset1 = offset2 >> 1;
-		int offset3 = offset2 - offset1;
-
-		dx += drawable->x;
-		dy += drawable->y;
-
-		do {
-			BoxRec box;
-			int16_t x = r->x + dx;
-			int16_t y = r->y + dy;
-			int16_t width = r->width;
-			int16_t height = r->height;
-			r++;
-
-			if (height < offset2 || width < offset1) {
-				if (height == 0) {
-					box.x1 = x;
-					box.x2 = x + width + 1;
+			if (rr.height < offset2 || rr.width < offset1) {
+				if (rr.height == 0) {
+					b->x1 = rr.x;
+					b->x2 = rr.x + rr.width + 1;
 				} else {
-					box.x1 = x - offset1;
-					box.x2 = box.x1 + width + offset2;
+					b->x1 = rr.x - offset1;
+					b->x2 = rr.x + rr.width + offset2;
 				}
-				if (width == 0) {
-					box.y1 = y;
-					box.y2 = y + height + 1;
+				if (rr.width == 0) {
+					b->y1 = rr.y;
+					b->y2 = rr.y + rr.height + 1;
 				} else {
-					box.y1 = y - offset1;
-					box.y2 = box.y1 + height + offset2;
+					b->y1 = rr.y - offset1;
+					b->y2 = rr.y + rr.height + offset2;
 				}
-				fill.box(sna, &fill, &box);
-				assert_pixmap_contains_box(pixmap, &box);
-				sna_damage_add_box(damage, &box);
+				b++;
 			} else {
-				box.x1 = x - offset1;
-				box.x2 = box.x1 + width + offset2;
-				box.y1 = y - offset1;
-				box.y2 = box.y1 + offset2;
-				fill.box(sna, &fill, &box);
-				assert_pixmap_contains_box(pixmap, &box);
-				sna_damage_add_box(damage, &box);
-
-				box.x1 = x - offset1;
-				box.x2 = box.x1 + offset2;
-				box.y1 = y + offset3;
-				box.y2 = y + height - offset1;
-				fill.box(sna, &fill, &box);
-				assert_pixmap_contains_box(pixmap, &box);
-				sna_damage_add_box(damage, &box);
-
-				box.x1 = x + width - offset1;
-				box.x2 = box.x1 + offset2;
-				fill.box(sna, &fill, &box);
-				assert_pixmap_contains_box(pixmap, &box);
-				sna_damage_add_box(damage, &box);
-
-				box.x1 = x - offset1;
-				box.x2 = box.x1 + width + offset2;
-				box.y1 = y + height - offset1;
-				box.y2 = box.y1 + offset2;
-				fill.box(sna, &fill, &box);
-				assert_pixmap_contains_box(pixmap, &box);
-				sna_damage_add_box(damage, &box);
+				b[0].x1 = rr.x - offset1;
+				b[0].x2 = b[0].x1 + rr.width + offset2;
+				b[0].y1 = rr.y - offset1;
+				b[0].y2 = b[0].y1 + offset2;
+
+				b[1] = b[0];
+				b[1].y1 = rr.y + rr.height - offset1;
+				b[1].y2 = b[1].y1 + offset2;
+
+				b[2].x1 = rr.x - offset1;
+				b[2].x2 = b[2].x1 + offset2;
+				b[2].y1 = rr.y + offset3;
+				b[2].y2 = rr.y + rr.height - offset1;
+
+				b[3] = b[2];
+				b[3].x1 = rr.x + rr.width - offset1;
+				b[3].x2 = b[3].x1 + offset2;
+				b += 4;
 			}
 		} while (--n);
 	}
 	goto done;
 
 done:
+	if (b != boxes) {
+		fill.boxes(sna, &fill, boxes, b-boxes);
+		if (damage)
+			sna_damage_add_boxes(damage, boxes, b-boxes, 0, 0);
+	}
 	fill.done(sna, &fill);
 	return TRUE;
 }
commit ffe1bdea82c07a2738743855dfa83494531b41aa
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 26 18:37:29 2011 +0100

    sna/blt: After submitting the batch, it will be empty
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 2aa5bc1..ffe81df 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -207,7 +207,8 @@ noinline static void sna_blt_fill_begin(struct sna *sna,
 	_kgem_submit(kgem);
 	_kgem_set_mode(kgem, KGEM_BLT);
 
-	b = kgem->batch + kgem->nbatch;
+	assert(kgem->nbatch == 0);
+	b = kgem->batch;
 	b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
 	if (blt->bpp == 32)
 		b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
@@ -223,7 +224,7 @@ noinline static void sna_blt_fill_begin(struct sna *sna,
 	b[6] = blt->pixel;
 	b[7] = 0;
 	b[8] = 0;
-	kgem->nbatch += 9;
+	kgem->nbatch = 9;
 }
 
 inline static void sna_blt_fill_one(struct sna *sna,
commit b34e7454574f36796560955fb9d3d33ca00924d6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 26 18:11:25 2011 +0100

    sna/blt: Convert Copy with color 0 into a Clear
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 8d07035..2aa5bc1 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -142,6 +142,9 @@ static bool sna_blt_fill_init(struct sna *sna,
 	if (pitch > MAXSHORT)
 		return FALSE;
 
+	if (pixel == 0)
+		alu = GXclear;
+
 	blt->br13 = 1<<31 | (fill_ROP[alu] << 16) | pitch;
 	switch (bpp) {
 	default: assert(0);
@@ -1524,6 +1527,9 @@ static Bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
 		return FALSE;
 	}
 
+	if (color == 0)
+		alu = GXclear;
+
 	br13 |= fill_ROP[alu] << 16;
 	switch (bpp) {
 	default: assert(0);
@@ -1603,6 +1609,9 @@ Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
 	if (br13 > MAXSHORT)
 		return FALSE;
 
+	if (pixel == 0)
+		alu = GXclear;
+
 	br13 |= 1<<31 | fill_ROP[alu] << 16;
 	switch (bpp) {
 	default: assert(0);
commit 389241aa6469b2e64a58cc0527e3310fdc69ffb7
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 26 17:50:38 2011 +0100

    sna: Suppress an overwritten fill
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index bbe9dda..8d07035 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1756,6 +1756,18 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 	case 8: break;
 	}
 
+	/* Compare first box against a previous fill */
+	if (kgem->nbatch >= 6 &&
+	    (alu == GXcopy || alu == GXclear) &&
+	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->handle &&
+	    kgem->batch[kgem->nbatch-6] == ((cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT_CMD) &&
+	    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
+	    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
+		DBG(("%s: deleting last fill\n", __FUNCTION__));
+		kgem->nbatch -= 6;
+		kgem->nreloc--;
+	}
+
 	kgem_set_mode(kgem, KGEM_BLT);
 	if (!kgem_check_batch(kgem, 8) ||
 	    !kgem_check_reloc(kgem, 2) ||
commit baf78fcddb87eba987163485ad8fd227fb244f29
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 26 12:54:13 2011 +0100

    sna/gen2: Cache the last solid colour for spans
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 440a343..aa315e6 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -1593,8 +1593,11 @@ static void gen2_emit_composite_spans_state(struct sna *sna,
 	gen2_emit_spans_pipeline(sna, op);
 
 	if (op->base.src.is_solid) {
-		BATCH(_3DSTATE_DFLT_SPECULAR_CMD);
-		BATCH(op->base.src.u.gen2.pixel);
+		if (op->base.src.u.gen2.pixel != sna->render_state.gen2.specular) {
+			BATCH(_3DSTATE_DFLT_SPECULAR_CMD);
+			BATCH(op->base.src.u.gen2.pixel);
+			sna->render_state.gen2.specular = op->base.src.u.gen2.pixel;
+		}
 	} else {
 		uint32_t v =_3DSTATE_VERTEX_FORMAT_2_CMD |
 			(op->base.src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_3D);
@@ -2534,6 +2537,7 @@ gen2_render_reset(struct sna *sna)
 	sna->render_state.gen2.vft = 0;
 
 	sna->render_state.gen2.diffuse = 0x0c0ffee0;
+	sna->render_state.gen2.specular = 0x0c0ffee0;
 }
 
 static void
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index d31b55c..774c576 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -275,8 +275,9 @@ struct gen2_render_state {
 	Bool need_invariant;
 	Bool logic_op_enabled;
 	uint32_t ls1, ls2, vft;
-	uint16_t vertex_offset;
 	uint32_t diffuse;
+	uint32_t specular;
+	uint16_t vertex_offset;
 };
 
 struct gen3_render_state {
commit ff4715a36999a43cee20cf3474b4ae0af646342c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 26 12:40:46 2011 +0100

    sna/composite: Discard opaque masks
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 80ccf9c..bbe9dda 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -652,6 +652,17 @@ is_white(PicturePtr picture)
 		return pixel_is_white(get_pixel(picture), picture->format);
 }
 
+bool
+sna_composite_mask_is_opaque(PicturePtr mask)
+{
+	if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format))
+		return is_solid(mask) && is_white(mask);
+	else if (!PICT_FORMAT_A(mask->format))
+		return TRUE;
+	else
+		return is_solid(mask) && is_opaque_solid(mask);
+}
+
 fastcall
 static void blt_composite_fill(struct sna *sna,
 			       const struct sna_composite_op *op,
diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c
index 3962d75..e9dc965 100644
--- a/src/sna/sna_composite.c
+++ b/src/sna/sna_composite.c
@@ -426,6 +426,9 @@ sna_composite(CARD8 op,
 	     dst_x, dst_y, dst->pDrawable->x, dst->pDrawable->y,
 	     width, height));
 
+	if (mask && sna_composite_mask_is_opaque(mask))
+		mask = NULL;
+
 	if (!sna_compute_composite_region(&region,
 					  src, mask, dst,
 					  src_x,  src_y,
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 6c18791..d31b55c 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -569,4 +569,7 @@ void
 sna_render_composite_redirect_done(struct sna *sna,
 				   const struct sna_composite_op *op);
 
+bool
+sna_composite_mask_is_opaque(PicturePtr mask);
+
 #endif /* SNA_RENDER_H */
commit 0079579ebd548fc1b490a42928373507a93229bd
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 26 11:49:53 2011 +0100

    sna: Fix debug compilation, again.

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 3ec2e08..440a343 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -2063,7 +2063,7 @@ gen2_render_fill_op_boxes(struct sna *sna,
 			  int nbox)
 {
 	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
-	     box->x1, box->y1, box->x2, box->y2, n));
+	     box->x1, box->y1, box->x2, box->y2, nbox));
 
 	do {
 		int nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index b9ce45f..2b70b85 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -3818,7 +3818,7 @@ gen3_render_fill_op_boxes(struct sna *sna,
 			  int nbox)
 {
 	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
-	     box->x1, box->y1, box->x2, box->y2, n));
+	     box->x1, box->y1, box->x2, box->y2, nbox));
 
 	do {
 		int nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index e72283e..a177d32 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2540,7 +2540,7 @@ gen5_render_fill_op_boxes(struct sna *sna,
 			  int nbox)
 {
 	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
-	     box->x1, box->y1, box->x2, box->y2, n));
+	     box->x1, box->y1, box->x2, box->y2, nbox));
 
 	do {
 		int nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox);
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 05e6d63..0dde625 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2753,7 +2753,7 @@ gen6_render_op_fill_boxes(struct sna *sna,
 			  int nbox)
 {
 	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
-	     box->x1, box->y1, box->x2, box->y2, n));
+	     box->x1, box->y1, box->x2, box->y2, nbox));
 
 	do {
 		int nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox);
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 05b65f3..b9f23ee 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2903,7 +2903,7 @@ gen7_render_fill_op_boxes(struct sna *sna,
 			  int nbox)
 {
 	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
-	     box->x1, box->y1, box->x2, box->y2, n));
+	     box->x1, box->y1, box->x2, box->y2, nbox));
 
 	do {
 		int nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox);
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 6f98f27..e8df7d9 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -998,10 +998,10 @@ void _kgem_submit(struct kgem *kgem)
 					       i,
 					       kgem->exec[i].handle,
 					       (int)kgem->exec[i].offset,
-					       found ? found->size : 0,
-					       found ? found->tiling : 0,
+					       found ? found->size : -1,
+					       found ? found->tiling : -1,
 					       (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE),
-					       found ? found->deleted : 1);
+					       found ? found->deleted : -1);
 				}
 				for (i = 0; i < kgem->nreloc; i++) {
 					ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n",
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 153ec21..c8936fa 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2537,7 +2537,7 @@ sna_poly_zero_line_blt(DrawablePtr drawable,
 	void *jump, *ret;
 
 	DBG(("%s: alu=%d, pixel=%lx, n=%d, clipped=%d, damage=%p\n",
-	     __FUNCTION__, gc->alu, gc->fgPixel, n, clipped, damage));
+	     __FUNCTION__, gc->alu, gc->fgPixel, _n, clipped, damage));
 	if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel))
 		return FALSE;
 
@@ -3348,7 +3348,7 @@ sna_poly_zero_segment_blt(DrawablePtr drawable,
 	void *jump, *ret;
 
 	DBG(("%s: alu=%d, pixel=%lx, n=%d, clipped=%d, damage=%p\n",
-	     __FUNCTION__, gc->alu, gc->fgPixel, n, clipped, damage));
+	     __FUNCTION__, gc->alu, gc->fgPixel, _n, clipped, damage));
 	if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel))
 		return FALSE;
 
commit 5988422455ebb2a3b1cafadc1ff302c35a7a5039
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 26 11:47:45 2011 +0100

    sna: Handle degenerate copy area whilst wedged
    
    We want to return early to avoid handing an empty box to the lower-level
    code, which assumes that it always has work to do.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 8df94eb..153ec21 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1708,6 +1708,8 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 		region.data = NULL;
 		if (gc->pCompositeClip)
 			RegionIntersect(&region, &region, gc->pCompositeClip);
+		if (!RegionNotEmpty(&region))
+			return NULL;
 
 		sna_drawable_move_region_to_cpu(dst, &region, true);
 		RegionTranslate(&region,
@@ -1891,6 +1893,10 @@ no_damage_clipped_translate:
 no_damage_clipped:
 		region_set(&clip, extents);
 		region_maybe_clip(&clip, gc->pCompositeClip);
+		assert(clip.extents.x1 >= 0);
+		assert(clip.extents.y1 >= 0);
+		assert(clip.extents.x2 <= pixmap->drawable.width);
+		assert(clip.extents.y2 <= pixmap->drawable.height);
 
 		DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
 		     __FUNCTION__,
@@ -1991,6 +1997,10 @@ damage_clipped_translate:
 damage_clipped:
 		region_set(&clip, extents);
 		region_maybe_clip(&clip, gc->pCompositeClip);
+		assert(clip.extents.x1 >= 0);
+		assert(clip.extents.y1 >= 0);
+		assert(clip.extents.x2 <= pixmap->drawable.width);
+		assert(clip.extents.y2 <= pixmap->drawable.height);
 
 		DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
 		     __FUNCTION__,
commit 3fc7cd2dc8e2b9c0d4143a9f6657f84050ae249a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Oct 23 20:59:50 2011 +0100

    sna/blt: Rename the composite paths
    
    Whilst re-reading, I found the old naming scheme slightly muddled and
    not nearly as descriptive as it should be. Hopefully this slight tweak
    reads better in 6 months' time.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 90b9b1c..80ccf9c 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -653,7 +653,7 @@ is_white(PicturePtr picture)
 }
 
 fastcall
-static void blt_fill_composite(struct sna *sna,
+static void blt_composite_fill(struct sna *sna,
 			       const struct sna_composite_op *op,
 			       const struct sna_composite_rectangles *r)
 {
@@ -764,21 +764,21 @@ inline static void _sna_blt_fill_boxes(struct sna *sna,
 	} while (1);
 }
 
-fastcall static void blt_fill_composite_box_no_offset(struct sna *sna,
+fastcall static void blt_composite_fill_box_no_offset(struct sna *sna,
 						      const struct sna_composite_op *op,
 						      const BoxRec *box)
 {
 	_sna_blt_fill_box(sna, &op->u.blt, box);
 }
 
-static void blt_fill_composite_boxes_no_offset(struct sna *sna,
+static void blt_composite_fill_boxes_no_offset(struct sna *sna,
 					       const struct sna_composite_op *op,
 					       const BoxRec *box, int n)
 {
 	_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
 }
 
-fastcall static void blt_fill_composite_box(struct sna *sna,
+fastcall static void blt_composite_fill_box(struct sna *sna,
 					    const struct sna_composite_op *op,
 					    const BoxRec *box)
 {
@@ -789,7 +789,7 @@ fastcall static void blt_fill_composite_box(struct sna *sna,
 			 box->y2 - box->y1);
 }
 
-static void blt_fill_composite_boxes(struct sna *sna,
+static void blt_composite_fill_boxes(struct sna *sna,
 				     const struct sna_composite_op *op,
 				     const BoxRec *box, int n)
 {
@@ -807,13 +807,13 @@ prepare_blt_clear(struct sna *sna,
 {
 	DBG(("%s\n", __FUNCTION__));
 
-	op->blt   = blt_fill_composite;
+	op->blt   = blt_composite_fill;
 	if (op->dst.x|op->dst.y) {
-		op->box   = blt_fill_composite_box;
-		op->boxes = blt_fill_composite_boxes;
+		op->box   = blt_composite_fill_box;
+		op->boxes = blt_composite_fill_boxes;
 	} else {
-		op->box   = blt_fill_composite_box_no_offset;
-		op->boxes = blt_fill_composite_boxes_no_offset;
+		op->box   = blt_composite_fill_box_no_offset;
+		op->boxes = blt_composite_fill_boxes_no_offset;
 	}
 	op->done  = blt_done;
 
@@ -830,13 +830,13 @@ prepare_blt_fill(struct sna *sna,
 {
 	DBG(("%s\n", __FUNCTION__));
 
-	op->blt   = blt_fill_composite;
+	op->blt   = blt_composite_fill;
 	if (op->dst.x|op->dst.y) {
-		op->box   = blt_fill_composite_box;
-		op->boxes = blt_fill_composite_boxes;
+		op->box   = blt_composite_fill_box;
+		op->boxes = blt_composite_fill_boxes;
 	} else {
-		op->box   = blt_fill_composite_box_no_offset;
-		op->boxes = blt_fill_composite_boxes_no_offset;
+		op->box   = blt_composite_fill_box_no_offset;
+		op->boxes = blt_composite_fill_boxes_no_offset;
 	}
 	op->done  = blt_done;
 
@@ -847,7 +847,7 @@ prepare_blt_fill(struct sna *sna,
 }
 
 fastcall static void
-blt_copy_composite(struct sna *sna,
+blt_composite_copy(struct sna *sna,
 		   const struct sna_composite_op *op,
 		   const struct sna_composite_rectangles *r)
 {
@@ -893,7 +893,7 @@ blt_copy_composite(struct sna *sna,
 			 x1, y1);
 }
 
-fastcall static void blt_copy_composite_box(struct sna *sna,
+fastcall static void blt_composite_copy_box(struct sna *sna,
 					    const struct sna_composite_op *op,
 					    const BoxRec *box)
 {
@@ -908,7 +908,7 @@ fastcall static void blt_copy_composite_box(struct sna *sna,
 			 box->y1 + op->dst.y);
 }
 
-static void blt_copy_composite_boxes(struct sna *sna,
+static void blt_composite_copy_boxes(struct sna *sna,
 				     const struct sna_composite_op *op,
 				     const BoxRec *box, int nbox)
 {
@@ -941,9 +941,9 @@ prepare_blt_copy(struct sna *sna,
 
 	DBG(("%s\n", __FUNCTION__));
 
-	op->blt   = blt_copy_composite;
-	op->box   = blt_copy_composite_box;
-	op->boxes = blt_copy_composite_boxes;
+	op->blt   = blt_composite_copy;
+	op->box   = blt_composite_copy_box;
+	op->boxes = blt_composite_copy_boxes;
 	if (sna->kgem.gen >= 60)
 		op->done  = gen6_blt_copy_done;
 	else
@@ -1119,9 +1119,9 @@ prepare_blt_put(struct sna *sna,
 		free_bo = src_bo;
 	}
 	if (src_bo) {
-		op->blt   = blt_copy_composite;
-		op->box   = blt_copy_composite_box;
-		op->boxes = blt_copy_composite_boxes;
+		op->blt   = blt_composite_copy;
+		op->box   = blt_composite_copy_box;
+		op->boxes = blt_composite_copy_boxes;
 
 		op->u.blt.src_pixmap = (void *)free_bo;
 		op->done = blt_vmap_done;
commit 280325900bf3c17c3177f6e3f52236d6b452e0f3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Oct 23 20:58:48 2011 +0100

    sna/blt: Share the common fill->boxes between blt and render paths
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 9b598c9..90b9b1c 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -223,10 +223,10 @@ noinline static void sna_blt_fill_begin(struct sna *sna,
 	kgem->nbatch += 9;
 }
 
-static void sna_blt_fill_one(struct sna *sna,
-			     const struct sna_blt_state *blt,
-			     int16_t x, int16_t y,
-			     int16_t width, int16_t height)
+inline static void sna_blt_fill_one(struct sna *sna,
+				    const struct sna_blt_state *blt,
+				    int16_t x, int16_t y,
+				    int16_t width, int16_t height)
 {
 	struct kgem *kgem = &sna->kgem;
 	uint32_t *b;
@@ -242,10 +242,11 @@ static void sna_blt_fill_one(struct sna *sna,
 		sna_blt_fill_begin(sna, blt);
 
 	b = kgem->batch + kgem->nbatch;
+	kgem->nbatch += 3;
+
 	b[0] = blt->cmd;
 	b[1] = y << 16 | x;
 	b[2] = b[1] + (height << 16 | width);
-	kgem->nbatch += 3;
 }
 
 static Bool sna_blt_copy_init(struct sna *sna,
@@ -679,6 +680,104 @@ static void blt_fill_composite(struct sna *sna,
 	sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1);
 }
 
+inline static void _sna_blt_fill_box(struct sna *sna,
+				     const struct sna_blt_state *blt,
+				     const BoxRec *box)
+{
+	struct kgem *kgem = &sna->kgem;
+	uint32_t *b;
+
+	DBG(("%s: (%d, %d), (%d, %d): %08x\n", __FUNCTION__,
+	     box->x1, box->y1, box->x2, box->y2,
+	     blt->pixel));
+
+	assert(box->x1 >= 0);
+	assert(box->y1 >= 0);
+	assert(box->y2 * blt->bo[0]->pitch <= blt->bo[0]->size);
+
+	if (!kgem_check_batch(kgem, 3))
+		sna_blt_fill_begin(sna, blt);
+
+	b = kgem->batch + kgem->nbatch;
+	kgem->nbatch += 3;
+
+	b[0] = blt->cmd;
+	*(uint64_t *)(b+1) = *(uint64_t *)box;
+}
+
+inline static void _sna_blt_fill_boxes(struct sna *sna,
+				       const struct sna_blt_state *blt,
+				       const BoxRec *box,
+				       int nbox)
+{
+	struct kgem *kgem = &sna->kgem;
+	uint32_t cmd = blt->cmd;
+
+	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, nbox));
+
+	if (!kgem_check_batch(kgem, 3))
+		sna_blt_fill_begin(sna, blt);
+
+	do {
+		uint32_t *b = kgem->batch + kgem->nbatch;
+		int nbox_this_time;
+
+		nbox_this_time = nbox;
+		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
+		assert(nbox_this_time);
+		nbox -= nbox_this_time;
+
+		kgem->nbatch += 3 * nbox_this_time;
+		while (nbox_this_time >= 8) {
+			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++;
+			b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++;
+			b[6] = cmd; *(uint64_t *)(b+7) = *(uint64_t *)box++;
+			b[9] = cmd; *(uint64_t *)(b+10) = *(uint64_t *)box++;
+			b[12] = cmd; *(uint64_t *)(b+13) = *(uint64_t *)box++;
+			b[15] = cmd; *(uint64_t *)(b+16) = *(uint64_t *)box++;
+			b[18] = cmd; *(uint64_t *)(b+19) = *(uint64_t *)box++;
+			b[21] = cmd; *(uint64_t *)(b+22) = *(uint64_t *)box++;
+			b += 24;
+			nbox_this_time -= 8;
+		}
+		if (nbox_this_time & 4) {
+			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++;
+			b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++;
+			b[6] = cmd; *(uint64_t *)(b+7) = *(uint64_t *)box++;
+			b[9] = cmd; *(uint64_t *)(b+10) = *(uint64_t *)box++;
+			b += 12;
+		}
+		if (nbox_this_time & 2) {
+			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++;
+			b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++;
+			b += 6;
+		}
+		if (nbox_this_time & 1) {
+			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++;
+		}
+
+		if (!nbox)
+			return;
+
+		sna_blt_fill_begin(sna, blt);
+	} while (1);
+}
+
+fastcall static void blt_fill_composite_box_no_offset(struct sna *sna,
+						      const struct sna_composite_op *op,
+						      const BoxRec *box)
+{
+	_sna_blt_fill_box(sna, &op->u.blt, box);
+}
+
+static void blt_fill_composite_boxes_no_offset(struct sna *sna,
+					       const struct sna_composite_op *op,
+					       const BoxRec *box, int n)
+{
+	_sna_blt_fill_boxes(sna, &op->u.blt, box, n);
+}
+
 fastcall static void blt_fill_composite_box(struct sna *sna,
 					    const struct sna_composite_op *op,
 					    const BoxRec *box)
@@ -709,8 +808,13 @@ prepare_blt_clear(struct sna *sna,
 	DBG(("%s\n", __FUNCTION__));
 
 	op->blt   = blt_fill_composite;
-	op->box   = blt_fill_composite_box;
-	op->boxes = blt_fill_composite_boxes;
+	if (op->dst.x|op->dst.y) {
+		op->box   = blt_fill_composite_box;
+		op->boxes = blt_fill_composite_boxes;
+	} else {
+		op->box   = blt_fill_composite_box_no_offset;
+		op->boxes = blt_fill_composite_boxes_no_offset;
+	}
 	op->done  = blt_done;
 
 	return sna_blt_fill_init(sna, &op->u.blt,
@@ -727,8 +831,13 @@ prepare_blt_fill(struct sna *sna,
 	DBG(("%s\n", __FUNCTION__));
 
 	op->blt   = blt_fill_composite;
-	op->box   = blt_fill_composite_box;
-	op->boxes = blt_fill_composite_boxes;
+	if (op->dst.x|op->dst.y) {
+		op->box   = blt_fill_composite_box;
+		op->boxes = blt_fill_composite_boxes;
+	} else {
+		op->box   = blt_fill_composite_box_no_offset;
+		op->boxes = blt_fill_composite_boxes_no_offset;
+	}
 	op->done  = blt_done;
 
 	return sna_blt_fill_init(sna, &op->u.blt, op->dst.bo,
@@ -1276,25 +1385,7 @@ fastcall static void sna_blt_fill_op_box(struct sna *sna,
 					 const struct sna_fill_op *op,
 					 const BoxRec *box)
 {
-	struct kgem *kgem = &sna->kgem;
-	uint32_t *b;
-
-	DBG(("%s: (%d, %d), (%d, %d): %08x\n", __FUNCTION__,
-	     box->x1, box->y1, box->x2, box->y2,
-	     op->base.u.blt.pixel));
-
-	assert(box->x1 >= 0);
-	assert(box->y1 >= 0);
-	assert(box->y2 * op->base.u.blt.bo[0]->pitch <= op->base.u.blt.bo[0]->size);
-
-	if (!kgem_check_batch(kgem, 3))
-		sna_blt_fill_begin(sna, &op->base.u.blt);
-
-	b = kgem->batch + kgem->nbatch;
-	kgem->nbatch += 3;
-
-	b[0] = op->base.u.blt.cmd;
-	*(uint64_t *)(b+1) = *(uint64_t *)box;
+	_sna_blt_fill_box(sna, &op->base.u.blt, box);
 }
 
 fastcall static void sna_blt_fill_op_boxes(struct sna *sna,
@@ -1302,59 +1393,7 @@ fastcall static void sna_blt_fill_op_boxes(struct sna *sna,
 					   const BoxRec *box,
 					   int nbox)
 {
-	struct kgem *kgem = &sna->kgem;
-	uint32_t cmd = op->base.u.blt.cmd;
-
-	DBG(("%s: %08x x %d\n", __FUNCTION__,
-	     op->base.u.blt.pixel, nbox));
-
-	if (!kgem_check_batch(kgem, 3))
-		sna_blt_fill_begin(sna, &op->base.u.blt);
-
-	do {
-		uint32_t *b = kgem->batch + kgem->nbatch;
-		int nbox_this_time;
-
-		nbox_this_time = nbox;
-		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
-			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
-		assert(nbox_this_time);
-		nbox -= nbox_this_time;
-
-		kgem->nbatch += 3 * nbox_this_time;
-		while (nbox_this_time >= 8) {
-			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++;
-			b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++;
-			b[6] = cmd; *(uint64_t *)(b+7) = *(uint64_t *)box++;
-			b[9] = cmd; *(uint64_t *)(b+10) = *(uint64_t *)box++;
-			b[12] = cmd; *(uint64_t *)(b+13) = *(uint64_t *)box++;
-			b[15] = cmd; *(uint64_t *)(b+16) = *(uint64_t *)box++;
-			b[18] = cmd; *(uint64_t *)(b+19) = *(uint64_t *)box++;
-			b[21] = cmd; *(uint64_t *)(b+22) = *(uint64_t *)box++;
-			b += 24;
-			nbox_this_time -= 8;
-		}
-		if (nbox_this_time & 4) {
-			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++;
-			b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++;
-			b[6] = cmd; *(uint64_t *)(b+7) = *(uint64_t *)box++;
-			b[9] = cmd; *(uint64_t *)(b+10) = *(uint64_t *)box++;
-			b += 12;
-		}
-		if (nbox_this_time & 2) {
-			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++;
-			b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++;
-			b += 6;
-		}
-		if (nbox_this_time & 1) {
-			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++;
-		}
-
-		if (!nbox)
-			return;
-
-		sna_blt_fill_begin(sna, &op->base.u.blt);
-	} while (1);
+	_sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox);
 }
 
 static void sna_blt_fill_op_done(struct sna *sna,
commit 56f65e65204f1ecbad598419e525e69f6179f9c7
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Oct 23 20:40:21 2011 +0100

    sna: Inline box_intersect()
    
    Most of the time in this branchy function is spent manipulating the
    stack for the function call, so inline it.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 9b6b2c7..8df94eb 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1729,7 +1729,7 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 			0, NULL);
 }
 
-static Bool
+inline static Bool
 box_intersect(BoxPtr a, const BoxRec *b)
 {
 	if (a->x1 < b->x1)
commit f65e73f8a3a013e79b753d625f79994408e717f7
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Oct 23 19:55:40 2011 +0100

    sna: Use fill->boxes to unroll FillSpans
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index d5605be..9b6b2c7 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1766,6 +1766,33 @@ sna_copy_init_blt(struct sna_copy_op *copy,
 	return sna->render.copy(sna, alu, src, src_bo, dst, dst_bo, copy);
 }
 
+static const BoxRec *
+find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y)
+{
+    const BoxRec *mid;
+
+    if (end == begin)
+	return end;
+
+    if (end - begin == 1) {
+	if (begin->y2 > y)
+	    return begin;
+	else
+	    return end;
+    }
+
+    mid = begin + (end - begin) / 2;
+    if (mid->y2 > y)
+	/* If no box is found in [begin, mid], the function
+	 * will return @mid, which is then known to be the
+	 * correct answer.
+	 */
+	return find_clip_box_for_y(begin, mid, y);
+    else
+	return find_clip_box_for_y(mid, end, y);
+}
+
+
 static Bool
 sna_fill_spans_blt(DrawablePtr drawable,
 		   struct kgem_bo *bo, struct sna_damage **damage,
@@ -1777,6 +1804,7 @@ sna_fill_spans_blt(DrawablePtr drawable,
 	PixmapPtr pixmap = get_drawable_pixmap(drawable);
 	int16_t dx, dy;
 	struct sna_fill_op fill;
+	BoxRec box[512], *b = box, *const last_box = box + ARRAY_SIZE(box);
 	static void * const jump[] = {
 		&&no_damage_translate,
 		&&damage_translate,
@@ -1805,17 +1833,25 @@ no_damage_translate:
 	dx += drawable->x;
 	dy += drawable->y;
 no_damage:
-	do {
-		BoxRec box;
-
-		box.x1 = pt->x + dx;
-		box.x2 = box.x1 + *width++;
-		box.y1 = pt->y + dy;
-		box.y2 = box.y1 + 1;
-		pt++;
+	{
+		unsigned offset = dx|dy;
+		do {
+			*(DDXPointRec *)b = *pt++;
+			if (offset) {
+				b->x1 += dx;
+				b->y1 += dy;
+			}
+			b->x2 = b->x1 + (int)*width++;
+			b->y2 = b->y1 + 1;
 
-		fill.box(sna, &fill, &box);
-	} while (--n);
+			if (++b == last_box) {
+				fill.boxes(sna, &fill, box, last_box - box);
+				b = box;
+			}
+		} while (--n);
+		if (b != box)
+			fill.boxes(sna, &fill, box, b - box);
+	}
 	goto done;
 
 damage_translate:
@@ -1823,18 +1859,22 @@ damage_translate:
 	dy += drawable->y;
 damage:
 	do {
-		BoxRec box;
-
-		box.x1 = pt->x + dx;
-		box.x2 = box.x1 + *width++;
-		box.y1 = pt->y + dy;
-		box.y2 = box.y1 + 1;
-		pt++;
+		*(DDXPointRec *)b = *pt++;
+		b->x1 += dx;
+		b->y1 += dy;
+		b->x2 = b->x1 + (int)*width++;
+		b->y2 = b->y1 + 1;
 
-		fill.box(sna, &fill, &box);
-		assert_pixmap_contains_box(pixmap, &box);
-		sna_damage_add_box(damage, &box);
+		if (++b == last_box) {
+			fill.boxes(sna, &fill, box, last_box - box);
+			sna_damage_add_boxes(damage, box, last_box - box, 0, 0);
+			b = box;
+		}
 	} while (--n);
+	if (b != box) {
+		fill.boxes(sna, &fill, box, b - box);
+		sna_damage_add_boxes(damage, box, b - box, 0, 0);
+	}
 	goto done;
 
 	{
@@ -1860,29 +1900,29 @@ no_damage_clipped:
 
 		if (clip.data == NULL) {
 			do {
-				BoxRec box;
-
-				box.x1 = pt->x;
-				box.y1 = pt->y;
-				box.x2 = box.x1 + (int)*width++;
-				box.y2 = box.y1 + 1;
-				pt++;
+				*(DDXPointRec *)b = *pt++;
+				b->x2 = b->x1 + (int)*width++;
+				b->y2 = b->y1 + 1;
 
-				if (box_intersect(&box, &clip.extents)) {
-					box.x1 += dx;
-					box.x2 += dx;
-					box.y1 += dy;
-					box.y2 += dy;
-					fill.box(sna, &fill, &box);
+				if (box_intersect(b, &clip.extents)) {
+					b->x1 += dx;
+					b->x2 += dx;
+					b->y1 += dy;
+					b->y2 += dy;
+					if (++b == last_box) {
+						fill.boxes(sna, &fill, box, last_box - box);
+						b = box;
+					}
 				}
 			} while (--n);
 		} else {
+			const BoxRec * const clip_start = RegionBoxptr(&clip);
+			const BoxRec * const clip_end = clip_start + clip.data->numRects;
 			do {
-				int nc = clip.data->numRects;
-				const BoxRec *b = RegionBoxptr(&clip);
 				int16_t X1 = pt->x;
 				int16_t y = pt->y;
 				int16_t X2 = X1 + (int)*width;
+				const BoxRec *c;
 
 				pt++;
 				width++;
@@ -1899,31 +1939,41 @@ no_damage_clipped:
 				if (X1 >= X2)
 					continue;
 
-				y += dy;
-				do {
-					if (b->y1 <= y && y < b->y2) {
-						int x1 = b->x1;
-						int x2 = b->x2;
-
-						if (x1 < X1)
-							x1 = X1;
-						x1 += dx;
-						if (x1 < 0)
-							x1 = 0;
-						if (x2 > X2)
-							x2 = X2;
-						x2 += dx;
-						if (x2 > pixmap->drawable.width)
-							x2 = pixmap->drawable.width;
-
-						if (x2 > x1)
-							fill.blt(sna, &fill, x1, y, x2-x1, 1);
+				c = find_clip_box_for_y(clip_start,
+							clip_end,
+							y);
+				while (c != clip_end) {
+					if (y + 1 <= c->y1)
+						break;
+
+					if (X2 <= c->x1)
+						continue;
+					if (X1 >= c->x2)
+						break;
+
+					b->x1 = c->x1;
+					b->x2 = c->x2;
+					c++;
+
+					if (b->x1 < X1)
+						b->x1 = X1;
+					if (b->x2 > X2)
+						b->x2 = X2;
+
+					b->x1 += dx;
+					b->x2 += dx;
+					b->y1 = y + dy;
+					b->y2 = b->y1 + 1;
+					if (++b == last_box) {
+						fill.boxes(sna, &fill, box, last_box - box);
+						b = box;
 					}
-					b++;
-				} while (--nc);
+				}
 			} while (--n);
 			RegionUninit(&clip);
 		}
+		if (b != box)
+			fill.boxes(sna, &fill, box, b - box);
 		goto done;
 	}
 
@@ -1950,31 +2000,30 @@ damage_clipped:
 
 		if (clip.data == NULL) {
 			do {
-				BoxRec box;
-
-				box.x1 = pt->x;
-				box.y1 = pt->y;
-				box.x2 = box.x1 + (int)*width++;
-				box.y2 = box.y1 + 1;
-				pt++;
+				*(DDXPointRec *)b = *pt++;
+				b->x2 = b->x1 + (int)*width++;
+				b->y2 = b->y1 + 1;
 
-				if (box_intersect(&box, &clip.extents)) {
-					box.x1 += dx;
-					box.x2 += dx;
-					box.y1 += dy;
-					box.y2 += dy;
-					fill.box(sna, &fill, &box);
-					assert_pixmap_contains_box(pixmap, &box);
-					sna_damage_add_box(damage, &box);
+				if (box_intersect(b, &clip.extents)) {
+					b->x1 += dx;
+					b->x2 += dx;
+					b->y1 += dy;
+					b->y2 += dy;
+					if (++b == last_box) {
+						fill.boxes(sna, &fill, box, last_box - box);
+						sna_damage_add_boxes(damage, box, b - box, 0, 0);
+						b = box;
+					}
 				}
 			} while (--n);
 		} else {
+			const BoxRec * const clip_start = RegionBoxptr(&clip);
+			const BoxRec * const clip_end = clip_start + clip.data->numRects;
 			do {
-				int nc = clip.data->numRects;
-				const BoxRec *b = RegionBoxptr(&clip);
 				int16_t X1 = pt->x;
 				int16_t y = pt->y;
 				int16_t X2 = X1 + (int)*width;
+				const BoxRec *c;
 
 				pt++;
 				width++;
@@ -1991,41 +2040,44 @@ damage_clipped:
 				if (X1 >= X2)
 					continue;
 
-				y += dy;
-				do {
-					if (b->y1 <= y && y < b->y2) {
-						int x1 = b->x1;
-						int x2 = b->x2;
-
-						if (x1 < X1)
-							x1 = X1;
-						x1 += dx;
-						if (x1 < 0)
-							x1 = 0;
-						if (x2 > X2)
-							x2 = X2;
-						x2 += dx;
-						if (x2 > pixmap->drawable.width)
-							x2 = pixmap->drawable.width;
-
-						if (x2 > x1) {
-							BoxRec box;
-
-							box.x1 = x1;
-							box.y1 = y;
-							box.x2 = x2;
-							box.y2 = box.y1 + 1;
-
-							fill.box(sna, &fill, &box);
-							assert_pixmap_contains_box(pixmap, &box);
-							sna_damage_add_box(damage, &box);
-						}
+				c = find_clip_box_for_y(clip_start,
+							clip_end,
+							y);
+				while (c != clip_end) {
+					if (y + 1 <= c->y1)
+						break;
+
+					if (X2 <= c->x1)
+						continue;
+					if (X1 >= c->x2)
+						break;
+
+					b->x1 = c->x1;
+					b->x2 = c->x2;
+					c++;
+
+					if (b->x1 < X1)
+						b->x1 = X1;
+					if (b->x2 > X2)
+						b->x2 = X2;
+
+					b->x1 += dx;
+					b->x2 += dx;
+					b->y1 = y + dy;
+					b->y2 = b->y1 + 1;
+					if (++b == last_box) {
+						fill.boxes(sna, &fill, box, last_box - box);
+						sna_damage_add_boxes(damage, box, last_box - box, 0, 0);
+						b = box;
 					}
-					b++;
-				} while (--nc);
+				}
 			} while (--n);
 			RegionUninit(&clip);
 		}
+		if (b != box) {
+			fill.boxes(sna, &fill, box, b - box);
+			sna_damage_add_boxes(damage, box, b - box, 0, 0);
+		}
 		goto done;
 	}
 
commit 94f9cd40dcdd444d3b5a5598deb0f3d04026df3a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Oct 23 19:11:35 2011 +0100

    sna: Use fill->boxes to unroll PolyPoint
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 81459b2..d5605be 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2274,6 +2274,7 @@ sna_poly_point_blt(DrawablePtr drawable,
 	struct sna *sna = to_sna_from_drawable(drawable);
 	PixmapPtr pixmap = get_drawable_pixmap(drawable);
 	RegionPtr clip = fbGetCompositeClip(gc);
+	BoxRec box[512], *b = box, * const last_box = box + ARRAY_SIZE(box);
 	struct sna_fill_op fill;
 	DDXPointRec last;
 	int16_t dx, dy;
@@ -2295,53 +2296,61 @@ sna_poly_point_blt(DrawablePtr drawable,
 
 		sna_damage_add_points(damage, pt, n, last.x, last.y);
 		do {
-			BoxRec r;
-
-			r.x1 = pt->x;
-			r.y1 = pt->y;
+			b->x1 = pt->x;
+			b->y1 = pt->y;
 			pt++;
 
-			r.x1 += last.x;
-			r.y1 += last.y;
+			b->x1 += last.x;
+			b->y1 += last.y;
 			if (mode == CoordModePrevious) {
-				last.x = r.x1;
-				last.y = r.y1;
+				last.x = b->x1;
+				last.y = b->y1;
 			}
 
-			r.x2 = r.x1 + 1;
-			r.y2 = r.y1 + 1;
-			fill.box(sna, &fill, &r);
+			b->x2 = b->x1 + 1;
+			b->y2 = b->y1 + 1;
+			if (++b == last_box) {
+				fill.boxes(sna, &fill, box, last_box - box);
+				b = box;
+			}
 		} while (--n);
-	} else while (n--) {
-		int x, y;
-
-		x = pt->x;
-		y = pt->y;
-		pt++;
-		if (mode == CoordModePrevious) {
-			x += last.x;
-			y += last.y;
-			last.x = x;
-			last.y = y;
-		} else {
-			x += drawable->x;
-			y += drawable->y;
-		}
-
-		if (RegionContainsPoint(clip, x, y, NULL)) {
-			fill.blt(sna, &fill, x + dx, y + dy, 1, 1);
-			if (damage) {
-				BoxRec box;
+		if (b != box)
+			fill.boxes(sna, &fill, box, last_box - box);
+	} else {
+		while (n--) {
+			int x, y;
 
-				box.x1 = x + dx;
-				box.y1 = y + dy;
-				box.x2 = box.x1 + 1;
-				box.y2 = box.y1 + 1;
+			x = pt->x;
+			y = pt->y;
+			pt++;
+			if (mode == CoordModePrevious) {
+				x += last.x;
+				y += last.y;
+				last.x = x;
+				last.y = y;
+			} else {
+				x += drawable->x;
+				y += drawable->y;
+			}
 
-				assert_pixmap_contains_box(pixmap, &box);
-				sna_damage_add_box(damage, &box);
+			if (RegionContainsPoint(clip, x, y, NULL)) {
+				b->x1 = x + dx;
+				b->y1 = y + dy;
+				b->x2 = b->x1 + 1;
+				b->y2 = b->y1 + 1;
+				if (++b == last_box){
+					fill.boxes(sna, &fill, box, last_box - box);
+					if (damage)
+						sna_damage_add_boxes(damage, box, last_box-box, 0, 0);
+					b = box;
+				}
 			}
 		}
+		if (b != box){
+			fill.boxes(sna, &fill, box, b - box);
+			if (damage)
+				sna_damage_add_boxes(damage, box, b-box, 0, 0);
+		}
 	}
 	fill.done(sna, &fill);
 	return TRUE;
commit 5e08ca75a5eb852c09b9acf6004668bc63f1fe6c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Oct 23 17:53:54 2011 +0100

    sna: Use the new fill->boxes for PolyRectangle
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 82ca094..81459b2 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3893,49 +3893,37 @@ no_damage_zero:
 	dy += drawable->y;
 
 	do {
-		BoxRec box;
+		BoxRec box[4];
 
 		if (r->width <= 1 || r->height <= 1) {
-			box.x1 = r->x + dx;
-			box.y1 = r->y + dy;
-			box.x2 = box.x1 + r->width + 1;
-			box.y2 = box.y1 + r->height + 1;
+			box[0].x1 = r->x + dx;
+			box[0].y1 = r->y + dy;
+			box[0].x2 = box[0].x1 + r->width + 1;
+			box[0].y2 = box[0].y1 + r->height + 1;
 			DBG(("%s: blt (%d, %d), (%d, %d)\n",
 			     __FUNCTION__,
-			     box.x1, box.y1, box.x2, box.y2));
-			fill.box(sna, &fill, &box);
+			     box[0].x1, box[0].y1, box[0].x2, box[0].y2));
+			fill.box(sna, &fill, &box[0]);
 		} else {
-			box.x1 = r->x + dx;
-			box.y1 = r->y + dy;
-			box.x2 = box.x1 + r->width + 1;
-			box.y2 = box.y1 + 1;
-			DBG(("%s: blt (%d, %d), (%d, %d)\n",
-			     __FUNCTION__,
-			     box.x1, box.y1, box.x2, box.y2));
-			fill.box(sna, &fill, &box);
+			box[0].x1 = r->x + dx;
+			box[0].y1 = r->y + dy;
+			box[0].x2 = box[0].x1 + r->width + 1;
+			box[0].y2 = box[0].y1 + 1;
 
-			box.y1 += r->height;
-			box.y2 += r->height;
-			DBG(("%s: blt (%d, %d), (%d, %d)\n",
-			     __FUNCTION__,
-			     box.x1, box.y1, box.x2, box.y2));
-			fill.box(sna, &fill, &box);
+			box[1] = box[0];
+			box[1].y1 += r->height;
+			box[1].y2 += r->height;
 
-			box.y1 = r->y + dy + 1;
-			box.y2 = box.y1 + r->height - 1;
-			box.x1 = r->x + dx;
-			box.x2 = box.x1 + 1;
-			DBG(("%s: blt (%d, %d), (%d, %d)\n",
-			     __FUNCTION__,
-			     box.x1, box.y1, box.x2, box.y2));
-			fill.box(sna, &fill, &box);
+			box[2].y1 = r->y + dy + 1;
+			box[2].y2 = box[2].y1 + r->height - 1;
+			box[2].x1 = r->x + dx;
+			box[2].x2 = box[2].x1 + 1;
 
-			box.x1 += r->width;
-			box.x2 += r->width;
-			DBG(("%s: blt (%d, %d), (%d, %d)\n",
-			     __FUNCTION__,
-			     box.x1, box.y1, box.x2, box.y2));
-			fill.box(sna, &fill, &box);
+			box[3] = box[2];
+			box[3].x1 += r->width;
+			box[3].x2 += r->width;
+
+			fill.boxes(sna, &fill, box, 4);
 		}
 		r++;
 	} while (--n);
@@ -4092,7 +4080,7 @@ no_damage_wide:
 		dy += drawable->y;
 
 		do {
-			BoxRec box;
+			BoxRec box[4];
 			int16_t x = r->x + dx;
 			int16_t y = r->y + dy;
 			int16_t width = r->width;
@@ -4101,42 +4089,40 @@ no_damage_wide:
 
 			if (height < offset2 || width < offset1) {
 				if (height == 0) {
-					box.x1 = x;
-					box.x2 = x + width + 1;
+					box[0].x1 = x;
+					box[0].x2 = x + width + 1;
 				} else {
-					box.x1 = x - offset1;
-					box.x2 = box.x1 + width + offset2;
+					box[0].x1 = x - offset1;
+					box[0].x2 = box[0].x1 + width + offset2;
 				}
 				if (width == 0) {
-					box.y1 = y;
-					box.y2 = y + height + 1;
+					box[0].y1 = y;
+					box[0].y2 = y + height + 1;
 				} else {
-					box.y1 = y - offset1;
-					box.y2 = box.y1 + height + offset2;
+					box[0].y1 = y - offset1;
+					box[0].y2 = box[0].y1 + height + offset2;
 				}
-				fill.box(sna, &fill, &box);
+				fill.box(sna, &fill, &box[0]);
 			} else {
-				box.x1 = x - offset1;
-				box.x2 = box.x1 + width + offset2;
-				box.y1 = y - offset1;
-				box.y2 = box.y1 + offset2;
-				fill.box(sna, &fill, &box);
+				box[0].x1 = x - offset1;
+				box[0].x2 = box[0].x1 + width + offset2;
+				box[0].y1 = y - offset1;
+				box[0].y2 = box[0].y1 + offset2;
 
-				box.x1 = x - offset1;
-				box.x2 = box.x1 + offset2;
-				box.y1 = y + offset3;
-				box.y2 = y + height - offset1;
-				fill.box(sna, &fill, &box);
+				box[1] = box[0];
+				box[1].y1 = y + height - offset1;
+				box[1].y2 = box[1].y1 + offset2;
 
-				box.x1 = x + width - offset1;
-				box.x2 = box.x1 + offset2;
-				fill.box(sna, &fill, &box);
+				box[2].x1 = x - offset1;
+				box[2].x2 = box[2].x1 + offset2;
+				box[2].y1 = y + offset3;
+				box[2].y2 = y + height - offset1;
 
-				box.x1 = x - offset1;
-				box.x2 = box.x1 + width + offset2;
-				box.y1 = y + height - offset1;
-				box.y2 = box.y1 + offset2;
-				fill.box(sna, &fill, &box);
+				box[3] = box[2];
+				box[3].x1 = x + width - offset1;
+				box[3].x2 = box[3].x1 + offset2;
+
+				fill.boxes(sna, &fill, box, 4);
 			}
 
 		} while (--n);
commit efdc7985e2f619ca94cf6ad77ead4f6867ceedc5
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Oct 26 14:57:28 2011 +0100

    self-copy-damage

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index e73804d..82ca094 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1299,7 +1299,8 @@ sna_self_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 			goto fallback;
 		}
 
-		sna_damage_add_boxes(&priv->gpu_damage, box, n, tx, ty);
+		if (!priv->gpu_only)
+			sna_damage_add_boxes(&priv->gpu_damage, box, n, tx, ty);
 	} else {
 		FbBits *dst_bits, *src_bits;
 		int stride, bpp;
commit 0ed3426a810336b666604d34c10f996f318ebf82
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Oct 21 23:45:28 2011 +0100

    sna: Convert diagonal zero-width lines into blits
    
    Although this is slower than falling back to swrast for x11perf (up to
    4x slower on SNB), it is still faster than doing that rasterisation
    through a WC-mapping, and much faster in ordinary usage due to avoiding
    the readback hit.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 7c72f41..3ec2e08 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -2021,9 +2021,9 @@ static void gen2_emit_fill_state(struct sna *sna,
 }
 
 static void
-gen2_render_fill_blt(struct sna *sna,
-		     const struct sna_fill_op *op,
-		     int16_t x, int16_t y, int16_t w, int16_t h)
+gen2_render_fill_op_blt(struct sna *sna,
+			const struct sna_fill_op *op,
+			int16_t x, int16_t y, int16_t w, int16_t h)
 {
 	if (!gen2_get_rectangles(sna, &op->base, 1)) {
 		gen2_emit_fill_state(sna, &op->base);
@@ -2039,9 +2039,9 @@ gen2_render_fill_blt(struct sna *sna,
 }
 
 fastcall static void
-gen2_render_fill_box(struct sna *sna,
-		     const struct sna_fill_op *op,
-		     const BoxRec *box)
+gen2_render_fill_op_box(struct sna *sna,
+			const struct sna_fill_op *op,
+			const BoxRec *box)
 {
 	if (!gen2_get_rectangles(sna, &op->base, 1)) {
 		gen2_emit_fill_state(sna, &op->base);
@@ -2056,8 +2056,37 @@ gen2_render_fill_box(struct sna *sna,
 	VERTEX(box->y1);
 }
 
+fastcall static void
+gen2_render_fill_op_boxes(struct sna *sna,
+			  const struct sna_fill_op *op,
+			  const BoxRec *box,
+			  int nbox)
+{
+	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
+	     box->x1, box->y1, box->x2, box->y2, n));
+
+	do {
+		int nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
+		if (nbox_this_time == 0) {
+			gen2_emit_fill_state(sna, &op->base);
+			nbox_this_time = gen2_get_rectangles(sna, &op->base, nbox);
+		}
+		nbox -= nbox_this_time;
+
+		do {
+			VERTEX(box->x2);
+			VERTEX(box->y2);
+			VERTEX(box->x1);
+			VERTEX(box->y2);
+			VERTEX(box->x1);
+			VERTEX(box->y1);
+			box++;
+		} while (--nbox_this_time);
+	} while (nbox);
+}
+
 static void
-gen2_render_fill_done(struct sna *sna, const struct sna_fill_op *op)
+gen2_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
 {
 	gen2_vertex_flush(sna);
 	_kgem_set_mode(&sna->kgem, KGEM_RENDER);
@@ -2114,9 +2143,10 @@ gen2_render_fill(struct sna *sna, uint8_t alu,
 				    tmp);
 	}
 
-	tmp->blt  = gen2_render_fill_blt;
-	tmp->box  = gen2_render_fill_box;
-	tmp->done = gen2_render_fill_done;
+	tmp->blt   = gen2_render_fill_op_blt;
+	tmp->box   = gen2_render_fill_op_box;
+	tmp->boxes = gen2_render_fill_op_boxes;
+	tmp->done  = gen2_render_fill_op_done;
 
 	gen2_emit_fill_state(sna, &tmp->base);
 	return TRUE;
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 6d91f32..b9ce45f 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -3776,9 +3776,9 @@ gen3_render_fill_boxes(struct sna *sna,
 }
 
 static void
-gen3_render_fill_blt(struct sna *sna,
-		     const struct sna_fill_op *op,
-		     int16_t x, int16_t y, int16_t w, int16_t h)
+gen3_render_fill_op_blt(struct sna *sna,
+			const struct sna_fill_op *op,
+			int16_t x, int16_t y, int16_t w, int16_t h)
 {
 	if (!gen3_get_rectangles(sna, &op->base, 1)) {
 		gen3_emit_composite_state(sna, &op->base);
@@ -3794,9 +3794,9 @@ gen3_render_fill_blt(struct sna *sna,
 }
 
 fastcall static void
-gen3_render_fill_box(struct sna *sna,
-		     const struct sna_fill_op *op,
-		     const BoxRec *box)
+gen3_render_fill_op_box(struct sna *sna,
+			const struct sna_fill_op *op,
+			const BoxRec *box)
 {
 	if (!gen3_get_rectangles(sna, &op->base, 1)) {
 		gen3_emit_composite_state(sna, &op->base);
@@ -3811,8 +3811,37 @@ gen3_render_fill_box(struct sna *sna,
 	OUT_VERTEX(box->y1);
 }
 
+/* Emit nbox (>= 1, the loops are do/while) fill rectangles as three vertices
+ * each, consuming per pass the count returned by gen3_get_rectangles(). */
+fastcall static void
+gen3_render_fill_op_boxes(struct sna *sna, const struct sna_fill_op *op,
+			  const BoxRec *box, int nbox)
+{
+	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
+	     box->x1, box->y1, box->x2, box->y2, nbox));
+
+	do {
+		int nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+		if (nbox_this_time == 0) { /* nothing reserved: re-emit state, retry */
+			gen3_emit_composite_state(sna, &op->base);
+			nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox);
+		}
+		nbox -= nbox_this_time;
+		/* corners emitted per box: (x2,y2), (x1,y2), (x1,y1) */
+		do {
+			OUT_VERTEX(box->x2);
+			OUT_VERTEX(box->y2);
+			OUT_VERTEX(box->x1);
+			OUT_VERTEX(box->y2);
+			OUT_VERTEX(box->x1);
+			OUT_VERTEX(box->y1);
+			box++;
+		} while (--nbox_this_time);
+	} while (nbox);
+}
+
 static void
-gen3_render_fill_done(struct sna *sna, const struct sna_fill_op *op)
+gen3_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
 {
 	gen3_vertex_flush(sna);
 	_kgem_set_mode(&sna->kgem, KGEM_RENDER);
@@ -3859,6 +3888,8 @@ gen3_render_fill(struct sna *sna, uint8_t alu,
 	tmp->base.dst.bo = dst_bo;
 	tmp->base.floats_per_vertex = 2;
 	tmp->base.floats_per_rect = 6;
+	tmp->base.need_magic_ca_pass = 0;
+	tmp->base.has_component_alpha = 0;
 
 	tmp->base.src.u.gen3.type = SHADER_CONSTANT;
 	tmp->base.src.u.gen3.mode =
@@ -3869,9 +3900,10 @@ gen3_render_fill(struct sna *sna, uint8_t alu,
 	if (!kgem_check_bo(&sna->kgem, dst_bo, NULL))
 		kgem_submit(&sna->kgem);
 
-	tmp->blt  = gen3_render_fill_blt;
-	tmp->box  = gen3_render_fill_box;
-	tmp->done = gen3_render_fill_done;
+	tmp->blt   = gen3_render_fill_op_blt;
+	tmp->box   = gen3_render_fill_op_box;
+	tmp->boxes = gen3_render_fill_op_boxes;
+	tmp->done  = gen3_render_fill_op_done;
 
 	gen3_emit_composite_state(sna, &tmp->base);
 	gen3_align_vertex(sna, &tmp->base);
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 0092f60..e4a40fc 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2497,24 +2497,38 @@ gen4_render_fill_boxes(struct sna *sna,
 }
 
 static void
-gen4_render_fill_blt(struct sna *sna, const struct sna_fill_op *op,
-		     int16_t x, int16_t y, int16_t w, int16_t h)
+gen4_render_fill_op_blt(struct sna *sna, const struct sna_fill_op *op,
+			int16_t x, int16_t y, int16_t w, int16_t h)
 {
 	gen4_render_fill_rectangle(sna, &op->base, x, y, w, h);
 }
 
 fastcall static void
-gen4_render_fill_box(struct sna *sna,
-		     const struct sna_fill_op *op,
-		     const BoxRec *box)
+gen4_render_fill_op_box(struct sna *sna,
+			const struct sna_fill_op *op,
+			const BoxRec *box)
 {
 	gen4_render_fill_rectangle(sna, &op->base,
 				   box->x1, box->y1,
 				   box->x2-box->x1, box->y2-box->y1);
 }
 
+/* One gen4_render_fill_rectangle() per box; do/while, so nbox must be >= 1. */
+fastcall static void
+gen4_render_fill_op_boxes(struct sna *sna, const struct sna_fill_op *op,
+			  const BoxRec *box, int nbox)
+{
+	const BoxRec *r = box;
+
+	do {
+		gen4_render_fill_rectangle(sna, &op->base, r->x1, r->y1,
+					   r->x2 - r->x1, r->y2 - r->y1);
+		r++;
+	} while (--nbox);
+}
+
 static void
-gen4_render_fill_done(struct sna *sna, const struct sna_fill_op *op)
+gen4_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
 {
 	gen4_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
@@ -2573,6 +2587,8 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
 
 	op->base.is_affine = TRUE;
 	op->base.floats_per_vertex = 3;
+	op->base.need_magic_ca_pass = 0;
+	op->base.has_component_alpha = 0;
 	op->base.u.gen4.wm_kernel = WM_KERNEL;
 	op->base.u.gen4.ve_id = 1;
 
@@ -2582,9 +2598,10 @@ gen4_render_fill(struct sna *sna, uint8_t alu,
 	gen4_fill_bind_surfaces(sna, &op->base);
 	gen4_align_vertex(sna, &op->base);
 
-	op->blt  = gen4_render_fill_blt;
-	op->box  = gen4_render_fill_box;
-	op->done = gen4_render_fill_done;
+	op->blt   = gen4_render_fill_op_blt;
+	op->box   = gen4_render_fill_op_box;
+	op->boxes = gen4_render_fill_op_boxes;
+	op->done  = gen4_render_fill_op_done;
 	return TRUE;
 }
 
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 041e918..e72283e 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2483,9 +2483,9 @@ gen5_render_fill_boxes(struct sna *sna,
 }
 
 static void
-gen5_render_fill_blt(struct sna *sna,
-		     const struct sna_fill_op *op,
-		     int16_t x, int16_t y, int16_t w, int16_t h)
+gen5_render_fill_op_blt(struct sna *sna,
+			const struct sna_fill_op *op,
+			int16_t x, int16_t y, int16_t w, int16_t h)
 {
 	DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x,y,w,h));
 
@@ -2508,9 +2508,9 @@ gen5_render_fill_blt(struct sna *sna,
 }
 
 fastcall static void
-gen5_render_fill_box(struct sna *sna,
-		     const struct sna_fill_op *op,
-		     const BoxRec *box)
+gen5_render_fill_op_box(struct sna *sna,
+			const struct sna_fill_op *op,
+			const BoxRec *box)
 {
 	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
 	     box->x1, box->y1, box->x2, box->y2));
@@ -2533,9 +2533,43 @@ gen5_render_fill_box(struct sna *sna,
 	OUT_VERTEX_F(0);
 }
 
+/* Emit nbox (>= 1, the loops are do/while) fill rectangles as three vertices
+ * each, consuming per pass the count returned by gen5_get_rectangles(). */
+fastcall static void
+gen5_render_fill_op_boxes(struct sna *sna, const struct sna_fill_op *op,
+			  const BoxRec *box, int nbox)
+{
+	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
+	     box->x1, box->y1, box->x2, box->y2, nbox));
+
+	do {
+		int nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox);
+		if (nbox_this_time == 0) { /* nothing reserved: rebind, then retry */
+			gen5_fill_bind_surfaces(sna, &op->base);
+			nbox_this_time = gen5_get_rectangles(sna, &op->base, nbox);
+		}
+		nbox -= nbox_this_time;
+		/* per box: (x2,y2,1,1), (x1,y2,0,1), (x1,y1,0,0) */
+		do {
+			OUT_VERTEX(box->x2, box->y2);
+			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(1);
+
+			OUT_VERTEX(box->x1, box->y2);
+			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(1);
+
+			OUT_VERTEX(box->x1, box->y1);
+			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(0);
+			box++;
+		} while (--nbox_this_time);
+	} while (nbox);
+}
+
 static void
-gen5_render_fill_done(struct sna *sna,
-		      const struct sna_fill_op *op)
+gen5_render_fill_op_done(struct sna *sna,
+			 const struct sna_fill_op *op)
 {
 	gen5_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
@@ -2578,6 +2612,9 @@ gen5_render_fill(struct sna *sna, uint8_t alu,
 	op->base.dst.bo = dst_bo;
 	op->base.dst.x = op->base.dst.y = 0;
 
+	op->base.need_magic_ca_pass = 0;
+	op->base.has_component_alpha = 0;
+
 	op->base.src.bo =
 		sna_render_get_solid(sna,
 				     sna_rgba_for_color(color,
@@ -2600,9 +2637,10 @@ gen5_render_fill(struct sna *sna, uint8_t alu,
 	gen5_fill_bind_surfaces(sna, &op->base);
 	gen5_align_vertex(sna, &op->base);
 
-	op->blt  = gen5_render_fill_blt;
-	op->box  = gen5_render_fill_box;
-	op->done = gen5_render_fill_done;
+	op->blt   = gen5_render_fill_op_blt;
+	op->box   = gen5_render_fill_op_box;
+	op->boxes = gen5_render_fill_op_boxes;
+	op->done  = gen5_render_fill_op_done;
 	return TRUE;
 }
 
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 838819d..05e6d63 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2696,9 +2696,9 @@ gen6_render_fill_boxes(struct sna *sna,
 }
 
 static void
-gen6_render_fill_blt(struct sna *sna,
-		     const struct sna_fill_op *op,
-		     int16_t x, int16_t y, int16_t w, int16_t h)
+gen6_render_op_fill_blt(struct sna *sna,
+			const struct sna_fill_op *op,
+			int16_t x, int16_t y, int16_t w, int16_t h)
 {
 	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
 
@@ -2721,9 +2721,9 @@ gen6_render_fill_blt(struct sna *sna,
 }
 
 fastcall static void
-gen6_render_fill_box(struct sna *sna,
-		     const struct sna_fill_op *op,
-		     const BoxRec *box)
+gen6_render_op_fill_box(struct sna *sna,
+			const struct sna_fill_op *op,
+			const BoxRec *box)
 {
 	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
 	     box->x1, box->y1, box->x2, box->y2));
@@ -2746,8 +2746,42 @@ gen6_render_fill_box(struct sna *sna,
 	OUT_VERTEX_F(0);
 }
 
+/* Emit nbox (>= 1, the loops are do/while) fill rectangles as three vertices
+ * each, consuming per pass the count returned by gen6_get_rectangles(). */
+fastcall static void
+gen6_render_op_fill_boxes(struct sna *sna, const struct sna_fill_op *op,
+			  const BoxRec *box, int nbox)
+{
+	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
+	     box->x1, box->y1, box->x2, box->y2, nbox));
+
+	do {
+		int nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox);
+		if (nbox_this_time == 0) { /* nothing reserved: re-emit state, retry */
+			gen6_emit_fill_state(sna, &op->base);
+			nbox_this_time = gen6_get_rectangles(sna, &op->base, nbox);
+		}
+		nbox -= nbox_this_time;
+		/* per box: (x2,y2,1,1), (x1,y2,0,1), (x1,y1,0,0) */
+		do {
+			OUT_VERTEX(box->x2, box->y2);
+			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(1);
+
+			OUT_VERTEX(box->x1, box->y2);
+			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(1);
+
+			OUT_VERTEX(box->x1, box->y1);
+			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(0);
+			box++;
+		} while (--nbox_this_time);
+	} while (nbox);
+}
+
 static void
-gen6_render_fill_done(struct sna *sna, const struct sna_fill_op *op)
+gen6_render_op_fill_done(struct sna *sna, const struct sna_fill_op *op)
 {
 	gen6_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
@@ -2807,6 +2841,8 @@ gen6_render_fill(struct sna *sna, uint8_t alu,
 	op->base.mask.repeat = SAMPLER_EXTEND_NONE;
 
 	op->base.is_affine = TRUE;
+	op->base.has_component_alpha = FALSE;
+	op->base.need_magic_ca_pass = FALSE;
 	op->base.floats_per_vertex = 3;
 	op->base.floats_per_rect = 9;
 
@@ -2821,9 +2857,10 @@ gen6_render_fill(struct sna *sna, uint8_t alu,
 	gen6_emit_fill_state(sna, &op->base);
 	gen6_align_vertex(sna, &op->base);
 
-	op->blt  = gen6_render_fill_blt;
-	op->box  = gen6_render_fill_box;
-	op->done = gen6_render_fill_done;
+	op->blt  = gen6_render_op_fill_blt;
+	op->box  = gen6_render_op_fill_box;
+	op->boxes = gen6_render_op_fill_boxes;
+	op->done = gen6_render_op_fill_done;
 	return TRUE;
 }
 
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index ccc0037..05b65f3 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2846,9 +2846,9 @@ gen7_render_fill_boxes(struct sna *sna,
 }
 
 static void
-gen7_render_fill_blt(struct sna *sna,
-		     const struct sna_fill_op *op,
-		     int16_t x, int16_t y, int16_t w, int16_t h)
+gen7_render_fill_op_blt(struct sna *sna,
+			const struct sna_fill_op *op,
+			int16_t x, int16_t y, int16_t w, int16_t h)
 {
 	DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h));
 
@@ -2871,9 +2871,9 @@ gen7_render_fill_blt(struct sna *sna,
 }
 
 fastcall static void
-gen7_render_fill_box(struct sna *sna,
-		     const struct sna_fill_op *op,
-		     const BoxRec *box)
+gen7_render_fill_op_box(struct sna *sna,
+			const struct sna_fill_op *op,
+			const BoxRec *box)
 {
 	DBG(("%s: (%d, %d),(%d, %d)\n", __FUNCTION__,
 	     box->x1, box->y1, box->x2, box->y2));
@@ -2896,8 +2896,42 @@ gen7_render_fill_box(struct sna *sna,
 	OUT_VERTEX_F(0);
 }
 
+/* Emit nbox (>= 1, the loops are do/while) fill rectangles as three vertices
+ * each, consuming per pass the count returned by gen7_get_rectangles(). */
+fastcall static void
+gen7_render_fill_op_boxes(struct sna *sna, const struct sna_fill_op *op,
+			  const BoxRec *box, int nbox)
+{
+	DBG(("%s: (%d, %d),(%d, %d)... x %d\n", __FUNCTION__,
+	     box->x1, box->y1, box->x2, box->y2, nbox));
+
+	do {
+		int nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox);
+		if (nbox_this_time == 0) { /* nothing reserved: re-emit state, retry */
+			gen7_emit_fill_state(sna, &op->base);
+			nbox_this_time = gen7_get_rectangles(sna, &op->base, nbox);
+		}
+		nbox -= nbox_this_time;
+		/* per box: (x2,y2,1,1), (x1,y2,0,1), (x1,y1,0,0) */
+		do {
+			OUT_VERTEX(box->x2, box->y2);
+			OUT_VERTEX_F(1);
+			OUT_VERTEX_F(1);
+
+			OUT_VERTEX(box->x1, box->y2);
+			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(1);
+
+			OUT_VERTEX(box->x1, box->y1);
+			OUT_VERTEX_F(0);
+			OUT_VERTEX_F(0);
+			box++;
+		} while (--nbox_this_time);
+	} while (nbox);
+}
+
 static void
-gen7_render_fill_done(struct sna *sna, const struct sna_fill_op *op)
+gen7_render_fill_op_done(struct sna *sna, const struct sna_fill_op *op)
 {
 	gen7_vertex_flush(sna);
 	kgem_bo_destroy(&sna->kgem, op->base.src.bo);
@@ -2971,9 +3005,10 @@ gen7_render_fill(struct sna *sna, uint8_t alu,
 	gen7_emit_fill_state(sna, &op->base);
 	gen7_align_vertex(sna, &op->base);
 
-	op->blt  = gen7_render_fill_blt;
-	op->box  = gen7_render_fill_box;
-	op->done = gen7_render_fill_done;
+	op->blt   = gen7_render_fill_op_blt;
+	op->box   = gen7_render_fill_op_box;
+	op->boxes = gen7_render_fill_op_boxes;
+	op->done  = gen7_render_fill_op_done;
 	return TRUE;
 }
 
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 6c45b68..e73804d 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -43,6 +43,7 @@
 #include <mipict.h>
 #include <fbpict.h>
 #endif
+#include <miline.h>
 
 #include <sys/time.h>
 #include <sys/mman.h>
@@ -60,6 +61,7 @@
 #define FORCE_FLUSH 0
 
 #define USE_SPANS 0
+#define USE_ZERO_SPANS 1
 
 DevPrivateKeyRec sna_pixmap_index;
 DevPrivateKey sna_window_key;
@@ -2434,6 +2436,363 @@ fallback:
 	fbPolyPoint(drawable, gc, mode, n, pt);
 }
 
+/* Draw a zero-width solid polyline by walking each segment with Bresenham and
+ * batching the runs as boxes; returns FALSE if sna_fill_init_blt() fails. */
+static bool
+sna_poly_zero_line_blt(DrawablePtr drawable, struct kgem_bo *bo,
+		       struct sna_damage **damage, GCPtr gc, int mode,
+		       const int _n, const DDXPointRec * const _pt,
+		       const BoxRec *extents, unsigned clipped)
+{
+	static void * const _jump[] = {
+		&&no_damage,
+		&&damage,
+		/* as above, but translate the boxes by (dx, dy) first */
+		&&no_damage_offset,
+		&&damage_offset,
+	};
+
+	struct sna *sna = to_sna_from_drawable(drawable);
+	PixmapPtr pixmap = get_drawable_pixmap(drawable);
+	int x2, y2, xstart, ystart, oc2, pt2_clipped = 0;
+	unsigned int bias = miGetZeroLineBias(drawable->pScreen);
+	bool degenerate = true;
+	struct sna_fill_op fill;
+	RegionRec clip;
+	BoxRec box[512], *b, * const last_box = box + ARRAY_SIZE(box);
+	const BoxRec *last_extents;
+	int16_t dx, dy;
+	void *jump, *ret;
+
+	DBG(("%s: alu=%d, pixel=%lx, n=%d, clipped=%d, damage=%p\n",
+	     __FUNCTION__, gc->alu, gc->fgPixel, _n, clipped, damage));
+	if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel))
+		return FALSE;
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+	region_set(&clip, extents);
+	if (clipped)
+		region_maybe_clip(&clip, gc->pCompositeClip);
+
+	jump = _jump[(damage != NULL) | !!(dx|dy) << 1];
+	DBG(("%s: [clipped] extents=(%d, %d), (%d, %d), delta=(%d, %d)\n",
+	     __FUNCTION__,
+	     clip.extents.x1, clip.extents.y1,
+	     clip.extents.x2, clip.extents.y2,
+	     dx, dy));
+
+	extents = REGION_RECTS(&clip);
+	last_extents = extents + REGION_NUM_RECTS(&clip);
+	/* NOTE(review): the loop below tests against clip.extents, not *extents */
+	b = box;
+	do {
+		int n = _n;
+		const DDXPointRec *pt = _pt;
+
+		xstart = pt->x + drawable->x;
+		ystart = pt->y + drawable->y;
+
+		/* x2, y2, oc2 copied to x1, y1, oc1 at top of loop to simplify
+		 * iteration logic
+		 */
+		x2 = xstart;
+		y2 = ystart;
+		oc2 = 0;
+		MIOUTCODES(oc2, x2, y2,
+			   clip.extents.x1,
+			   clip.extents.y1,
+			   clip.extents.x2,
+			   clip.extents.y2);
+
+		while (--n) {
+			int16_t sdx, sdy;
+			int16_t adx, ady;
+			int16_t e, e1, e2, e3;
+			int16_t length;
+			int x1 = x2, x;
+			int y1 = y2, y;
+			int oc1 = oc2;
+			int octant;
+
+			++pt;
+
+			x2 = pt->x;
+			y2 = pt->y;
+			if (mode == CoordModePrevious) {
+				x2 += x1;
+				y2 += y1;
+			} else {
+				x2 += drawable->x;
+				y2 += drawable->y;
+			}
+			DBG(("%s: segment (%d, %d) to (%d, %d)\n",
+			     __FUNCTION__, x1, y1, x2, y2));
+			if (x2 == x1 && y2 == y1)
+				continue;
+
+			degenerate = false;
+
+			oc2 = 0;
+			MIOUTCODES(oc2, x2, y2,
+				   clip.extents.x1,
+				   clip.extents.y1,
+				   clip.extents.x2,
+				   clip.extents.y2);
+			if (oc1 & oc2)
+				continue;
+
+			CalcLineDeltas(x1, y1, x2, y2,
+				       adx, ady, sdx, sdy,
+				       1, 1, octant);
+
+			DBG(("%s: adx=(%d, %d), sdx=(%d, %d)\n",
+			     __FUNCTION__, adx, ady, sdx, sdy));
+			if (adx == 0 || ady == 0) {
+				if (x1 <= x2) {
+					b->x1 = x1;
+					b->x2 = x2;
+				} else {
+					b->x1 = x2;
+					b->x2 = x1;
+				}
+				if (y1 <= y2) {
+					b->y1 = y1;
+					b->y2 = y2;
+				} else {
+					b->y1 = y2;
+					b->y2 = y1;
+				}
+				b->x2++;
+				b->y2++;
+				if (oc1 | oc2)
+					box_intersect(b, &clip.extents);
+				if (++b == last_box) {
+					ret = &&rectangle_continue; /* flush full batch, resume here */
+					goto *jump;
+rectangle_continue:
+					b = box;
+				}
+			} else if (adx >= ady) {
+				/* X-major segment */
+				e1 = ady << 1;
+				e2 = e1 - (adx << 1);
+				e  = e1 - adx;
+				length = adx;	/* don't draw endpoint in main loop */
+
+				FIXUP_ERROR(e, octant, bias);
+
+				x = x1;
+				y = y1;
+				pt2_clipped = 0;
+
+				if (oc1 | oc2) {
+					int x2_clipped = x2, y2_clipped = y2;
+					int pt1_clipped;
+
+					if (miZeroClipLine(clip.extents.x1, clip.extents.y1,
+							   clip.extents.x2, clip.extents.y2,
+							   &x, &y, &x2_clipped, &y2_clipped,
+							   adx, ady,
+							   &pt1_clipped, &pt2_clipped,
+							   octant, bias, oc1, oc2) == -1)
+						continue;
+
+					length = abs(x2_clipped - x);
+
+					/* if we've clipped the endpoint, always draw the full length
+					 * of the segment, because then the capstyle doesn't matter
+					 */
+					if (pt2_clipped)
+						length++;
+
+					if (pt1_clipped) {
+						int clipdx = abs(x - x1);
+						int clipdy = abs(y - y1);
+						e += clipdy * e2 + (clipdx - clipdy) * e1;
+					}
+				}
+				if (length == 0)
+					continue;
+
+				e3 = e2 - e1;
+				e  = e - e1;
+
+				b->x1 = x;
+				b->y2 = b->y1 = y;
+				while (length--) {
+					e += e1;
+					if (e >= 0) {
+						b->x2 = x;
+						if (b->x2 < b->x1) {
+							int16_t t = b->x1;
+							b->x1 = b->x2;
+							b->x2 = t;
+						}
+						b->x2++;
+						b->y2++;
+						if (++b == last_box) {
+							ret = &&X_continue;
+							goto *jump;
+X_continue:
+							b = box;
+						}
+						y += sdy;
+						e += e3;
+						b->y2 = b->y1 = y;
+						b->x1 = x;
+					}
+					x += sdx;
+				}
+			} else {
+				/* Y-major segment */
+				e1 = adx << 1;
+				e2 = e1 - (ady << 1);
+				e  = e1 - ady;
+				length  = ady;	/* don't draw endpoint in main loop */
+
+				SetYMajorOctant(octant);
+				FIXUP_ERROR(e, octant, bias);
+
+				x = x1;
+				y = y1;
+				pt2_clipped = 0;
+
+				if (oc1 | oc2) {
+					int x2_clipped = x2, y2_clipped = y2;
+					int pt1_clipped;
+
+					if (miZeroClipLine(clip.extents.x1,
+							   clip.extents.y1,
+							   clip.extents.x2,
+							   clip.extents.y2,
+							   &x, &y, &x2_clipped, &y2_clipped,
+							   adx, ady,
+							   &pt1_clipped, &pt2_clipped,
+							   octant, bias, oc1, oc2) == -1)
+						continue;
+
+					length = abs(y2_clipped - y);
+
+					/* if we've clipped the endpoint, always draw the full length
+					 * of the segment, because then the capstyle doesn't matter
+					 */
+					if (pt2_clipped)
+						length++;
+
+					if (pt1_clipped) {
+						int clipdx = abs(x - x1);
+						int clipdy = abs(y - y1);
+						e += clipdx * e2 + (clipdy - clipdx) * e1;
+					}
+				}
+				if (length == 0)
+					continue;
+
+				e3 = e2 - e1;
+				e  = e - e1;
+
+				b->x2 = b->x1 = x;
+				b->y1 = y;
+				while (length--) {
+					e += e1;
+					if (e >= 0) {
+						b->y2 = y;
+						if (b->y2 < b->y1) {
+							int16_t t = b->y1;
+							b->y1 = b->y2;
+							b->y2 = t;
+						}
+						b->x2++;
+						b->y2++;
+						if (++b == last_box) {
+							ret = &&Y_continue;
+							goto *jump;
+Y_continue:
+							b = box;
+						}
+						x += sdx;
+						e += e3;
+						b->x2 = b->x1 = x;
+						b->y1 = y;
+					}
+					y += sdy;
+				}
+			}
+		}
+
+#if 0
+		/* Only do the CapNotLast check on the last segment
+		 * and only if the endpoint wasn't clipped.  And then, if the last
+		 * point is the same as the first point, do not draw it, unless the
+		 * line is degenerate
+		 */
+		if (!pt2_clipped &&
+		    gc->capStyle != CapNotLast &&
+		    !(xstart == x2 && ystart == y2 && !degenerate))
+		{
+			b->x2 = x2;
+			b->y2 = y2;
+			if (b->x2 < b->x1) {
+				int16_t t = b->x1;
+				b->x1 = b->x2;
+				b->x2 = t;
+			}
+			if (b->y2 < b->y1) {
+				int16_t t = b->y1;
+				b->y1 = b->y2;
+				b->y2 = t;
+			}
+			b->x2++;
+			b->y2++;
+			b++;
+		}
+#endif
+	} while (++extents != last_extents);
+
+	if (b != box) { /* flush the partially filled final batch */
+		ret = &&done;
+		goto *jump;
+	}
+
+done:
+	fill.done(sna, &fill);
+	return true;
+
+damage:
+	sna_damage_add_boxes(damage, box, b-box, 0, 0);
+no_damage:
+	fill.boxes(sna, &fill, box, b-box);
+	goto *ret;
+
+no_damage_offset:
+	{
+		BoxRec *bb = box;
+		do {
+			bb->x1 += dx;
+			bb->x2 += dx;
+			bb->y1 += dy;
+			bb->y2 += dy;
+		} while (++bb != b);
+		fill.boxes(sna, &fill, box, b - box);
+	}
+	goto *ret;
+
+damage_offset:
+	{
+		BoxRec *bb = box;
+		do {
+			bb->x1 += dx;
+			bb->x2 += dx;
+			bb->y1 += dy;
+			bb->y2 += dy;
+		} while (++bb != b);
+		fill.boxes(sna, &fill, box, b - box);
+		sna_damage_add_boxes(damage, box, b - box, 0, 0);
+	}
+	goto *ret;
+}
+
 static Bool
 sna_poly_line_blt(DrawablePtr drawable,
 		  struct kgem_bo *bo,
@@ -2680,14 +3039,14 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
 	     flags & 2));
 	if (gc->fillStyle == FillSolid &&
 	    gc->lineStyle == LineSolid &&
-	    (gc->lineWidth == 0 || gc->lineWidth == 1) &&
-	    PM_IS_SOLID(drawable, gc->planemask) &&
-	    flags & 2) {
+	    gc->lineWidth <= 1 &&
+	    PM_IS_SOLID(drawable, gc->planemask)) {
 		struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
 
 		DBG(("%s: trying solid fill [%08lx]\n",
 		     __FUNCTION__, gc->fgPixel));
 
+	    if (flags & 2) {
 		if (sna_drawable_use_gpu_bo(drawable, &region.extents) &&
 		    sna_poly_line_blt(drawable,
 				      priv->gpu_bo,
@@ -2701,6 +3060,17 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
 				      reduce_damage(drawable, &priv->cpu_damage, &region.extents),
 				      gc, mode, n, pt, flags & 4))
 			return;
+	    } else { /* !rectilinear */
+		if (USE_ZERO_SPANS &&
+		    sna_drawable_use_gpu_bo(drawable, &region.extents) &&
+		    sna_poly_zero_line_blt(drawable,
+					   priv->gpu_bo,
+					   priv->gpu_only ? NULL : reduce_damage(drawable, &priv->gpu_damage, &region.extents),
+					   gc, mode, n, pt,
+					   &region.extents, flags & 4))
+			return;
+
+	    }
 	}
 
 	if (USE_SPANS && can_fill_spans(drawable, gc) &&
@@ -2745,22 +3115,6 @@ fallback:
 }
 
 static Bool
-sna_poly_segment_can_blt(int n, xSegment *seg)
-{
-	while (n--) {
-		if (seg->x1 != seg->x2 && seg->y1 != seg->y2) {
-			DBG(("%s: (%d, %d) -> (%d, %d)\n",
-			     __FUNCTION__, seg->x1, seg->y1, seg->x2, seg->y2));
-			return FALSE;
-		}
-
-		seg++;
-	}
-
-	return TRUE;
-}
-
-static Bool
 sna_poly_segment_blt(DrawablePtr drawable,
 		     struct kgem_bo *bo,
 		     struct sna_damage **damage,
@@ -2895,16 +3249,320 @@ sna_poly_segment_blt(DrawablePtr drawable,
 	return TRUE;
 }
 
-static Bool
+/* Draw zero-width solid segments, clipped per clip rectangle, as batches of
+ * Bresenham-run boxes; returns FALSE if sna_fill_init_blt() fails. */
+static bool
+sna_poly_zero_segment_blt(DrawablePtr drawable, struct kgem_bo *bo,
+			  struct sna_damage **damage, GCPtr gc,
+			  const int _n, const xSegment *_s,
+			  const BoxRec *extents, unsigned clipped)
+{
+	static void * const _jump[] = {
+		&&no_damage,
+		&&damage,
+		/* as above, but translate the boxes by (dx, dy) first */
+		&&no_damage_offset,
+		&&damage_offset,
+	};
+
+	struct sna *sna = to_sna_from_drawable(drawable);
+	PixmapPtr pixmap = get_drawable_pixmap(drawable);
+	unsigned int bias = miGetZeroLineBias(drawable->pScreen);
+	struct sna_fill_op fill;
+	RegionRec clip;
+	const BoxRec *last_extents;
+	BoxRec box[512], *b, * const last_box = box + ARRAY_SIZE(box);
+	int16_t dx, dy;
+	void *jump, *ret;
+
+	DBG(("%s: alu=%d, pixel=%lx, n=%d, clipped=%d, damage=%p\n",
+	     __FUNCTION__, gc->alu, gc->fgPixel, _n, clipped, damage));
+	if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel))
+		return FALSE;
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+	region_set(&clip, extents);
+	if (clipped)
+		region_maybe_clip(&clip, gc->pCompositeClip);
+	DBG(("%s: [clipped] extents=(%d, %d), (%d, %d), delta=(%d, %d)\n",
+	     __FUNCTION__,
+	     clip.extents.x1, clip.extents.y1,
+	     clip.extents.x2, clip.extents.y2,
+	     dx, dy));
+
+	jump = _jump[(damage != NULL) | !!(dx|dy) << 1];
+
+	b = box;
+	extents = REGION_RECTS(&clip);
+	last_extents = extents + REGION_NUM_RECTS(&clip);
+	do {
+		int n = _n;
+		const xSegment *s = _s;
+		do {
+			int16_t sdx, sdy;
+			int16_t adx, ady;
+			int16_t e, e1, e2, e3;
+			int16_t length;
+			int x1, x2;
+			int y1, y2;
+			int oc1, oc2;
+			int octant;
+
+			x1 = s->x1 + drawable->x;
+			y1 = s->y1 + drawable->y;
+			x2 = s->x2 + drawable->x;
+			y2 = s->y2 + drawable->y;
+			s++;
+
+			DBG(("%s: segment (%d, %d) to (%d, %d)\n",
+			     __FUNCTION__, x1, y1, x2, y2));
+			if (x2 == x1 && y2 == y1)
+				continue;
+
+			oc1 = 0;
+			MIOUTCODES(oc1, x1, y1,
+				   extents->x1,
+				   extents->y1,
+				   extents->x2,
+				   extents->y2);
+			oc2 = 0;
+			MIOUTCODES(oc2, x2, y2,
+				   extents->x1,
+				   extents->y1,
+				   extents->x2,
+				   extents->y2);
+			if (oc1 & oc2)
+				continue;
+
+			CalcLineDeltas(x1, y1, x2, y2,
+				       adx, ady, sdx, sdy,
+				       1, 1, octant);
+
+			DBG(("%s: adx=(%d, %d), sdx=(%d, %d)\n",
+			     __FUNCTION__, adx, ady, sdx, sdy));
+			if (adx == 0 || ady == 0) {
+				if (x1 <= x2) {
+					b->x1 = x1;
+					b->x2 = x2;
+				} else {
+					b->x1 = x2;
+					b->x2 = x1;
+				}
+				if (y1 <= y2) {
+					b->y1 = y1;
+					b->y2 = y2;
+				} else {
+					b->y1 = y2;
+					b->y2 = y1;
+				}
+				b->x2++;
+				b->y2++;
+				if (box_intersect(b, extents)) {
+					if (++b == last_box) {
+						ret = &&rectangle_continue; /* flush full batch, resume here */
+						goto *jump;
+rectangle_continue:
+						b = box;
+					}
+				}
+			} else if (adx >= ady) {
+				/* X-major segment */
+				e1 = ady << 1;
+				e2 = e1 - (adx << 1);
+				e  = e1 - adx;
+				length = adx;	/* don't draw endpoint in main loop */
+
+				FIXUP_ERROR(e, octant, bias);
+
+				if (oc1 | oc2) {
+					int pt1_clipped, pt2_clipped;
+					int x = x1, y = y1;
+
+					if (miZeroClipLine(extents->x1,
+							   extents->y1,
+							   extents->x2,
+							   extents->y2,
+							   &x1, &y1, &x2, &y2,
+							   adx, ady,
+							   &pt1_clipped, &pt2_clipped,
+							   octant, bias, oc1, oc2) == -1)
+						continue;
+
+					length = abs(x2 - x1);
+
+					/* if we've clipped the endpoint, always draw the full length
+					 * of the segment, because then the capstyle doesn't matter
+					 */
+					if (pt2_clipped)
+						length++;
+
+					if (pt1_clipped) {
+						int clipdx = abs(x1 - x);
+						int clipdy = abs(y1 - y);
+						e += clipdy * e2 + (clipdx - clipdy) * e1;
+					}
+				}
+				if (length == 0)
+					continue;
+
+				e3 = e2 - e1;
+				e  = e - e1;
+
+				b->x1 = x1;
+				b->y2 = b->y1 = y1;
+				while (length--) {
+					e += e1;
+					if (e >= 0) {
+						b->x2 = x1;
+						if (b->x2 < b->x1) {
+							int16_t t = b->x1;
+							b->x1 = b->x2;
+							b->x2 = t;
+						}
+						b->x2++;
+						b->y2++;
+						if (++b == last_box) {
+							ret = &&X_continue;
+							goto *jump;
+X_continue:
+							b = box;
+						}
+						y1 += sdy;
+						e += e3;
+						b->y2 = b->y1 = y1;
+						b->x1 = x1;
+					}
+					x1 += sdx;
+				}
+			} else {
+				/* Y-major segment */
+				e1 = adx << 1;
+				e2 = e1 - (ady << 1);
+				e  = e1 - ady;
+				length  = ady;	/* don't draw endpoint in main loop */
+
+				SetYMajorOctant(octant);
+				FIXUP_ERROR(e, octant, bias);
+
+				if (oc1 | oc2) {
+					int pt1_clipped, pt2_clipped;
+					int x = x1, y = y1;
+
+					if (miZeroClipLine(extents->x1,
+							   extents->y1,
+							   extents->x2,
+							   extents->y2,
+							   &x1, &y1, &x2, &y2,
+							   adx, ady,
+							   &pt1_clipped, &pt2_clipped,
+							   octant, bias, oc1, oc2) == -1)
+						continue;
+
+					length = abs(y2 - y1);
+
+					/* if we've clipped the endpoint, always draw the full length
+					 * of the segment, because then the capstyle doesn't matter
+					 */
+					if (pt2_clipped)
+						length++;
+
+					if (pt1_clipped) {
+						int clipdx = abs(x1 - x);
+						int clipdy = abs(y1 - y);
+						e += clipdx * e2 + (clipdy - clipdx) * e1;
+					}
+				}
+				if (length == 0)
+					continue;
+
+				e3 = e2 - e1;
+				e  = e - e1;
+
+				b->x2 = b->x1 = x1;
+				b->y1 = y1;
+				while (length--) {
+					e += e1;
+					if (e >= 0) {
+						b->y2 = y1;
+						if (b->y2 < b->y1) {
+							int16_t t = b->y1;
+							b->y1 = b->y2;
+							b->y2 = t;
+						}
+						b->x2++;
+						b->y2++;
+						if (++b == last_box) {
+							ret = &&Y_continue;
+							goto *jump;
+Y_continue:
+							b = box;
+						}
+						x1 += sdx;
+						e += e3;
+						b->x2 = b->x1 = x1;
+						b->y1 = y1;
+					}
+					y1 += sdy;
+				}
+			}
+		} while (--n);
+	} while (++extents != last_extents);
+
+	if (b != box) { /* flush the partially filled final batch */
+		ret = &&done;
+		goto *jump;
+	}
+
+done:
+	fill.done(sna, &fill);
+	return true;
+
+damage:
+	sna_damage_add_boxes(damage, box, b-box, 0, 0);
+no_damage:
+	fill.boxes(sna, &fill, box, b-box);
+	goto *ret;
+
+no_damage_offset:
+	{
+		BoxRec *bb = box;
+		do {
+			bb->x1 += dx;
+			bb->x2 += dx;
+			bb->y1 += dy;
+			bb->y2 += dy;
+		} while (++bb != b);
+		fill.boxes(sna, &fill, box, b - box);
+	}
+	goto *ret;
+
+damage_offset:
+	{
+		BoxRec *bb = box;
+		do {
+			bb->x1 += dx;
+			bb->x2 += dx;
+			bb->y1 += dy;
+			bb->y2 += dy;
+		} while (++bb != b);
+		fill.boxes(sna, &fill, box, b - box);
+		sna_damage_add_boxes(damage, box, b - box, 0, 0);
+	}
+	goto *ret;
+}
+
+static unsigned
 sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc,
 			 int n, xSegment *seg,
 			 BoxPtr out)
 {
 	BoxRec box;
 	int extra = gc->lineWidth;
+	bool clipped, can_blit;
 
 	if (n == 0)
-		return true;
+		return 0;
 
 	if (gc->capStyle != CapProjecting)
 		extra >>= 1;
@@ -2925,6 +3583,7 @@ sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc,
 		box.y1 = seg->y2;
 	}
 
+	can_blit = seg->x1 == seg->x2 || seg->y1 == seg->y2;
 	while (--n) {
 		seg++;
 		if (seg->x2 > seg->x1) {
@@ -2942,6 +3601,9 @@ sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc,
 			if (seg->y2 < box.y1) box.y1 = seg->y2;
 			if (seg->y1 > box.y2) box.y2 = seg->y1;
 		}
+
+		if (can_blit && !(seg->x1 == seg->x2 || seg->y1 == seg->y2))
+			can_blit = false;
 	}
 
 	box.x2++;
@@ -2954,9 +3616,11 @@ sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc,
 		box.y2 += extra;
 	}
 
-	trim_and_translate_box(&box, drawable, gc);
+	clipped = trim_and_translate_box(&box, drawable, gc);
+	if (box_empty(&box))
+		return 0;
 	*out = box;
-	return box_empty(&box);
+	return 1 | clipped << 1 | can_blit << 2;
 }
 
 static void
@@ -2964,13 +3628,15 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 {
 	struct sna *sna = to_sna_from_drawable(drawable);
 	RegionRec region;
+	unsigned flags;
 
 	DBG(("%s(n=%d, first=((%d, %d), (%d, %d)), lineWidth=%d\n",
 	     __FUNCTION__,
 	     n, seg->x1, seg->y1, seg->x2, seg->y2,
 	     gc->lineWidth));
 
-	if (sna_poly_segment_extents(drawable, gc, n, seg, &region.extents))
+	flags = sna_poly_segment_extents(drawable, gc, n, seg, &region.extents);
+	if (flags == 0)
 		return;
 
 	DBG(("%s: extents=(%d, %d), (%d, %d)\n", __FUNCTION__,
@@ -2991,17 +3657,17 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 	     gc->lineStyle, gc->lineStyle == LineSolid,
 	     gc->lineWidth,
 	     gc->planemask, PM_IS_SOLID(drawable, gc->planemask),
-	     sna_poly_segment_can_blt(n, seg)));
+	     flags & 4));
 	if (gc->fillStyle == FillSolid &&
 	    gc->lineStyle == LineSolid &&
-	    (gc->lineWidth == 0 || gc->lineWidth == 1) &&
-	    PM_IS_SOLID(drawable, gc->planemask) &&
-	    sna_poly_segment_can_blt(n, seg)) {
+	    gc->lineWidth <= 1 &&
+	    PM_IS_SOLID(drawable, gc->planemask)) {
 		struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
 
 		DBG(("%s: trying blt solid fill [%08lx] paths\n",
 		     __FUNCTION__, gc->fgPixel));
 
+	    if (flags & 4) {
 		if (sna_drawable_use_gpu_bo(drawable, &region.extents) &&
 		    sna_poly_segment_blt(drawable,
 					 priv->gpu_bo,
@@ -3015,6 +3681,15 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 					 reduce_damage(drawable, &priv->cpu_damage, &region.extents),
 					 gc, n, seg, &region.extents))
 			return;
+	    } else {
+		    if (USE_ZERO_SPANS &&
+			sna_drawable_use_gpu_bo(drawable, &region.extents) &&
+			sna_poly_zero_segment_blt(drawable,
+						  priv->gpu_bo,
+						  priv->gpu_only ? NULL : reduce_damage(drawable, &priv->gpu_damage, &region.extents),
+						  gc, n, seg, &region.extents, flags & 2))
+			    return;
+	    }
 	}
 
 	/* XXX Do we really want to base this decision on the amalgam ? */
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 62f8cdf..9b598c9 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1297,6 +1297,66 @@ fastcall static void sna_blt_fill_op_box(struct sna *sna,
 	*(uint64_t *)(b+1) = *(uint64_t *)box;
 }
 
+fastcall static void sna_blt_fill_op_boxes(struct sna *sna, /* append one fill command (3 batch dwords) per box */
+					   const struct sna_fill_op *op,
+					   const BoxRec *box,
+					   int nbox) /* nbox == 0 would trip the assert in the loop below */
+{
+	struct kgem *kgem = &sna->kgem;
+	uint32_t cmd = op->base.u.blt.cmd;
+
+	DBG(("%s: %08x x %d\n", __FUNCTION__,
+	     op->base.u.blt.pixel, nbox));
+
+	if (!kgem_check_batch(kgem, 3)) /* presumably: not even one 3-dword command fits -- helper defined elsewhere */
+		sna_blt_fill_begin(sna, &op->base.u.blt); /* NOTE(review): assumed to make room and re-emit the fill setup -- confirm */
+
+	do {
+		uint32_t *b = kgem->batch + kgem->nbatch; /* write cursor for this pass */
+		int nbox_this_time;
+
+		nbox_this_time = nbox;
+		if (3*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) /* clamp to the dwords still available */
+			nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 3;
+		assert(nbox_this_time);
+		nbox -= nbox_this_time;
+
+		kgem->nbatch += 3 * nbox_this_time; /* claim the space up front; b still points at its start */
+		while (nbox_this_time >= 8) { /* unrolled: eight boxes per iteration */
+			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++; /* copies 64 bits from *box (assumes BoxRec is 8 bytes -- TODO confirm); box advances by one BoxRec */
+			b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++;
+			b[6] = cmd; *(uint64_t *)(b+7) = *(uint64_t *)box++;
+			b[9] = cmd; *(uint64_t *)(b+10) = *(uint64_t *)box++;
+			b[12] = cmd; *(uint64_t *)(b+13) = *(uint64_t *)box++;
+			b[15] = cmd; *(uint64_t *)(b+16) = *(uint64_t *)box++;
+			b[18] = cmd; *(uint64_t *)(b+19) = *(uint64_t *)box++;
+			b[21] = cmd; *(uint64_t *)(b+22) = *(uint64_t *)box++;
+			b += 24;
+			nbox_this_time -= 8;
+		}
+		if (nbox_this_time & 4) { /* remainder (< 8) is emitted by its bits: 4, then 2, then 1 */
+			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++;
+			b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++;
+			b[6] = cmd; *(uint64_t *)(b+7) = *(uint64_t *)box++;
+			b[9] = cmd; *(uint64_t *)(b+10) = *(uint64_t *)box++;
+			b += 12;
+		}
+		if (nbox_this_time & 2) {
+			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++;
+			b[3] = cmd; *(uint64_t *)(b+4) = *(uint64_t *)box++;
+			b += 6;
+		}
+		if (nbox_this_time & 1) {
+			b[0] = cmd; *(uint64_t *)(b+1) = *(uint64_t *)box++;
+		}
+
+		if (!nbox) /* every box has been emitted */
+			return;
+
+		sna_blt_fill_begin(sna, &op->base.u.blt); /* more boxes remain than fitted in this pass */
+	} while (1);
+}
+
 static void sna_blt_fill_op_done(struct sna *sna,
 				 const struct sna_fill_op *fill)
 {
@@ -1324,9 +1384,10 @@ bool sna_blt_fill(struct sna *sna, uint8_t alu,
 			       bo, bpp, alu, pixel))
 		return FALSE;
 
-	fill->blt  = sna_blt_fill_op_blt;
-	fill->box  = sna_blt_fill_op_box;
-	fill->done = sna_blt_fill_op_done;
+	fill->blt   = sna_blt_fill_op_blt;
+	fill->box   = sna_blt_fill_op_box;
+	fill->boxes = sna_blt_fill_op_boxes;
+	fill->done  = sna_blt_fill_op_done;
 	return TRUE;
 }
 
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index d30c0b6..6c18791 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -160,6 +160,10 @@ struct sna_fill_op {
 	fastcall void (*box)(struct sna *sna,
 			     const struct sna_fill_op *op,
 			     const BoxRec *box);
+	fastcall void (*boxes)(struct sna *sna,
+			       const struct sna_fill_op *op,
+			       const BoxRec *box,
+			       int count);
 	void (*done)(struct sna *sna, const struct sna_fill_op *op);
 };
 
commit c12371d9e7b3bbff7f318186a0933d6108db0bc8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Oct 21 22:28:56 2011 +0100

    sna: Flatten the branching for fill-spans
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 11f5c21..6c45b68 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1768,123 +1768,244 @@ sna_fill_spans_blt(DrawablePtr drawable,
 		   struct kgem_bo *bo, struct sna_damage **damage,
 		   GCPtr gc, int n,
 		   DDXPointPtr pt, int *width, int sorted,
-		   const BoxRec *extents, bool clipped)
+		   const BoxRec *extents, unsigned clipped)
 {
 	struct sna *sna = to_sna_from_drawable(drawable);
 	PixmapPtr pixmap = get_drawable_pixmap(drawable);
-	RegionRec clip;
 	int16_t dx, dy;
 	struct sna_fill_op fill;
+	static void * const jump[] = {
+		&&no_damage_translate,
+		&&damage_translate,
+		&&no_damage_clipped_translate,
+		&&damage_clipped_translate,
+
+		&&no_damage,
+		&&damage,
+		&&no_damage_clipped,
+		&&damage_clipped,
+	};
+	unsigned v;
+
+	DBG(("%s: alu=%d, fg=%08lx, damge=%p, clipped?=%d\n",
+	     __FUNCTION__, gc->alu, gc->fgPixel, damage, clipped));
 
 	if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel))
 		return false;
 
-	region_set(&clip, extents);
-	region_maybe_clip(&clip, gc->pCompositeClip);
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
 
-	DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
-	     __FUNCTION__,
-	     REGION_NUM_RECTS(&clip),
-	     extents->x1, extents->y1, extents->x2, extents->y2,
-	     n, pt->x, pt->y));
+	v = (damage != NULL) | clipped | gc->miTranslate << 2;
+	goto *jump[v];
 
-	if (!gc->miTranslate) {
+no_damage_translate:
+	dx += drawable->x;
+	dy += drawable->y;
+no_damage:
+	do {
+		BoxRec box;
+
+		box.x1 = pt->x + dx;
+		box.x2 = box.x1 + *width++;
+		box.y1 = pt->y + dy;
+		box.y2 = box.y1 + 1;
+		pt++;
+
+		fill.box(sna, &fill, &box);
+	} while (--n);
+	goto done;
+
+damage_translate:
+	dx += drawable->x;
+	dy += drawable->y;
+damage:
+	do {
+		BoxRec box;
+
+		box.x1 = pt->x + dx;
+		box.x2 = box.x1 + *width++;
+		box.y1 = pt->y + dy;
+		box.y2 = box.y1 + 1;
+		pt++;
+
+		fill.box(sna, &fill, &box);
+		assert_pixmap_contains_box(pixmap, &box);
+		sna_damage_add_box(damage, &box);
+	} while (--n);
+	goto done;
+
+	{
+		RegionRec clip;
 		int i;
 
+no_damage_clipped_translate:
 		for (i = 0; i < n; i++) {
 			/* XXX overflow? */
 			pt->x += drawable->x;
 			pt->y += drawable->y;
 		}
-	}
 
-	get_drawable_deltas(drawable, pixmap, &dx, &dy);
-	if (!clipped) {
-		if (damage) {
+no_damage_clipped:
+		region_set(&clip, extents);
+		region_maybe_clip(&clip, gc->pCompositeClip);
+
+		DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
+		     __FUNCTION__,
+		     REGION_NUM_RECTS(&clip),
+		     clip.extents.x1, clip.extents.y1, clip.extents.x2, clip.extents.y2,
+		     n, pt->x, pt->y));
+
+		if (clip.data == NULL) {
 			do {
 				BoxRec box;
 
-				box.x1 = pt->x + dx;
-				box.x2 = box.x1 + *width++;
-				box.y1 = pt->y + dy;
+				box.x1 = pt->x;
+				box.y1 = pt->y;
+				box.x2 = box.x1 + (int)*width++;
 				box.y2 = box.y1 + 1;
 				pt++;
 
-				fill.box(sna, &fill, &box);
-
-				assert_pixmap_contains_box(pixmap, &box);
-				sna_damage_add_box(damage, &box);
+				if (box_intersect(&box, &clip.extents)) {
+					box.x1 += dx;
+					box.x2 += dx;
+					box.y1 += dy;
+					box.y2 += dy;
+					fill.box(sna, &fill, &box);
+				}
 			} while (--n);
 		} else {
 			do {
-				BoxRec box;
+				int nc = clip.data->numRects;
+				const BoxRec *b = RegionBoxptr(&clip);
+				int16_t X1 = pt->x;
+				int16_t y = pt->y;
+				int16_t X2 = X1 + (int)*width;
 
-				box.x1 = pt->x + dx;
-				box.x2 = box.x1 + *width++;
-				box.y1 = pt->y + dy;
-				box.y2 = box.y1 + 1;
 				pt++;
-
-				fill.box(sna, &fill, &box);
+				width++;
+
+				if (y < extents->y1 || extents->y2 <= y)
+					continue;
+
+				if (X1 < extents->x1)
+					X1 = extents->x1;
+
+				if (X2 > extents->x2)
+					X2 = extents->x2;
+
+				if (X1 >= X2)
+					continue;
+
+				y += dy;
+				do {
+					if (b->y1 <= y && y < b->y2) {
+						int x1 = b->x1;
+						int x2 = b->x2;
+
+						if (x1 < X1)
+							x1 = X1;
+						x1 += dx;
+						if (x1 < 0)
+							x1 = 0;
+						if (x2 > X2)
+							x2 = X2;
+						x2 += dx;
+						if (x2 > pixmap->drawable.width)
+							x2 = pixmap->drawable.width;
+
+						if (x2 > x1)
+							fill.blt(sna, &fill, x1, y, x2-x1, 1);
+					}
+					b++;
+				} while (--nc);
 			} while (--n);
+			RegionUninit(&clip);
 		}
-	} else do {
-		int16_t X1 = pt->x;
-		int16_t y = pt->y;
-		int16_t X2 = X1 + (int)*width;
-
-		pt++;
-		width++;
+		goto done;
+	}
 
-		if (y < extents->y1 || extents->y2 <= y)
-			continue;
+	{
+		RegionRec clip;
+		int i;
 
-		if (X1 < extents->x1)
-			X1 = extents->x1;
+damage_clipped_translate:
+		for (i = 0; i < n; i++) {
+			/* XXX overflow? */
+			pt->x += drawable->x;
+			pt->y += drawable->y;
+		}
 
-		if (X2 > extents->x2)
-			X2 = extents->x2;
+damage_clipped:
+		region_set(&clip, extents);
+		region_maybe_clip(&clip, gc->pCompositeClip);
 
-		if (X1 >= X2)
-			continue;
+		DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
+		     __FUNCTION__,
+		     REGION_NUM_RECTS(&clip),
+		     clip.extents.x1, clip.extents.y1, clip.extents.x2, clip.extents.y2,
+		     n, pt->x, pt->y));
 
-		y += dy;
 		if (clip.data == NULL) {
-			fill.blt(sna, &fill, X1 + dx, y, X2-X1, 1);
-			if (damage) {
+			do {
 				BoxRec box;
 
-				box.x1 = X1 + dx;
-				box.x2 = X2 + dx;
-				box.y1 = y;
+				box.x1 = pt->x;
+				box.y1 = pt->y;
+				box.x2 = box.x1 + (int)*width++;
 				box.y2 = box.y1 + 1;
+				pt++;
 
-				assert_pixmap_contains_box(pixmap, &box);
-				sna_damage_add_box(damage, &box);
-			}
+				if (box_intersect(&box, &clip.extents)) {
+					box.x1 += dx;
+					box.x2 += dx;
+					box.y1 += dy;
+					box.y2 += dy;
+					fill.box(sna, &fill, &box);
+					assert_pixmap_contains_box(pixmap, &box);
+					sna_damage_add_box(damage, &box);
+				}
+			} while (--n);
 		} else {
-			int nc = clip.data->numRects;
-			const BoxRec *b = RegionBoxptr(&clip);
-			while (nc--) {
-				if (b->y1 <= y && y < b->y2) {
-					int x1 = b->x1;
-					int x2 = b->x2;
-
-					if (x1 < X1)
-						x1 = X1;
-					x1 += dx;
-					if (x1 < 0)
-						x1 = 0;
-					if (x2 > X2)
-						x2 = X2;
-					x2 += dx;
-					if (x2 > pixmap->drawable.width)
-						x2 = pixmap->drawable.width;
-
-					if (x2 > x1) {
-						fill.blt(sna, &fill,
-							 x1, y, x2-x1, 1);
-						if (damage) {
+			do {
+				int nc = clip.data->numRects;
+				const BoxRec *b = RegionBoxptr(&clip);
+				int16_t X1 = pt->x;
+				int16_t y = pt->y;
+				int16_t X2 = X1 + (int)*width;
+
+				pt++;
+				width++;
+
+				if (y < extents->y1 || extents->y2 <= y)
+					continue;
+
+				if (X1 < extents->x1)
+					X1 = extents->x1;
+
+				if (X2 > extents->x2)
+					X2 = extents->x2;
+
+				if (X1 >= X2)
+					continue;
+
+				y += dy;
+				do {
+					if (b->y1 <= y && y < b->y2) {
+						int x1 = b->x1;
+						int x2 = b->x2;
+
+						if (x1 < X1)
+							x1 = X1;
+						x1 += dx;
+						if (x1 < 0)
+							x1 = 0;
+						if (x2 > X2)
+							x2 = X2;
+						x2 += dx;
+						if (x2 > pixmap->drawable.width)
+							x2 = pixmap->drawable.width;
+
+						if (x2 > x1) {
 							BoxRec box;
 
 							box.x1 = x1;
@@ -1892,17 +2013,21 @@ sna_fill_spans_blt(DrawablePtr drawable,
 							box.x2 = x2;
 							box.y2 = box.y1 + 1;
 
+							fill.box(sna, &fill, &box);
 							assert_pixmap_contains_box(pixmap, &box);
 							sna_damage_add_box(damage, &box);
 						}
 					}
-				}
-				b++;
-			}
+					b++;
+				} while (--nc);
+			} while (--n);
+			RegionUninit(&clip);
 		}
-	} while (--n);
+		goto done;
+	}
+
+done:
 	fill.done(sna, &fill);
-	RegionUninit(&clip);
 	return TRUE;
 }
 
commit e7f19d85fead62df21eedc87409b5abacfec4a17
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Oct 21 22:15:09 2011 +0100

    sna/blt: Upload the box using a single 64-bit instruction
    
    So long as we have 64-bit instructions of course!
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 9ab7d8e..62f8cdf 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1294,8 +1294,7 @@ fastcall static void sna_blt_fill_op_box(struct sna *sna,
 	kgem->nbatch += 3;
 
 	b[0] = op->base.u.blt.cmd;
-	b[1] = box->y1 << 16 | box->x1;
-	b[2] = box->y2 << 16 | box->x2;
+	*(uint64_t *)(b+1) = *(uint64_t *)box;
 }
 
 static void sna_blt_fill_op_done(struct sna *sna,
@@ -1447,8 +1446,7 @@ static Bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
 
 	b[0] = cmd;
 	b[1] = br13;
-	b[2] = box->y1 << 16 | box->x1;
-	b[3] = box->y2 << 16 | box->x2;
+	*(uint64_t *)(b+2) = *(uint64_t *)box;
 	b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
 			      bo,
 			      I915_GEM_DOMAIN_RENDER << 16 |
@@ -1566,8 +1564,7 @@ Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
 			b = kgem->batch + kgem->nbatch;
 			kgem->nbatch += 3;
 			b[0] = cmd;
-			b[1] = box->y1 << 16 | box->x1;
-			b[2] = box->y2 << 16 | box->x2;
+			*(uint64_t *)(b+1) = *(uint64_t *)box;
 			box++;
 		} while (--nbox_this_time);
 
commit 40af32a0e9ed971a1f2c2a45266f32016bda7ed3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Oct 21 20:10:02 2011 +0100

    sna: Execute blits directly for PolyRectangle
    
    By constructing the batch buffer directly for PolyRectangle, rather than
    via miPolyRectangle->(PolyFillRectangle/PolyLine), we dramatically
    reduce the CPU overhead and can saturate the GPU.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 636544e..11f5c21 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2940,6 +2940,562 @@ fallback:
 	fbPolySegment(drawable, gc, n, seg);
 }
 
+static unsigned /* 0: nothing to draw; otherwise 1 | (clipped << 1) */
+sna_poly_rectangle_extents(DrawablePtr drawable, GCPtr gc,
+			   int n, xRectangle *r,
+			   BoxPtr out) /* receives the bounding box; written only on a non-zero return */
+{
+	BoxRec box;
+	int extra = gc->lineWidth >> 1; /* half the line width, added on every side below */
+	bool clipped;
+
+	if (n == 0)
+		return 0;
+
+	box.x1 = r->x; /* seed the bounds with the first rectangle */
+	box.y1 = r->y;
+	box.x2 = r->x + r->width;
+	box.y2 = r->y + r->height;
+
+	while (--n) { /* grow the bounds over the remaining n-1 rectangles */
+		r++;
+		if (r->x < box.x1)
+			box.x1 = r->x;
+		if (r->x + r->width > box.x2)
+			box.x2 = r->x + r->width;
+		if (r->y < box.y1)
+			box.y1 = r->y;
+		if (r->y + r->height > box.y2) /* bottom edge is y + height (the test used y + width) */
+			box.y2 = r->y + r->height;
+	}
+
+	box.x2++; /* the outline touches x+width and y+height inclusive; x2/y2 are exclusive */
+	box.y2++;
+
+	if (extra) {
+		box.x1 -= extra;
+		box.x2 += extra;
+		box.y1 -= extra;
+		box.y2 += extra;
+	}
+
+	if (box_empty(&box))
+		return 0;
+
+	clipped = trim_and_translate_box(&box, drawable, gc); /* helper defined elsewhere; its result is reported in bit 1 */
+	*out = box;
+	return 1 | clipped << 1;
+}
+
+static Bool /* FALSE only if the fill op cannot be set up; TRUE once all rectangles are emitted */
+sna_poly_rectangle_blt(DrawablePtr drawable,
+		       struct kgem_bo *bo,
+		       struct sna_damage **damage, /* may be NULL: no damage is recorded then */
+		       GCPtr gc, int n, xRectangle *r, /* n must be > 0: every path is a do/while */
+		       const BoxRec *extents, bool clipped)
+{
+	struct sna *sna = to_sna_from_drawable(drawable);
+	PixmapPtr pixmap = get_drawable_pixmap(drawable);
+	struct sna_fill_op fill;
+	int16_t dx, dy;
+	static void * const jump[] = { /* indexed by v: bit 0 clipped, bit 1 lineWidth != 0, bit 2 damage != NULL */
+		&&no_damage_zero,
+		&&maybe_damage_zero_clipped,
+		&&no_damage_wide,
+		&&maybe_damage_wide_clipped,
+
+		&&damage_zero,
+		&&maybe_damage_zero_clipped,
+		&&damage_wide,
+		&&maybe_damage_wide_clipped,
+	};
+	unsigned v;
+
+	DBG(("%s: alu=%d, width=%d, fg=%08lx, damge=%p, clipped?=%d\n",
+	     __FUNCTION__, gc->alu, gc->lineWidth, gc->fgPixel, damage, clipped));
+
+	if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel))
+		return FALSE;
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+
+	v = !!clipped;
+	v |= (gc->lineWidth != 0) << 1;
+	v |= (damage != NULL) << 2;
+	goto *jump[v];
+
+damage_zero:
+	dx += drawable->x; /* unclipped paths fold the drawable origin into the deltas once */
+	dy += drawable->y;
+
+	do {
+		BoxRec box;
+
+		if (r->width <= 1 || r->height <= 1) { /* too thin to have an interior: one solid box */
+			box.x1 = r->x + dx;
+			box.y1 = r->y + dy;
+			box.x2 = box.x1 + r->width + 1;
+			box.y2 = box.y1 + r->height + 1;
+			DBG(("%s: blt (%d, %d), (%d, %d)\n",
+			     __FUNCTION__,
+			     box.x1, box.y1, box.x2, box.y2));
+			fill.box(sna, &fill, &box);
+			assert_pixmap_contains_box(pixmap, &box);
+			sna_damage_add_box(damage, &box);
+		} else { /* four 1-pixel edges: top, bottom, left, right */
+			box.x1 = r->x + dx;
+			box.y1 = r->y + dy;
+			box.x2 = box.x1 + r->width + 1;
+			box.y2 = box.y1 + 1;
+			DBG(("%s: blt (%d, %d), (%d, %d)\n",
+			     __FUNCTION__,
+			     box.x1, box.y1, box.x2, box.y2));
+			fill.box(sna, &fill, &box);
+			assert_pixmap_contains_box(pixmap, &box);
+			sna_damage_add_box(damage, &box);
+
+			box.y1 += r->height;
+			box.y2 += r->height;
+			DBG(("%s: blt (%d, %d), (%d, %d)\n",
+			     __FUNCTION__,
+			     box.x1, box.y1, box.x2, box.y2));
+			fill.box(sna, &fill, &box);
+			assert_pixmap_contains_box(pixmap, &box);
+			sna_damage_add_box(damage, &box);
+
+			box.y1 = r->y + dy + 1;
+			box.y2 = box.y1 + r->height - 1;
+			box.x1 = r->x + dx;
+			box.x2 = box.x1 + 1;
+			DBG(("%s: blt (%d, %d), (%d, %d)\n",
+			     __FUNCTION__,
+			     box.x1, box.y1, box.x2, box.y2));
+			fill.box(sna, &fill, &box);
+			assert_pixmap_contains_box(pixmap, &box);
+			sna_damage_add_box(damage, &box);
+
+			box.x1 += r->width;
+			box.x2 += r->width;
+			DBG(("%s: blt (%d, %d), (%d, %d)\n",
+			     __FUNCTION__,
+			     box.x1, box.y1, box.x2, box.y2));
+			fill.box(sna, &fill, &box);
+			assert_pixmap_contains_box(pixmap, &box);
+			sna_damage_add_box(damage, &box);
+		}
+		r++;
+	} while (--n);
+	goto done;
+
+no_damage_zero: /* same geometry as damage_zero, without damage tracking */
+	dx += drawable->x;
+	dy += drawable->y;
+
+	do {
+		BoxRec box;
+
+		if (r->width <= 1 || r->height <= 1) {
+			box.x1 = r->x + dx;
+			box.y1 = r->y + dy;
+			box.x2 = box.x1 + r->width + 1;
+			box.y2 = box.y1 + r->height + 1;
+			DBG(("%s: blt (%d, %d), (%d, %d)\n",
+			     __FUNCTION__,
+			     box.x1, box.y1, box.x2, box.y2));
+			fill.box(sna, &fill, &box);
+		} else {
+			box.x1 = r->x + dx;
+			box.y1 = r->y + dy;
+			box.x2 = box.x1 + r->width + 1;
+			box.y2 = box.y1 + 1;
+			DBG(("%s: blt (%d, %d), (%d, %d)\n",
+			     __FUNCTION__,
+			     box.x1, box.y1, box.x2, box.y2));
+			fill.box(sna, &fill, &box);
+
+			box.y1 += r->height;
+			box.y2 += r->height;
+			DBG(("%s: blt (%d, %d), (%d, %d)\n",
+			     __FUNCTION__,
+			     box.x1, box.y1, box.x2, box.y2));
+			fill.box(sna, &fill, &box);
+
+			box.y1 = r->y + dy + 1;
+			box.y2 = box.y1 + r->height - 1;
+			box.x1 = r->x + dx;
+			box.x2 = box.x1 + 1;
+			DBG(("%s: blt (%d, %d), (%d, %d)\n",
+			     __FUNCTION__,
+			     box.x1, box.y1, box.x2, box.y2));
+			fill.box(sna, &fill, &box);
+
+			box.x1 += r->width;
+			box.x2 += r->width;
+			DBG(("%s: blt (%d, %d), (%d, %d)\n",
+			     __FUNCTION__,
+			     box.x1, box.y1, box.x2, box.y2));
+			fill.box(sna, &fill, &box);
+		}
+		r++;
+	} while (--n);
+	goto done;
+
+maybe_damage_zero_clipped: /* boxes are built in drawable coordinates, clipped, then shifted by dx/dy */
+	{
+		RegionRec clip;
+		BoxRec box[4], *b, *c;
+		int count = 0, i, j;
+
+		region_set(&clip, extents);
+		region_maybe_clip(&clip, gc->pCompositeClip);
+		do {
+			if (r->width <= 1 || r->height <= 1) {
+				box[0].x1 = r->x + drawable->x;
+				box[0].y1 = r->y + drawable->y;
+				box[0].x2 = box[0].x1 + r->width + 1;
+				box[0].y2 = box[0].y1 + r->height + 1;
+				count = 1;
+			} else {
+				box[0].x1 = r->x + drawable->x;
+				box[0].y1 = r->y + drawable->y;
+				box[0].x2 = box[0].x1 + r->width + 1;
+				box[0].y2 = box[0].y1 + 1;
+
+				box[1] = box[0];
+				box[1].y1 += r->height;
+				box[1].y2 += r->height;
+
+				box[2].y1 = r->y + drawable->y + 1;
+				box[2].y2 = box[2].y1 + r->height - 1;
+				box[2].x1 = r->x + drawable->x;
+				box[2].x2 = box[2].x1 + 1;
+
+				box[3] = box[2];
+				box[3].x1 += r->width;
+				box[3].x2 += r->width;
+				count = 4;
+			}
+
+			for (i = REGION_NUM_RECTS(&clip), c = REGION_RECTS(&clip); i--; c++) {
+				for (j = count, b = box; j--; b++) {
+					BoxRec clipped = *b; /* shadows the bool parameter inside this scope */
+					if (box_intersect(&clipped, c)) {
+						clipped.x1 += dx;
+						clipped.x2 += dx;
+						clipped.y1 += dy;
+						clipped.y2 += dy;
+						DBG(("%s: blt (%d, %d), (%d, %d)\n",
+						     __FUNCTION__,
+						     clipped.x1, clipped.y1, clipped.x2, clipped.y2));
+						fill.box(sna, &fill, &clipped);
+						if (damage) {
+							assert_pixmap_contains_box(pixmap, &clipped);
+							sna_damage_add_box(damage, &clipped);
+						}
+					}
+				}
+			}
+			r++;
+		} while (--n);
+		RegionUninit(&clip); /* free whatever region_maybe_clip may have allocated; sna_fill_spans_blt releases its clip the same way */
+	}
+	goto done;
+
+maybe_damage_wide_clipped:
+	{
+		RegionRec clip;
+		int16_t offset2 = gc->lineWidth; /* full line width */
+		int16_t offset1 = offset2 >> 1; /* part of the width outside the rectangle edge */
+		int16_t offset3 = offset2 - offset1; /* remainder, inside the edge */
+
+		region_set(&clip, extents);
+		region_maybe_clip(&clip, gc->pCompositeClip);
+		do {
+			BoxRec box[4], *c, *b;
+			int16_t x = r->x + drawable->x;
+			int16_t y = r->y + drawable->y;
+			int16_t width = r->width;
+			int16_t height = r->height;
+			int count, i, j;
+			r++;
+
+			if (height < offset2 || width < offset1) { /* edges would overlap: one solid box */
+				if (height == 0) {
+					box[0].x1 = x;
+					box[0].x2 = x + width + 1;
+				} else {
+					box[0].x1 = x - offset1;
+					box[0].x2 = box[0].x1 + width + offset2;
+				}
+				if (width == 0) {
+					box[0].y1 = y;
+					box[0].y2 = y + height + 1;
+				} else {
+					box[0].y1 = y - offset1;
+					box[0].y2 = box[0].y1 + height + offset2;
+				}
+				count = 1;
+			} else { /* top, left, right, bottom -- same geometry as no_damage_wide */
+				box[0].x1 = x - offset1;
+				box[0].x2 = box[0].x1 + width + offset2;
+				box[0].y1 = y - offset1;
+				box[0].y2 = box[0].y1 + offset2;
+
+				box[1].x1 = x - offset1;
+				box[1].x2 = box[1].x1 + offset2;
+				box[1].y1 = y + offset3;
+				box[1].y2 = y + height - offset1;
+
+				box[2].x1 = x + width - offset1;
+				box[2].x2 = box[2].x1 + offset2;
+				box[2].y1 = y + offset3;
+				box[2].y2 = y + height - offset1;
+
+				box[3] = box[0]; /* bottom edge is the top edge moved down by height (was copied from the left edge) */
+				box[3].y1 += height;
+				box[3].y2 += height;
+				count = 4;
+			}
+
+			for (i = REGION_NUM_RECTS(&clip), c = REGION_RECTS(&clip); i--; c++) {
+				for (j = count, b = box; j--; b++) {
+					BoxRec clipped = *b;
+					if (box_intersect(&clipped, c)) {
+						clipped.x1 += dx;
+						clipped.x2 += dx;
+						clipped.y1 += dy;
+						clipped.y2 += dy;
+						DBG(("%s: blt (%d, %d), (%d, %d)\n",
+						     __FUNCTION__,
+						     clipped.x1, clipped.y1, clipped.x2, clipped.y2));
+						fill.box(sna, &fill, &clipped);
+						if (damage) {
+							assert_pixmap_contains_box(pixmap, &clipped);
+							sna_damage_add_box(damage, &clipped);
+						}
+					}
+				}
+			}
+		} while (--n);
+		RegionUninit(&clip); /* free whatever region_maybe_clip may have allocated */
+	}
+	goto done;
+
+no_damage_wide:
+	{
+		int offset2 = gc->lineWidth;
+		int offset1 = offset2 >> 1;
+		int offset3 = offset2 - offset1;
+
+		dx += drawable->x;
+		dy += drawable->y;
+
+		do {
+			BoxRec box;
+			int16_t x = r->x + dx;
+			int16_t y = r->y + dy;
+			int16_t width = r->width;
+			int16_t height = r->height;
+			r++;
+
+			if (height < offset2 || width < offset1) {
+				if (height == 0) {
+					box.x1 = x;
+					box.x2 = x + width + 1;
+				} else {
+					box.x1 = x - offset1;
+					box.x2 = box.x1 + width + offset2;
+				}
+				if (width == 0) {
+					box.y1 = y;
+					box.y2 = y + height + 1;
+				} else {
+					box.y1 = y - offset1;
+					box.y2 = box.y1 + height + offset2;
+				}
+				fill.box(sna, &fill, &box);
+			} else {
+				box.x1 = x - offset1;
+				box.x2 = box.x1 + width + offset2;
+				box.y1 = y - offset1;
+				box.y2 = box.y1 + offset2;
+				fill.box(sna, &fill, &box);
+
+				box.x1 = x - offset1;
+				box.x2 = box.x1 + offset2;
+				box.y1 = y + offset3;
+				box.y2 = y + height - offset1;
+				fill.box(sna, &fill, &box);
+
+				box.x1 = x + width - offset1; /* right edge reuses y1/y2 from the left edge */
+				box.x2 = box.x1 + offset2;
+				fill.box(sna, &fill, &box);
+
+				box.x1 = x - offset1;
+				box.x2 = box.x1 + width + offset2;
+				box.y1 = y + height - offset1;
+				box.y2 = box.y1 + offset2;
+				fill.box(sna, &fill, &box);
+			}
+
+		} while (--n);
+	}
+	goto done;
+
+damage_wide: /* same geometry as no_damage_wide, plus damage tracking per box */
+	{
+		int offset2 = gc->lineWidth;
+		int offset1 = offset2 >> 1;
+		int offset3 = offset2 - offset1;
+
+		dx += drawable->x;
+		dy += drawable->y;
+
+		do {
+			BoxRec box;
+			int16_t x = r->x + dx;
+			int16_t y = r->y + dy;
+			int16_t width = r->width;
+			int16_t height = r->height;
+			r++;
+
+			if (height < offset2 || width < offset1) {
+				if (height == 0) {
+					box.x1 = x;
+					box.x2 = x + width + 1;
+				} else {
+					box.x1 = x - offset1;
+					box.x2 = box.x1 + width + offset2;
+				}
+				if (width == 0) {
+					box.y1 = y;
+					box.y2 = y + height + 1;
+				} else {
+					box.y1 = y - offset1;
+					box.y2 = box.y1 + height + offset2;
+				}
+				fill.box(sna, &fill, &box);
+				assert_pixmap_contains_box(pixmap, &box);
+				sna_damage_add_box(damage, &box);
+			} else {
+				box.x1 = x - offset1;
+				box.x2 = box.x1 + width + offset2;
+				box.y1 = y - offset1;
+				box.y2 = box.y1 + offset2;
+				fill.box(sna, &fill, &box);
+				assert_pixmap_contains_box(pixmap, &box);
+				sna_damage_add_box(damage, &box);
+
+				box.x1 = x - offset1;
+				box.x2 = box.x1 + offset2;
+				box.y1 = y + offset3;
+				box.y2 = y + height - offset1;
+				fill.box(sna, &fill, &box);
+				assert_pixmap_contains_box(pixmap, &box);
+				sna_damage_add_box(damage, &box);
+
+				box.x1 = x + width - offset1;
+				box.x2 = box.x1 + offset2;
+				fill.box(sna, &fill, &box);
+				assert_pixmap_contains_box(pixmap, &box);
+				sna_damage_add_box(damage, &box);
+
+				box.x1 = x - offset1;
+				box.x2 = box.x1 + width + offset2;
+				box.y1 = y + height - offset1;
+				box.y2 = box.y1 + offset2;
+				fill.box(sna, &fill, &box);
+				assert_pixmap_contains_box(pixmap, &box);
+				sna_damage_add_box(damage, &box);
+			}
+		} while (--n);
+	}
+	goto done;
+
+done:
+	fill.done(sna, &fill);
+	return TRUE;
+}
+
+static void /* PolyRectangle entry installed in sna_gc_ops: blitter fills first, then mi decomposition, then fb */
+sna_poly_rectangle(DrawablePtr drawable, GCPtr gc, int n, xRectangle *r)
+{
+	struct sna *sna = to_sna_from_drawable(drawable);
+	RegionRec region;
+	unsigned flags;
+
+	DBG(("%s(n=%d, first=((%d, %d)x(%d, %d)), lineWidth=%d\n",
+	     __FUNCTION__,
+	     n, r->x, r->y, r->width, r->height,
+	     gc->lineWidth)); /* NOTE(review): names r[0] before the n == 0 check below -- confirm DBG is inert in normal builds */
+
+	flags = sna_poly_rectangle_extents(drawable, gc, n, r, &region.extents); /* 0: nothing to draw; bit 1: clipped */
+	if (flags == 0)
+		return;
+
+	DBG(("%s: extents=(%d, %d), (%d, %d), flags=%x\n", __FUNCTION__,
+	     region.extents.x1, region.extents.y1,
+	     region.extents.x2, region.extents.y2,
+	     flags));
+
+	if (FORCE_FALLBACK)
+		goto fallback;
+
+	if (wedged(sna)) {
+		DBG(("%s: fallback -- wedged\n", __FUNCTION__));
+		goto fallback;
+	}
+
+	DBG(("%s: line=%d [%d], join=%d [%d], mask=%lu [%d]\n",
+	     __FUNCTION__,
+	     gc->lineStyle, gc->lineStyle == LineSolid,
+	     gc->joinStyle, gc->joinStyle == JoinMiter,
+	     gc->planemask, PM_IS_SOLID(drawable, gc->planemask)));
+	if (gc->lineStyle == LineSolid && /* only solid, mitred, full-planemask outlines are drawn as boxes */
+	    gc->joinStyle == JoinMiter &&
+	    PM_IS_SOLID(drawable, gc->planemask)) {
+		struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable);
+
+		DBG(("%s: trying blt solid fill [%08lx] paths\n",
+		     __FUNCTION__, gc->fgPixel));
+
+		if (sna_drawable_use_gpu_bo(drawable, &region.extents) && /* first choice: the GPU bo */
+		    sna_poly_rectangle_blt(drawable, priv->gpu_bo,
+					   priv->gpu_only ? NULL : reduce_damage(drawable, &priv->gpu_damage, &region.extents),
+					   gc, n, r, &region.extents, flags&2))
+			return;
+
+		if (sna_drawable_use_cpu_bo(drawable, &region.extents) && /* second choice: the CPU bo */
+		    sna_poly_rectangle_blt(drawable, priv->cpu_bo,
+					   reduce_damage(drawable, &priv->cpu_damage, &region.extents),
+					   gc, n, r, &region.extents, flags&2))
+			return;
+	}
+
+	/* Not a trivial outline, but we may still be able to break it
+	 * down into simpler operations that we can accelerate.
+	 */
+	if (sna_drawable_use_gpu_bo(drawable, &region.extents)) {
+		miPolyRectangle(drawable, gc, n, r);
+		return;
+	}
+
+fallback:
+	DBG(("%s: fallback\n", __FUNCTION__));
+
+	region.data = NULL; /* only region.extents has been filled in so far */
+	region_maybe_clip(&region, gc->pCompositeClip);
+	if (!RegionNotEmpty(&region))
+		return;
+
+	sna_gc_move_to_cpu(gc);
+	sna_drawable_move_region_to_cpu(drawable, &region, true);
+	RegionUninit(&region);
+
+	DBG(("%s: fbPolyRectangle\n", __FUNCTION__));
+	fbPolyRectangle(drawable, gc, n, r);
+}
+
 static Bool
 sna_poly_arc_extents(DrawablePtr drawable, GCPtr gc,
 		     int n, xArc *arc,
@@ -3901,7 +4457,7 @@ static const GCOps sna_gc_ops = {
 	sna_poly_point,
 	sna_poly_line,
 	sna_poly_segment,
-	miPolyRectangle,
+	sna_poly_rectangle,
 	sna_poly_arc,
 	miFillPolygon,
 	sna_poly_fill_rect,


More information about the xorg-commit mailing list