xf86-video-intel: 6 commits - src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_reg.h

Chris Wilson ickle at kemper.freedesktop.org
Tue Nov 1 14:53:47 PDT 2011


 src/sna/sna_accel.c | 1297 ++++++++++++++++++++++++++++++++++++++++++++++++----
 src/sna/sna_blt.c   |    6 
 src/sna/sna_reg.h   |   46 -
 3 files changed, 1223 insertions(+), 126 deletions(-)

New commits:
commit 42e2036cda6b76f2b64e28a81bdaa661e4cbffa5
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Nov 1 20:56:48 2011 +0000

    sna: Use bo for scratch pixmaps allocated by the dix/mi routines
    
    Now that we have the rudiments of accelerated deep-plane copies, we can
    begin to benefit from using BO for the core dix/mi routines like
    ShmPutImage.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index d1340f1..4fc1337 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -60,6 +60,7 @@
 
 #define USE_SPANS 0
 #define USE_ZERO_SPANS 1
+#define USE_BO_FOR_SCRATCH_PIXMAP 1
 
 DevPrivateKeyRec sna_pixmap_index;
 DevPrivateKey sna_window_key;
@@ -345,7 +346,13 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
 	     width, height, depth, usage));
 
 	if (usage == CREATE_PIXMAP_USAGE_SCRATCH)
+#if USE_BO_FOR_SCRATCH_PIXMAP
+		return sna_pixmap_create_scratch(screen,
+						 width, height, depth,
+						 I915_TILING_X);
+#else
 		return fbCreatePixmap(screen, width, height, depth, usage);
+#endif
 
 	if (usage == SNA_CREATE_SCRATCH)
 		return sna_pixmap_create_scratch(screen,
commit 45cc952994e169590ac4300f4ccf54810377f7f2
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Nov 1 20:20:35 2011 +0000

    sna: Fix debugging assertions for Composite
    
    When comparing drawable clip extents against pixmap boundaries, we need
    to include the pixmap screen offset on a Composited desktop.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
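
Under Composite a redirected window is backed by its own pixmap, so clip
extents expressed in drawable/screen coordinates have to be shifted by the
drawable-to-pixmap deltas before they can be checked against the pixmap
size. A standalone sketch of the corrected bounds check (hypothetical helper
and coordinates, not taken from the patch below):

#include <assert.h>
#include <stdint.h>

struct box { int16_t x1, y1, x2, y2; };

/* Translate drawable-space clip extents by the drawable->pixmap deltas
 * before checking them against the pixmap bounds. */
static void assert_box_in_pixmap(const struct box *clip,
				 int16_t dx, int16_t dy,
				 int width, int height)
{
	assert(dx + clip->x1 >= 0);
	assert(dy + clip->y1 >= 0);
	assert(dx + clip->x2 <= width);
	assert(dy + clip->y2 <= height);
}

int main(void)
{
	/* A redirected window at screen position (100, 50), backed by its
	 * own 200x100 pixmap: the drawable->pixmap deltas are (-100, -50). */
	struct box clip = { 110, 60, 290, 140 };
	assert_box_in_pixmap(&clip, -100, -50, 200, 100);
	return 0;
}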

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 6e8fa7f..d1340f1 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2333,10 +2333,10 @@ no_damage_clipped:
 		if (!RegionNotEmpty(&clip))
 			return TRUE;
 
-		assert(clip.extents.x1 >= 0);
-		assert(clip.extents.y1 >= 0);
-		assert(clip.extents.x2 <= pixmap->drawable.width);
-		assert(clip.extents.y2 <= pixmap->drawable.height);
+		assert(dx + clip.extents.x1 >= 0);
+		assert(dy + clip.extents.y1 >= 0);
+		assert(dx + clip.extents.x2 <= pixmap->drawable.width);
+		assert(dy + clip.extents.y2 <= pixmap->drawable.height);
 
 		DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
 		     __FUNCTION__,
@@ -2444,10 +2444,10 @@ damage_clipped:
 		if (!RegionNotEmpty(&clip))
 			return TRUE;
 
-		assert(clip.extents.x1 >= 0);
-		assert(clip.extents.y1 >= 0);
-		assert(clip.extents.x2 <= pixmap->drawable.width);
-		assert(clip.extents.y2 <= pixmap->drawable.height);
+		assert(dx + clip.extents.x1 >= 0);
+		assert(dy + clip.extents.y1 >= 0);
+		assert(dx + clip.extents.x2 <= pixmap->drawable.width);
+		assert(dy + clip.extents.y2 <= pixmap->drawable.height);
 
 		DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n",
 		     __FUNCTION__,
commit ebc9f3966d28343aebeba6886500abf511631785
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Nov 1 19:47:51 2011 +0000

    sna: Accelerate deep-plane copy
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
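
sna_copy_plane_blt below pulls the selected bit plane out of 8/16/32bpp
source pixels into an MSB-first monochrome buffer and blits it with
XY_MONO_SRC_COPY. A standalone sketch of that per-byte plane extraction for
a 32bpp row (pack_plane_byte is a hypothetical helper, not from the patch):

#include <stdint.h>
#include <stdio.h>
#include <strings.h>	/* ffs() */

/* Pack the plane selected by 'bitplane' from 8 consecutive 32bpp pixels
 * into one monochrome byte, leftmost pixel in the most significant bit. */
static uint8_t pack_plane_byte(const uint32_t *src, uint32_t bitplane)
{
	int bit = ffs(bitplane) - 1;
	uint8_t v = 0;
	int i;

	for (i = 0; i < 8; i++)
		v |= ((src[i] >> bit) & 1) << (7 - i);
	return v;
}

int main(void)
{
	/* Pixels alternate 0x4 and 0x0, so plane 0x4 yields 10101010. */
	const uint32_t row[8] = { 4, 0, 4, 0, 4, 0, 4, 0 };

	printf("plane 0x4 -> 0x%02x\n", pack_plane_byte(row, 0x4)); /* 0xaa */
	return 0;
}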

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 29ea66c..6e8fa7f 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2872,6 +2872,212 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
 	sna->blt_state.fill_bo = 0;
 }
 
+static void
+sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc,
+		   BoxPtr box, int n,
+		   int sx, int sy,
+		   Bool reverse, Bool upsidedown, Pixel bitplane,
+		   void *closure)
+{
+	struct sna *sna = to_sna_from_drawable(drawable);
+	PixmapPtr dst_pixmap = get_drawable_pixmap(drawable);
+	PixmapPtr src_pixmap = get_drawable_pixmap(source);
+	struct sna_pixmap *priv = sna_pixmap(dst_pixmap);
+	int16_t dx, dy;
+	int bit = ffs(bitplane) - 1;
+	uint32_t br00, br13;
+
+	DBG(("%s: plane=%x [%d] x%d\n", __FUNCTION__,
+	     (unsigned)bitplane, bit, n));
+
+	if (n == 0)
+		return;
+
+	sna_pixmap_move_to_cpu(src_pixmap, false);
+	get_drawable_deltas(source, src_pixmap, &dx, &dy);
+	sx += dx;
+	sy += dy;
+
+	get_drawable_deltas(drawable, dst_pixmap, &dx, &dy);
+	if (closure)
+		sna_damage_add_boxes(closure, box, n, dx, dy);
+
+	br00 = XY_MONO_SRC_COPY;
+	if (drawable->bitsPerPixel == 32)
+		br00 |= 3 << 20;
+	br13 = priv->gpu_bo->pitch;
+	if (sna->kgem.gen >= 40) {
+		if (priv->gpu_bo->tiling)
+			br00 |= BLT_DST_TILED;
+		br13 >>= 2;
+	}
+	br13 |= blt_depth(drawable->depth) << 24;
+	br13 |= copy_ROP[gc->alu] << 16;
+
+	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	do {
+		int bx1 = (box->x1 + sx) & ~7;
+		int bx2 = (box->x2 + sx + 7) & ~7;
+		int bw = (bx2 - bx1)/8;
+		int bh = box->y2 - box->y1;
+		int bstride = ALIGN(bw, 2);
+		uint32_t *b;
+		struct kgem_bo *upload;
+		void *ptr;
+
+		DBG(("%s: box(%d, %d), (%d, %d), sx=(%d,%d) bx=[%d, %d]\n",
+		     __FUNCTION__,
+		     box->x1, box->y1,
+		     box->x2, box->y2,
+		     sx, sy, bx1, bx2));
+
+		if (!kgem_check_batch(&sna->kgem, 8) ||
+		    !kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL) ||
+		    !kgem_check_reloc(&sna->kgem, 2)) {
+			_kgem_submit(&sna->kgem);
+			_kgem_set_mode(&sna->kgem, KGEM_BLT);
+		}
+
+		upload = kgem_create_buffer(&sna->kgem,
+					    bstride*bh,
+					    KGEM_BUFFER_WRITE,
+					    &ptr);
+		if (!upload)
+			break;
+
+		switch (source->bitsPerPixel) {
+		case 32:
+			{
+				uint32_t *src = src_pixmap->devPrivate.ptr;
+				uint32_t src_stride = src_pixmap->devKind/sizeof(uint32_t);
+				uint8_t *dst = ptr;
+
+				src += (box->y1 + sy) * src_stride;
+				src += bx1;
+
+				src_stride -= bw * 8;
+				bstride -= bw;
+
+				do {
+					int i = bw;
+					do {
+						uint8_t v = 0;
+
+						v |= ((*src++ >> bit) & 1) << 7;
+						v |= ((*src++ >> bit) & 1) << 6;
+						v |= ((*src++ >> bit) & 1) << 5;
+						v |= ((*src++ >> bit) & 1) << 4;
+						v |= ((*src++ >> bit) & 1) << 3;
+						v |= ((*src++ >> bit) & 1) << 2;
+						v |= ((*src++ >> bit) & 1) << 1;
+						v |= ((*src++ >> bit) & 1) << 0;
+
+						*dst++ = v;
+					} while (--i);
+					dst += bstride;
+					src += src_stride;
+				} while (--bh);
+				break;
+			}
+		case 16:
+			{
+				uint16_t *src = src_pixmap->devPrivate.ptr;
+				uint16_t src_stride = src_pixmap->devKind/sizeof(uint16_t);
+				uint8_t *dst = ptr;
+
+				src += (box->y1 + sy) * src_stride;
+				src += bx1;
+
+				src_stride -= bw * 8;
+				bstride -= bw;
+
+				do {
+					int i = bw;
+					do {
+						uint8_t v = 0;
+
+						v |= ((*src++ >> bit) & 1) << 7;
+						v |= ((*src++ >> bit) & 1) << 6;
+						v |= ((*src++ >> bit) & 1) << 5;
+						v |= ((*src++ >> bit) & 1) << 4;
+						v |= ((*src++ >> bit) & 1) << 3;
+						v |= ((*src++ >> bit) & 1) << 2;
+						v |= ((*src++ >> bit) & 1) << 1;
+						v |= ((*src++ >> bit) & 1) << 0;
+
+						*dst++ = v;
+					} while (--i);
+					dst += bstride;
+					src += src_stride;
+				} while (--bh);
+				break;
+			}
+		case 8:
+			{
+				uint8_t *src = src_pixmap->devPrivate.ptr;
+				uint8_t src_stride = src_pixmap->devKind/sizeof(uint8_t);
+				uint8_t *dst = ptr;
+
+				src += (box->y1 + sy) * src_stride;
+				src += bx1;
+
+				src_stride -= bw * 8;
+				bstride -= bw;
+
+				do {
+					int i = bw;
+					do {
+						uint8_t v = 0;
+
+						v |= ((*src++ >> bit) & 1) << 7;
+						v |= ((*src++ >> bit) & 1) << 6;
+						v |= ((*src++ >> bit) & 1) << 5;
+						v |= ((*src++ >> bit) & 1) << 4;
+						v |= ((*src++ >> bit) & 1) << 3;
+						v |= ((*src++ >> bit) & 1) << 2;
+						v |= ((*src++ >> bit) & 1) << 1;
+						v |= ((*src++ >> bit) & 1) << 0;
+
+						*dst++ = v;
+					} while (--i);
+					dst += bstride;
+					src += src_stride;
+				} while (--bh);
+				break;
+			}
+		default:
+			assert(0);
+			return;
+		}
+
+		b = sna->kgem.batch + sna->kgem.nbatch;
+		b[0] = br00 | ((box->x1 + sx) & 7) << 17;
+		b[1] = br13;
+		b[2] = (box->y1 + dy) << 16 | (box->x1 + dx);
+		b[3] = (box->y2 + dy) << 16 | (box->x2 + dx);
+		b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
+				      priv->gpu_bo,
+				      I915_GEM_DOMAIN_RENDER << 16 |
+				      I915_GEM_DOMAIN_RENDER |
+				      KGEM_RELOC_FENCED,
+				      0);
+		b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
+				      upload,
+				      I915_GEM_DOMAIN_RENDER << 16 |
+				      KGEM_RELOC_FENCED,
+				      0);
+		b[6] = gc->bgPixel;
+		b[7] = gc->fgPixel;
+
+		sna->kgem.nbatch += 8;
+		kgem_bo_destroy(&sna->kgem, upload);
+
+		box++;
+	} while (--n);
+
+	sna->blt_state.fill_bo = 0;
+}
+
 static RegionPtr
 sna_copy_plane(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	       int src_x, int src_y,
@@ -2901,13 +3107,13 @@ sna_copy_plane(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 
 	if (sna_drawable_use_gpu_bo(dst, &region.extents)) {
 		struct sna_pixmap *priv = sna_pixmap(get_drawable_pixmap(dst));
-		if (src->depth == 1 && priv->gpu_bo->tiling != I915_TILING_Y) {
+		if (priv->gpu_bo->tiling != I915_TILING_Y) {
 			RegionUninit(&region);
 			return miDoCopy(src, dst, gc,
 					src_x, src_y,
 					w, h,
 					dst_x, dst_y,
-					sna_copy_bitmap_blt,
+					src->depth == 1 ? sna_copy_bitmap_blt : sna_copy_plane_blt,
 					(Pixel)bit,
 					priv->gpu_only ? NULL : reduce_damage(dst, &priv->gpu_damage, &region.extents));
 		}
commit e0fd07bc251296784bf70f02877765171a053cc6
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Nov 1 17:02:17 2011 +0000

    sna: Accelerate XYPixmap upload when using GXcopy
    
    Mostly for the lols.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
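
Each plane of the XYPixmap is uploaded with XY_FULL_MONO_PATTERN_MONO_SRC_BLT
using ROP 0xce ("S or (D and !P)"), a solid pattern equal to the plane mask
and a monochrome source expanding to fg = plane mask, bg = 0, so only the
bits of the current plane are rewritten. A small standalone check of that
ROP identity (rop_0xce is a hypothetical helper, not from the patch):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* BLT ROP 0xce: result = S | (D & ~P). */
static uint32_t rop_0xce(uint32_t dst, uint32_t src, uint32_t pat)
{
	return src | (dst & ~pat);
}

int main(void)
{
	uint32_t plane = 1u << 3;	/* plane currently being uploaded */
	uint32_t dst = 0xdeadbeef;
	uint32_t set = rop_0xce(dst, plane, plane);	/* source bit = 1 */
	uint32_t clr = rop_0xce(dst, 0, plane);		/* source bit = 0 */

	/* Bits outside the plane are untouched; the plane bit follows the source. */
	assert((set & ~plane) == (dst & ~plane) && (set & plane) == plane);
	assert((clr & ~plane) == (dst & ~plane) && (clr & plane) == 0);
	printf("plane %08x: set -> %08x, clear -> %08x\n", plane, set, clr);
	return 0;
}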

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index b2d0c33..29ea66c 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -140,6 +140,28 @@ static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char
 #define assert_pixmap_contains_box(p, b)
 #endif
 
+inline static bool
+sna_fill_init_blt(struct sna_fill_op *fill,
+		  struct sna *sna,
+		  PixmapPtr pixmap,
+		  struct kgem_bo *bo,
+		  uint8_t alu,
+		  uint32_t pixel)
+{
+	return sna->render.fill(sna, alu, pixmap, bo, pixel, fill);
+}
+
+static Bool
+sna_copy_init_blt(struct sna_copy_op *copy,
+		  struct sna *sna,
+		  PixmapPtr src, struct kgem_bo *src_bo,
+		  PixmapPtr dst, struct kgem_bo *dst_bo,
+		  uint8_t alu)
+{
+	memset(copy, 0, sizeof(*copy));
+	return sna->render.copy(sna, alu, src, src_bo, dst, dst_bo, copy);
+}
+
 static void sna_pixmap_destroy_gpu_bo(struct sna *sna, struct sna_pixmap *priv)
 {
 	kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
@@ -1446,6 +1468,133 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	return true;
 }
 
+static Bool
+sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
+		    int x, int y, int w, int  h, int left,char *bits)
+{
+	struct sna *sna = to_sna_from_drawable(drawable);
+	PixmapPtr pixmap = get_drawable_pixmap(drawable);
+	struct sna_pixmap *priv = sna_pixmap(pixmap);
+	struct kgem_bo *bo = priv->gpu_bo;
+	int16_t dx, dy;
+	unsigned i, skip;
+
+	if (gc->alu != GXcopy)
+		return false;
+
+	if (!sna_drawable_use_gpu_bo(&pixmap->drawable, &region->extents))
+		return false;
+
+	if (bo->tiling == I915_TILING_Y)
+		return false;
+
+	assert_pixmap_contains_box(pixmap, RegionExtents(region));
+	if (!priv->gpu_only)
+		sna_damage_add(&priv->gpu_damage, region);
+
+	DBG(("%s: upload(%d, %d, %d, %d)\n", __FUNCTION__, x, y, w, h));
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	x += dx + drawable->x;
+	y += dy + drawable->y;
+
+	kgem_set_mode(&sna->kgem, KGEM_BLT);
+
+	skip = h * BitmapBytePad(w + left);
+	for (i = 1 << (gc->depth-1); i; i >>= 1, bits += skip) {
+		const BoxRec *box = REGION_RECTS(region);
+		int n = REGION_NUM_RECTS(region);
+
+		if ((gc->planemask & i) == 0)
+			continue;
+
+		/* Region is pre-clipped and translated into pixmap space */
+		do {
+			int bx1 = (box->x1 - x) & ~7;
+			int bx2 = (box->x2 - x + 7) & ~7;
+			int bw = (bx2 - bx1)/8;
+			int bh = box->y2 - box->y1;
+			int bstride = ALIGN(bw, 2);
+			int src_stride;
+			uint8_t *dst, *src;
+			uint32_t *b;
+			struct kgem_bo *upload;
+			void *ptr;
+
+			if (!kgem_check_batch(&sna->kgem, 12) ||
+			    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+			    !kgem_check_reloc(&sna->kgem, 2)) {
+				_kgem_submit(&sna->kgem);
+				_kgem_set_mode(&sna->kgem, KGEM_BLT);
+			}
+
+			upload = kgem_create_buffer(&sna->kgem,
+						    bstride*bh,
+						    KGEM_BUFFER_WRITE,
+						    &ptr);
+			if (!upload)
+				break;
+
+			dst = ptr;
+			bstride -= bw;
+
+			src_stride = BitmapBytePad(w);
+			src = (uint8_t*)bits + (box->y1 - y) * src_stride + bx1/8;
+			src_stride -= bw;
+			do {
+				int i = bw;
+				do {
+					*dst++ = byte_reverse(*src++);
+				} while (--i);
+				dst += bstride;
+				src += src_stride;
+			} while (--bh);
+
+			b = sna->kgem.batch + sna->kgem.nbatch;
+			b[0] = XY_FULL_MONO_PATTERN_MONO_SRC_BLT;
+			if (drawable->bitsPerPixel == 32)
+				b[0] |= 3 << 20;
+			b[0] |= ((box->x1 - x) & 7) << 17;
+			b[1] = bo->pitch;
+			if (sna->kgem.gen >= 40) {
+				if (bo->tiling)
+					b[0] |= BLT_DST_TILED;
+				b[1] >>= 2;
+			}
+			b[1] |= 1 << 31; /* solid pattern */
+			b[1] |= blt_depth(drawable->depth) << 24;
+			b[1] |= 0xce << 16; /* S or (D and !P) */
+			b[2] = box->y1 << 16 | box->x1;
+			b[3] = box->y2 << 16 | box->x2;
+			b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
+					      bo,
+					      I915_GEM_DOMAIN_RENDER << 16 |
+					      I915_GEM_DOMAIN_RENDER |
+					      KGEM_RELOC_FENCED,
+					      0);
+			b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
+					      upload,
+					      I915_GEM_DOMAIN_RENDER << 16 |
+					      KGEM_RELOC_FENCED,
+					      0);
+			b[6] = 0;
+			b[7] = i;
+			b[8] = i;
+			b[9] = i;
+			b[10] = -1;
+			b[11] = -1;
+
+			sna->kgem.nbatch += 12;
+			kgem_bo_destroy(&sna->kgem, upload);
+
+			box++;
+		} while (--n);
+	}
+
+	sna->blt_state.fill_bo = 0;
+	return true;
+}
+
 static void
 sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
 	      int x, int y, int w, int h, int left, int format,
@@ -1491,11 +1640,11 @@ sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
 	if (!RegionNotEmpty(&region))
 		return;
 
-	if (!PM_IS_SOLID(drawable, gc->planemask))
-		goto fallback;
-
 	switch (format) {
 	case ZPixmap:
+		if (!PM_IS_SOLID(drawable, gc->planemask))
+			goto fallback;
+
 		if (sna_put_zpixmap_blt(drawable, gc, &region,
 					x, y, w, h,
 					bits, PixmapBytePad(w, depth)))
@@ -1503,12 +1652,22 @@ sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
 		break;
 
 	case XYBitmap:
+		if (!PM_IS_SOLID(drawable, gc->planemask))
+			goto fallback;
+
 		if (sna_put_xybitmap_blt(drawable, gc, &region,
 					 x, y, w, h,
 					 bits))
 			return;
 		break;
 
+	case XYPixmap:
+		if (sna_put_xypixmap_blt(drawable, gc, &region,
+					 x, y, w, h, left,
+					 bits))
+			return;
+		break;
+
 	default:
 		break;
 	}
@@ -2032,28 +2191,6 @@ box_intersect(BoxPtr a, const BoxRec *b)
 	return a->x1 < a->x2 && a->y1 < a->y2;
 }
 
-inline static bool
-sna_fill_init_blt(struct sna_fill_op *fill,
-		  struct sna *sna,
-		  PixmapPtr pixmap,
-		  struct kgem_bo *bo,
-		  uint8_t alu,
-		  uint32_t pixel)
-{
-	return sna->render.fill(sna, alu, pixmap, bo, pixel, fill);
-}
-
-static Bool
-sna_copy_init_blt(struct sna_copy_op *copy,
-		  struct sna *sna,
-		  PixmapPtr src, struct kgem_bo *src_bo,
-		  PixmapPtr dst, struct kgem_bo *dst_bo,
-		  uint8_t alu)
-{
-	memset(copy, 0, sizeof(*copy));
-	return sna->render.copy(sna, alu, src, src_bo, dst, dst_bo, copy);
-}
-
 static const BoxRec *
 find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y)
 {
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index ae24e62..12dd2a0 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -330,7 +330,7 @@ static void sna_blt_copy_one(struct sna *sna,
 	/* Compare against a previous fill */
 	if (kgem->nbatch >= 6 &&
 	    blt->overwrites &&
-	    kgem->batch[kgem->nbatch-6] == ((blt->cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT_CMD) &&
+	    kgem->batch[kgem->nbatch-6] == ((blt->cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT) &&
 	    kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
 	    kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width)) &&
 	    kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->handle) {
@@ -1515,7 +1515,7 @@ static Bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
 	assert(box->x1 >= 0);
 	assert(box->y1 >= 0);
 
-	cmd = XY_COLOR_BLT_CMD;
+	cmd = XY_COLOR_BLT;
 	if (bpp == 32)
 		cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
 
@@ -1776,7 +1776,7 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 	if (kgem->nbatch >= 6 &&
 	    (alu == GXcopy || alu == GXclear) &&
 	    kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->handle &&
-	    kgem->batch[kgem->nbatch-6] == ((cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT_CMD) &&
+	    kgem->batch[kgem->nbatch-6] == ((cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT) &&
 	    kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
 	    kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
 		DBG(("%s: deleting last fill\n", __FUNCTION__));
diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h
index f1fbd8b..ff2ff3b 100644
--- a/src/sna/sna_reg.h
+++ b/src/sna/sna_reg.h
@@ -43,7 +43,7 @@
 #define BLT_DST_TILED		(1<<11)
 
 #define COLOR_BLT_CMD			((2<<29)|(0x40<<22)|(0x3))
-#define XY_COLOR_BLT_CMD		((2<<29)|(0x50<<22)|(0x4))
+#define XY_COLOR_BLT			((2<<29)|(0x50<<22)|(0x4))
 #define XY_SETUP_BLT			((2<<29)|(1<<22)|6)
 #define XY_SETUP_MONO_PATTERN_SL_BLT	((2<<29)|(0x11<<22)|7)
 #define XY_SETUP_CLIP			((2<<29)|(3<<22)|1)
@@ -55,6 +55,7 @@
 #define XY_MONO_PAT			((0x2<<29)|(0x52<<22)|0x7)
 #define XY_MONO_SRC_COPY		((0x2<<29)|(0x54<<22)|(0x6))
 #define XY_MONO_SRC_COPY_IMM		((0x2<<29)|(0x71<<22))
+#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT	((0x2<<29)|(0x58<<22)|0xa)
 
 /* FLUSH commands */
 #define BRW_3D(Pipeline,Opcode,Subopcode) \
commit 22c43efe6b9b5f669593aa9f3af6ee437426c5d2
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Nov 1 13:43:23 2011 +0000

    sna: Implement 8x8 stippled rect fills
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
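
The 8x8 stipple is packed into the two 32-bit pattern dwords of XY_MONO_PAT,
one byte-reversed row per byte, with the pattern origin folded into BR00. A
standalone sketch of the pattern packing (pack_stipple_8x8 is a hypothetical
helper; it assumes the stipple rows are stored LSB-first, which is why the
patch byte-reverses them):

#include <stdint.h>
#include <stdio.h>

static uint8_t byte_reverse(uint8_t b)
{
	return ((b * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
}

/* Pack an 8x8 stipple, one byte per row at 'stride' bytes apart, into the
 * two pattern dwords, reversing each row into the bit order the BLT wants. */
static void pack_stipple_8x8(const uint8_t *rows, int stride, uint32_t pat[2])
{
	uint8_t *dst = (uint8_t *)pat;
	int n;

	for (n = 0; n < 8; n++) {
		*dst++ = byte_reverse(rows[0]);
		rows += stride;
	}
}

int main(void)
{
	/* A 50% checkerboard: alternating 0x55/0xaa rows. */
	const uint8_t rows[8] = { 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa, 0x55, 0xaa };
	uint32_t pat[2];

	pack_stipple_8x8(rows, 1, pat);
	printf("pat[0]=%08x pat[1]=%08x\n", pat[0], pat[1]);
	return 0;
}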

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 67a4bd3..b2d0c33 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -82,6 +82,24 @@ static const uint8_t copy_ROP[] = {
 	ROP_DSan,               /* GXnand */
 	ROP_1                   /* GXset */
 };
+static const uint8_t fill_ROP[] = {
+	ROP_0,
+	ROP_DPa,
+	ROP_PDna,
+	ROP_P,
+	ROP_DPna,
+	ROP_D,
+	ROP_DPx,
+	ROP_DPo,
+	ROP_DPon,
+	ROP_PDxn,
+	ROP_Dn,
+	ROP_PDno,
+	ROP_Pn,
+	ROP_DPno,
+	ROP_DPan,
+	ROP_1
+};
 
 static inline void region_set(RegionRec *r, const BoxRec *b)
 {
@@ -5542,6 +5560,198 @@ done:
 }
 
 static bool
+sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
+				    struct kgem_bo *bo,
+				    struct sna_damage **damage,
+				    GCPtr gc, int n, xRectangle *r,
+				    const BoxRec *extents, unsigned clipped)
+{
+	struct sna *sna = to_sna_from_drawable(drawable);
+	PixmapPtr pixmap = get_drawable_pixmap(drawable);
+	uint32_t pat[2], br00, br13;
+	int16_t dx, dy;
+
+	DBG(("%s: alu=%d, upload (%d, %d), (%d, %d), origin (%d, %d)\n",
+	     __FUNCTION__, gc->alu,
+	     extents->x1, extents->y1,
+	     extents->x2, extents->y2,
+	     gc->patOrg.x, gc->patOrg.y));
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	{
+		unsigned px = (gc->patOrg.x - dx) & 7;
+		unsigned py = (gc->patOrg.y - dy) & 7;
+		DBG(("%s: pat offset (%d, %d)\n", __FUNCTION__ ,px, py));
+		br00 = XY_MONO_PAT | px << 12 | py << 8;
+		if (drawable->bitsPerPixel == 32)
+			br00 |= 3 << 20;
+
+		br13 = bo->pitch;
+		if (sna->kgem.gen >= 40) {
+			if (bo->tiling)
+				br00 |= BLT_DST_TILED;
+			br13 >>= 2;
+		}
+		br13 |= (gc->fillStyle == FillStippled) << 28;
+		br13 |= blt_depth(drawable->depth) << 24;
+		br13 |= fill_ROP[gc->alu] << 16;
+	}
+
+	{
+		uint8_t *dst = (uint8_t *)pat;
+		const uint8_t *src = gc->stipple->devPrivate.ptr;
+		int stride = gc->stipple->devKind;
+		int n = 8;
+		do {
+			*dst++ = byte_reverse(*src);
+			src += stride;
+		} while (--n);
+	}
+
+	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	if (!clipped) {
+		dx += drawable->x;
+		dy += drawable->y;
+
+		sna_damage_add_rectangles(damage, r, n, dx, dy);
+		do {
+			uint32_t *b;
+
+			DBG(("%s: rect (%d, %d)x(%d, %d)\n",
+			     __FUNCTION__, r->x + dx, r->y + dy, r->width, r->height));
+
+			if (!kgem_check_batch(&sna->kgem, 9) ||
+			    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+			    !kgem_check_reloc(&sna->kgem, 1)) {
+				_kgem_submit(&sna->kgem);
+				_kgem_set_mode(&sna->kgem, KGEM_BLT);
+			}
+
+			b = sna->kgem.batch + sna->kgem.nbatch;
+			b[0] = br00;
+			b[1] = br13;
+			b[2] = (r->y + dy) << 16 | (r->x + dx);
+			b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
+			b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+					      I915_GEM_DOMAIN_RENDER << 16 |
+					      I915_GEM_DOMAIN_RENDER |
+					      KGEM_RELOC_FENCED,
+					      0);
+			b[5] = gc->bgPixel;
+			b[6] = gc->fgPixel;
+			b[7] = pat[0];
+			b[8] = pat[1];
+			sna->kgem.nbatch += 9;
+
+			r++;
+		} while (--n);
+	} else {
+		RegionRec clip;
+
+		region_set(&clip, extents);
+		region_maybe_clip(&clip, gc->pCompositeClip);
+		if (!RegionNotEmpty(&clip))
+			return true;
+
+		/* XXX XY_SETUP_BLT + XY_SCANLINE_BLT */
+
+		if (clip.data == NULL) {
+			do {
+				BoxRec box;
+
+				box.x1 = r->x + drawable->x;
+				box.y1 = r->y + drawable->y;
+				box.x2 = bound(box.x1, r->width);
+				box.y2 = bound(box.y1, r->height);
+				r++;
+
+				if (box_intersect(&box, &clip.extents)) {
+					uint32_t *b;
+
+					if (!kgem_check_batch(&sna->kgem, 9) ||
+					    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+					    !kgem_check_reloc(&sna->kgem, 1)) {
+						_kgem_submit(&sna->kgem);
+						_kgem_set_mode(&sna->kgem, KGEM_BLT);
+					}
+
+					b = sna->kgem.batch + sna->kgem.nbatch;
+					b[0] = br00;
+					b[1] = br13;
+					b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
+					b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
+					b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+							      I915_GEM_DOMAIN_RENDER << 16 |
+							      I915_GEM_DOMAIN_RENDER |
+							      KGEM_RELOC_FENCED,
+							      0);
+					b[5] = gc->bgPixel;
+					b[6] = gc->fgPixel;
+					b[7] = pat[0];
+					b[8] = pat[1];
+					sna->kgem.nbatch += 9;
+				}
+			} while (n--);
+		} else {
+			const BoxRec * const clip_start = RegionBoxptr(&clip);
+			const BoxRec * const clip_end = clip_start + clip.data->numRects;
+			const BoxRec *c;
+
+			do {
+				BoxRec box;
+
+				box.x1 = r->x + drawable->x;
+				box.y1 = r->y + drawable->y;
+				box.x2 = bound(box.x1, r->width);
+				box.y2 = bound(box.y1, r->height);
+				r++;
+
+				c = find_clip_box_for_y(clip_start,
+							clip_end,
+							box.y1);
+				while (c != clip_end) {
+					BoxRec bb;
+					if (box.y2 <= c->y1)
+						break;
+
+					bb = box;
+					if (box_intersect(&bb, c++)) {
+						uint32_t *b;
+
+						if (!kgem_check_batch(&sna->kgem, 9) ||
+						    !kgem_check_bo_fenced(&sna->kgem, bo, NULL) ||
+						    !kgem_check_reloc(&sna->kgem, 1)) {
+							_kgem_submit(&sna->kgem);
+							_kgem_set_mode(&sna->kgem, KGEM_BLT);
+						}
+
+						b = sna->kgem.batch + sna->kgem.nbatch;
+						b[0] = br00;
+						b[1] = br13;
+						b[2] = (bb.y1 + dy) << 16 | (bb.x1 + dx);
+						b[3] = (bb.y2 + dy) << 16 | (bb.x2 + dx);
+						b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+								      I915_GEM_DOMAIN_RENDER << 16 |
+								      I915_GEM_DOMAIN_RENDER |
+								      KGEM_RELOC_FENCED,
+								      0);
+						b[5] = gc->bgPixel;
+						b[6] = gc->fgPixel;
+						b[7] = pat[0];
+						b[8] = pat[1];
+						sna->kgem.nbatch += 9;
+					}
+
+				}
+			} while (--n);
+		}
+	}
+
+	sna->blt_state.fill_bo = 0;
+	return true;
+}
+
+static bool
 sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
 				  struct kgem_bo *bo,
 				  struct sna_damage **damage,
@@ -5679,6 +5889,11 @@ sna_poly_fill_rect_stippled_blt(DrawablePtr drawable,
 	     extents->y2 - gc->patOrg.y - drawable->y,
 	     stipple->drawable.width, stipple->drawable.height));
 
+	if (stipple->drawable.width == 8 && stipple->drawable.height == 8)
+		return sna_poly_fill_rect_stippled_8x8_blt(drawable, bo, damage,
+							   gc, n, rect,
+							   extents, clipped);
+
 	if (extents->x2 - gc->patOrg.x - drawable->x <= stipple->drawable.width &&
 	    extents->y2 - gc->patOrg.y - drawable->y <= stipple->drawable.height)
 		return sna_poly_fill_rect_stippled_1_blt(drawable, bo, damage,
@@ -5752,13 +5967,13 @@ sna_poly_fill_rect(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect)
 		goto fallback;
 
 	if (gc->fillStyle == FillSolid ||
-	    (gc->fillStyle == FillTiled && gc->tileIsPixel)) {
+	    (gc->fillStyle == FillTiled && gc->tileIsPixel) ||
+	    (gc->fillStyle == FillOpaqueStippled && gc->bgPixel == gc->fgPixel)) {
 		struct sna_pixmap *priv = sna_pixmap_from_drawable(draw);
-		uint32_t color = gc->fillStyle == FillSolid ? gc->fgPixel : gc->tile.pixel;
+		uint32_t color = gc->fillStyle == FillTiled ? gc->tile.pixel : gc->fgPixel;
 
 		DBG(("%s: solid fill [%08lx], testing for blt\n",
-		     __FUNCTION__,
-		     gc->fillStyle == FillSolid ? gc->fgPixel : gc->tile.pixel));
+		     __FUNCTION__, color));
 
 		if (sna_drawable_use_gpu_bo(draw, &region.extents) &&
 		    sna_poly_fill_rect_blt(draw,
@@ -5851,26 +6066,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
 	uint32_t *b;
 	int16_t dx, dy;
 
-	/* XXX sna_blt! */
-	static const uint8_t ROP[] = {
-		ROP_0,                  /* GXclear */
-		ROP_DSa,                /* GXand */
-		ROP_SDna,               /* GXandReverse */
-		ROP_S,                  /* GXcopy */
-		ROP_DSna,               /* GXandInverted */
-		ROP_D,                  /* GXnoop */
-		ROP_DSx,                /* GXxor */
-		ROP_DSo,                /* GXor */
-		ROP_DSon,               /* GXnor */
-		ROP_DSxn,               /* GXequiv */
-		ROP_Dn,                 /* GXinvert */
-		ROP_SDno,               /* GXorReverse */
-		ROP_Sn,                 /* GXcopyInverted */
-		ROP_DSno,               /* GXorInverted */
-		ROP_DSan,               /* GXnand */
-		ROP_1                   /* GXset */
-	};
-	uint8_t rop = transparent ? ROP[gc->alu] : ROP_S;
+	uint8_t rop = transparent ? copy_ROP[gc->alu] : ROP_S;
 	RegionRec clip;
 
 	if (priv->gpu_bo->tiling == I915_TILING_Y) {
diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h
index 9af3b50..f1fbd8b 100644
--- a/src/sna/sna_reg.h
+++ b/src/sna/sna_reg.h
@@ -52,9 +52,7 @@
 #define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
 #define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|0x4)
 #define XY_PAT_BLT_IMMEDIATE		((2<<29)|(0x72<<22))
-#define XY_MONO_PAT_BLT_CMD		((0x2<<29)|(0x52<<22)|0x7)
-#define XY_MONO_PAT_VERT_SEED		((1<<10)|(1<<9)|(1<<8))
-#define XY_MONO_PAT_HORT_SEED		((1<<14)|(1<<13)|(1<<12))
+#define XY_MONO_PAT			((0x2<<29)|(0x52<<22)|0x7)
 #define XY_MONO_SRC_COPY		((0x2<<29)|(0x54<<22)|(0x6))
 #define XY_MONO_SRC_COPY_IMM		((0x2<<29)|(0x71<<22))
 
commit c58b7643e9b86599a41ede516a778bd9869adc8e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Nov 1 10:13:20 2011 +0000

    sna: Accelerate 1bpp uploads
    
    These still get used (see Wine and Swing) by applications that like to
    do "crisp" 1-bit rendering on the client side and then put it onto the
    scanout. So avoid the readbacks and push the uploads through the BLT
    instead. It turns out to be faster than using fb too, bonus!
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
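
The upload path bit-reverses every byte of the client bitmap on its way into
the staging buffer: the server keeps bitmaps LSB-first within each byte
(assuming the usual LSBFirst bit order), while the BLT's monochrome source
wants the leftmost pixel in the MSB. The byte_reverse() helper added below
uses the classic 64-bit multiply trick; a standalone check against a naive
loop:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Reverse the bits of a byte with the 64-bit multiply trick. */
static uint8_t byte_reverse(uint8_t b)
{
	return ((b * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
}

/* Reference implementation: reverse bit by bit. */
static uint8_t byte_reverse_slow(uint8_t b)
{
	uint8_t r = 0;
	int i;

	for (i = 0; i < 8; i++)
		if (b & (1 << i))
			r |= 0x80 >> i;
	return r;
}

int main(void)
{
	unsigned v;

	for (v = 0; v < 256; v++)
		assert(byte_reverse(v) == byte_reverse_slow(v));
	printf("byte_reverse(0x01) = 0x%02x\n", byte_reverse(0x01)); /* 0x80 */
	return 0;
}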

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 346038e..67a4bd3 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -64,6 +64,25 @@
 DevPrivateKeyRec sna_pixmap_index;
 DevPrivateKey sna_window_key;
 
+static const uint8_t copy_ROP[] = {
+	ROP_0,                  /* GXclear */
+	ROP_DSa,                /* GXand */
+	ROP_SDna,               /* GXandReverse */
+	ROP_S,                  /* GXcopy */
+	ROP_DSna,               /* GXandInverted */
+	ROP_D,                  /* GXnoop */
+	ROP_DSx,                /* GXxor */
+	ROP_DSo,                /* GXor */
+	ROP_DSon,               /* GXnor */
+	ROP_DSxn,               /* GXequiv */
+	ROP_Dn,                 /* GXinvert */
+	ROP_SDno,               /* GXorReverse */
+	ROP_Sn,                 /* GXcopyInverted */
+	ROP_DSno,               /* GXorInverted */
+	ROP_DSan,               /* GXnand */
+	ROP_1                   /* GXset */
+};
+
 static inline void region_set(RegionRec *r, const BoxRec *b)
 {
 	r->extents = *b;
@@ -1184,8 +1203,8 @@ sna_put_image_upload_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 }
 
 static Bool
-sna_put_image_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
-		  int x, int y, int w, int  h, char *bits, int stride)
+sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
+		    int x, int y, int w, int  h, char *bits, int stride)
 {
 	struct sna *sna = to_sna_from_drawable(drawable);
 	PixmapPtr pixmap = get_drawable_pixmap(drawable);
@@ -1281,6 +1300,134 @@ sna_put_image_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	return true;
 }
 
+static inline uint8_t byte_reverse(uint8_t b)
+{
+	return ((b * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
+}
+
+static inline uint8_t blt_depth(int depth)
+{
+	switch (depth) {
+	case 8: return 0;
+	case 15: return 0x2;
+	case 16: return 0x1;
+	default: return 0x3;
+	}
+}
+
+static Bool
+sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
+		    int x, int y, int w, int  h, char *bits)
+{
+	struct sna *sna = to_sna_from_drawable(drawable);
+	PixmapPtr pixmap = get_drawable_pixmap(drawable);
+	struct sna_pixmap *priv = sna_pixmap(pixmap);
+	BoxRec *box;
+	int16_t dx, dy;
+	int n;
+	uint8_t rop = copy_ROP[gc->alu];
+
+	if (!sna_drawable_use_gpu_bo(&pixmap->drawable, &region->extents))
+		return false;
+
+	if (priv->gpu_bo->tiling == I915_TILING_Y)
+		return false;
+
+	assert_pixmap_contains_box(pixmap, RegionExtents(region));
+	if (!priv->gpu_only)
+		sna_damage_add(&priv->gpu_damage, region);
+
+	DBG(("%s: upload(%d, %d, %d, %d)\n", __FUNCTION__, x, y, w, h));
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	x += dx + drawable->x;
+	y += dy + drawable->y;
+
+	kgem_set_mode(&sna->kgem, KGEM_BLT);
+
+	/* Region is pre-clipped and translated into pixmap space */
+	box = REGION_RECTS(region);
+	n = REGION_NUM_RECTS(region);
+	do {
+		int bx1 = (box->x1 - x) & ~7;
+		int bx2 = (box->x2 - x + 7) & ~7;
+		int bw = (bx2 - bx1)/8;
+		int bh = box->y2 - box->y1;
+		int bstride = ALIGN(bw, 2);
+		int src_stride;
+		uint8_t *dst, *src;
+		uint32_t *b;
+		struct kgem_bo *upload;
+		void *ptr;
+
+		if (!kgem_check_batch(&sna->kgem, 8) ||
+		    !kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL) ||
+		    !kgem_check_reloc(&sna->kgem, 2)) {
+			_kgem_submit(&sna->kgem);
+			_kgem_set_mode(&sna->kgem, KGEM_BLT);
+		}
+
+		upload = kgem_create_buffer(&sna->kgem,
+					    bstride*bh,
+					    KGEM_BUFFER_WRITE,
+					    &ptr);
+		if (!upload)
+			break;
+
+		dst = ptr;
+		bstride -= bw;
+
+		src_stride = BitmapBytePad(w);
+		src = (uint8_t*)bits + (box->y1 - y) * src_stride + bx1/8;
+		src_stride -= bw;
+		do {
+			int i = bw;
+			do {
+				*dst++ = byte_reverse(*src++);
+			} while (--i);
+			dst += bstride;
+			src += src_stride;
+		} while (--bh);
+
+		b = sna->kgem.batch + sna->kgem.nbatch;
+		b[0] = XY_MONO_SRC_COPY;
+		if (drawable->bitsPerPixel == 32)
+			b[0] |= 3 << 20;
+		b[0] |= ((box->x1 - x) & 7) << 17;
+		b[1] = priv->gpu_bo->pitch;
+		if (sna->kgem.gen >= 40) {
+			if (priv->gpu_bo->tiling)
+				b[0] |= BLT_DST_TILED;
+			b[1] >>= 2;
+		}
+		b[1] |= blt_depth(drawable->depth) << 24;
+		b[1] |= rop << 16;
+		b[2] = box->y1 << 16 | box->x1;
+		b[3] = box->y2 << 16 | box->x2;
+		b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
+				      priv->gpu_bo,
+				      I915_GEM_DOMAIN_RENDER << 16 |
+				      I915_GEM_DOMAIN_RENDER |
+				      KGEM_RELOC_FENCED,
+				      0);
+		b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
+				      upload,
+				      I915_GEM_DOMAIN_RENDER << 16 |
+				      KGEM_RELOC_FENCED,
+				      0);
+		b[6] = gc->bgPixel;
+		b[7] = gc->fgPixel;
+
+		sna->kgem.nbatch += 8;
+		kgem_bo_destroy(&sna->kgem, upload);
+
+		box++;
+	} while (--n);
+
+	sna->blt_state.fill_bo = 0;
+	return true;
+}
+
 static void
 sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
 	      int x, int y, int w, int h, int left, int format,
@@ -1323,20 +1470,41 @@ sna_put_image(DrawablePtr drawable, GCPtr gc, int depth,
 		RegionTranslate(clip, -dx, -dy);
 	}
 
-	if (RegionNotEmpty(&region) &&
-	    (format != ZPixmap || !PM_IS_SOLID(drawable, gc->planemask) ||
-	     !sna_put_image_blt(drawable, gc, &region,
-				x, y, w, h,
-				bits, PixmapBytePad(w, depth)))) {
-		RegionTranslate(&region, -dx, -dy);
+	if (!RegionNotEmpty(&region))
+		return;
 
-		sna_drawable_move_region_to_cpu(drawable, &region, true);
-		DBG(("%s: fbPutImage(%d, %d, %d, %d)\n",
-		     __FUNCTION__, x, y, w, h));
-		fbPutImage(drawable, gc, depth, x, y, w, h, left, format, bits);
+	if (!PM_IS_SOLID(drawable, gc->planemask))
+		goto fallback;
+
+	switch (format) {
+	case ZPixmap:
+		if (sna_put_zpixmap_blt(drawable, gc, &region,
+					x, y, w, h,
+					bits, PixmapBytePad(w, depth)))
+			return;
+		break;
+
+	case XYBitmap:
+		if (sna_put_xybitmap_blt(drawable, gc, &region,
+					 x, y, w, h,
+					 bits))
+			return;
+		break;
+
+	default:
+		break;
 	}
 
+fallback:
+	DBG(("%s: fallback\n", __FUNCTION__));
+	RegionTranslate(&region, -dx, -dy);
+
+	sna_drawable_move_region_to_cpu(drawable, &region, true);
 	RegionUninit(&region);
+
+	DBG(("%s: fbPutImage(%d, %d, %d, %d)\n",
+	     __FUNCTION__, x, y, w, h));
+	fbPutImage(drawable, gc, depth, x, y, w, h, left, format, bits);
 }
 
 static bool
@@ -2439,6 +2607,116 @@ sna_set_spans(DrawablePtr drawable, GCPtr gc, char *src,
 	fbSetSpans(drawable, gc, src, pt, width, n, sorted);
 }
 
+static void
+sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
+		    BoxPtr box, int n,
+		    int sx, int sy,
+		    Bool reverse, Bool upsidedown, Pixel bitplane,
+		    void *closure)
+{
+	struct sna *sna = to_sna_from_drawable(drawable);
+	PixmapPtr pixmap = get_drawable_pixmap(drawable);
+	struct sna_pixmap *priv = sna_pixmap(pixmap);
+	PixmapPtr bitmap = (PixmapPtr)_bitmap;
+	int16_t dx, dy;
+	uint8_t rop = copy_ROP[gc->alu];
+
+	DBG(("%s: plane=%x x%d\n", __FUNCTION__, (unsigned)bitplane, n));
+
+	if (n == 0)
+		return;
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	if (closure)
+		sna_damage_add_boxes(closure, box, n, dx, dy);
+
+	kgem_set_mode(&sna->kgem, KGEM_BLT);
+	do {
+		int bx1 = (box->x1 + sx) & ~7;
+		int bx2 = (box->x2 + sx + 7) & ~7;
+		int bw = (bx2 - bx1)/8;
+		int bh = box->y2 - box->y1;
+		int bstride = ALIGN(bw, 2);
+		int src_stride;
+		uint8_t *dst, *src;
+		uint32_t *b;
+		struct kgem_bo *upload;
+		void *ptr;
+
+		DBG(("%s: box(%d, %d), (%d, %d), sx=(%d,%d) bx=[%d, %d]\n",
+		     __FUNCTION__,
+		     box->x1, box->y1,
+		     box->x2, box->y2,
+		     sx, sy, bx1, bx2));
+
+		if (!kgem_check_batch(&sna->kgem, 8) ||
+		    !kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL) ||
+		    !kgem_check_reloc(&sna->kgem, 2)) {
+			_kgem_submit(&sna->kgem);
+			_kgem_set_mode(&sna->kgem, KGEM_BLT);
+		}
+
+		upload = kgem_create_buffer(&sna->kgem,
+					    bstride*bh,
+					    KGEM_BUFFER_WRITE,
+					    &ptr);
+		if (!upload)
+			break;
+
+		dst = ptr;
+		bstride -= bw;
+
+		src_stride = bitmap->devKind;
+		src = (uint8_t*)bitmap->devPrivate.ptr;
+		src += (box->y1 + sy) * src_stride + bx1/8;
+		src_stride -= bw;
+		do {
+			int i = bw;
+			do {
+				*dst++ = byte_reverse(*src++);
+			} while (--i);
+			dst += bstride;
+			src += src_stride;
+		} while (--bh);
+
+		b = sna->kgem.batch + sna->kgem.nbatch;
+		b[0] = XY_MONO_SRC_COPY;
+		if (drawable->bitsPerPixel == 32)
+			b[0] |= 3 << 20;
+		b[0] |= ((box->x1 + sx) & 7) << 17;
+		b[1] = priv->gpu_bo->pitch;
+		if (sna->kgem.gen >= 40) {
+			if (priv->gpu_bo->tiling)
+				b[0] |= BLT_DST_TILED;
+			b[1] >>= 2;
+		}
+		b[1] |= blt_depth(drawable->depth) << 24;
+		b[1] |= rop << 16;
+		b[2] = (box->y1 + dy) << 16 | (box->x1 + dx);
+		b[3] = (box->y2 + dy) << 16 | (box->x2 + dx);
+		b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
+				      priv->gpu_bo,
+				      I915_GEM_DOMAIN_RENDER << 16 |
+				      I915_GEM_DOMAIN_RENDER |
+				      KGEM_RELOC_FENCED,
+				      0);
+		b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
+				      upload,
+				      I915_GEM_DOMAIN_RENDER << 16 |
+				      KGEM_RELOC_FENCED,
+				      0);
+		b[6] = gc->bgPixel;
+		b[7] = gc->fgPixel;
+
+		sna->kgem.nbatch += 8;
+		kgem_bo_destroy(&sna->kgem, upload);
+
+		box++;
+	} while (--n);
+
+	sna->blt_state.fill_bo = 0;
+}
+
 static RegionPtr
 sna_copy_plane(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	       int src_x, int src_y,
@@ -2451,6 +2729,12 @@ sna_copy_plane(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d\n", __FUNCTION__,
 	     src_x, src_y, dst_x, dst_y, w, h));
 
+	if (src->bitsPerPixel == 1 && (bit&1) == 0)
+		return miHandleExposures(src, dst, gc,
+					 src_x, src_y,
+					 w, h,
+					 dst_x, dst_y, bit);
+
 	region.extents.x1 = dst_x + dst->x;
 	region.extents.y1 = dst_y + dst->y;
 	region.extents.x2 = region.extents.x1 + w;
@@ -2460,12 +2744,29 @@ sna_copy_plane(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	if (!RegionNotEmpty(&region))
 		return NULL;
 
+	if (sna_drawable_use_gpu_bo(dst, &region.extents)) {
+		struct sna_pixmap *priv = sna_pixmap(get_drawable_pixmap(dst));
+		if (src->depth == 1 && priv->gpu_bo->tiling != I915_TILING_Y) {
+			RegionUninit(&region);
+			return miDoCopy(src, dst, gc,
+					src_x, src_y,
+					w, h,
+					dst_x, dst_y,
+					sna_copy_bitmap_blt,
+					(Pixel)bit,
+					priv->gpu_only ? NULL : reduce_damage(dst, &priv->gpu_damage, &region.extents));
+		}
+	}
+
+	DBG(("%s: fallback\n", __FUNCTION__));
 	sna_drawable_move_region_to_cpu(dst, &region, true);
 	RegionTranslate(&region,
 			src_x - dst_x - dst->x + src->x,
 			src_y - dst_y - dst->y + src->y);
 	sna_drawable_move_region_to_cpu(src, &region, false);
 
+	DBG(("%s: fbCopyPlane(%d, %d, %d, %d, %d,%d) %x\n",
+	     __FUNCTION__, src_x, src_y, w, h, dst_x, dst_y, (unsigned)bit));
 	return fbCopyPlane(src, dst, gc, src_x, src_y, w, h, dst_x, dst_y, bit);
 }
 
@@ -5240,6 +5541,153 @@ done:
 	return TRUE;
 }
 
+static bool
+sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
+				  struct kgem_bo *bo,
+				  struct sna_damage **damage,
+				  GCPtr gc, int n, xRectangle *r,
+				  const BoxRec *extents, unsigned clipped)
+{
+	struct sna *sna = to_sna_from_drawable(drawable);
+	PixmapPtr pixmap = get_drawable_pixmap(drawable);
+	struct sna_pixmap *priv = sna_pixmap(pixmap);
+	PixmapPtr stipple = gc->stipple;
+	const DDXPointRec *origin = &gc->patOrg;
+	int16_t dx, dy;
+	uint8_t rop = copy_ROP[gc->alu];
+
+	DBG(("%s: upload (%d, %d), (%d, %d), origin (%d, %d)\n", __FUNCTION__,
+	     extents->x1, extents->y1,
+	     extents->x2, extents->y2,
+	     origin->x, origin->y));
+
+	if (clipped) {
+		DBG(("%s: fallback -- clipped\n", __FUNCTION__));
+		return false;
+	}
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	dx += drawable->x;
+	dy += drawable->y;
+
+	kgem_set_mode(&sna->kgem, KGEM_BLT);
+
+	sna_damage_add_rectangles(damage, r, n, dx, dy);
+	do {
+		int bx1 = (r->x - origin->x) & ~7;
+		int bx2 = (r->x + r->width - origin->x + 7) & ~7;
+		int bw = (bx2 - bx1)/8;
+		int bh = r->height;
+		int bstride = ALIGN(bw, 2);
+		int src_stride;
+		uint8_t *dst, *src;
+		uint32_t *b;
+		struct kgem_bo *upload;
+		void *ptr;
+
+		DBG(("%s: rect (%d, %d)x(%d, %d) stipple [%d,%d]\n",
+		     __FUNCTION__,
+		     r->x, r->y, r->width, r->height,
+		     bx1, bx2));
+
+		if (!kgem_check_batch(&sna->kgem, 8) ||
+		    !kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL) ||
+		    !kgem_check_reloc(&sna->kgem, 2)) {
+			_kgem_submit(&sna->kgem);
+			_kgem_set_mode(&sna->kgem, KGEM_BLT);
+		}
+
+		upload = kgem_create_buffer(&sna->kgem,
+					    bstride*bh,
+					    KGEM_BUFFER_WRITE,
+					    &ptr);
+		if (!upload)
+			break;
+
+		dst = ptr;
+		bstride -= bw;
+
+		src_stride = stipple->devKind;
+		src = (uint8_t*)stipple->devPrivate.ptr;
+		src += (r->y - origin->y) * src_stride + bx1/8;
+		src_stride -= bw;
+		do {
+			int i = bw;
+			do {
+				*dst++ = byte_reverse(*src++);
+			} while (--i);
+			dst += bstride;
+			src += src_stride;
+		} while (--bh);
+
+		b = sna->kgem.batch + sna->kgem.nbatch;
+		b[0] = XY_MONO_SRC_COPY;
+		if (drawable->bitsPerPixel == 32)
+			b[0] |= 3 << 20;
+		b[0] |= ((r->x - origin->x) & 7) << 17;
+		b[1] = priv->gpu_bo->pitch;
+		if (sna->kgem.gen >= 40) {
+			if (priv->gpu_bo->tiling)
+				b[0] |= BLT_DST_TILED;
+			b[1] >>= 2;
+		}
+		b[1] |= (gc->fillStyle == FillStippled) << 29;
+		b[1] |= blt_depth(drawable->depth) << 24;
+		b[1] |= rop << 16;
+		b[2] = (r->y + dy) << 16 | (r->x + dx);
+		b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
+		b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
+				      priv->gpu_bo,
+				      I915_GEM_DOMAIN_RENDER << 16 |
+				      I915_GEM_DOMAIN_RENDER |
+				      KGEM_RELOC_FENCED,
+				      0);
+		b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
+				      upload,
+				      I915_GEM_DOMAIN_RENDER << 16 |
+				      KGEM_RELOC_FENCED,
+				      0);
+		b[6] = gc->bgPixel;
+		b[7] = gc->fgPixel;
+
+		sna->kgem.nbatch += 8;
+		kgem_bo_destroy(&sna->kgem, upload);
+
+		r++;
+	} while (--n);
+
+	sna->blt_state.fill_bo = 0;
+	return true;
+}
+
+static bool
+sna_poly_fill_rect_stippled_blt(DrawablePtr drawable,
+				struct kgem_bo *bo,
+				struct sna_damage **damage,
+				GCPtr gc, int n, xRectangle *rect,
+				const BoxRec *extents, unsigned clipped)
+{
+
+	PixmapPtr stipple = gc->stipple;
+
+	if (bo->tiling == I915_TILING_Y)
+		return false;
+
+	DBG(("%s: origin (%d, %d), extents (stipple): (%d, %d), stipple size %dx%d\n",
+	     __FUNCTION__, gc->patOrg.x, gc->patOrg.y,
+	     extents->x2 - gc->patOrg.x - drawable->x,
+	     extents->y2 - gc->patOrg.y - drawable->y,
+	     stipple->drawable.width, stipple->drawable.height));
+
+	if (extents->x2 - gc->patOrg.x - drawable->x <= stipple->drawable.width &&
+	    extents->y2 - gc->patOrg.y - drawable->y <= stipple->drawable.height)
+		return sna_poly_fill_rect_stippled_1_blt(drawable, bo, damage,
+							 gc, n, rect,
+							 extents, clipped);
+
+	return false;
+}
+
 static unsigned
 sna_poly_fill_rect_extents(DrawablePtr drawable, GCPtr gc,
 			   int n, xRectangle *rect,
@@ -5347,6 +5795,26 @@ sna_poly_fill_rect(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect)
 					     gc, n, rect,
 					     &region.extents, flags & 2))
 			return;
+	} else {
+		struct sna_pixmap *priv = sna_pixmap_from_drawable(draw);
+
+		DBG(("%s: stippled fill, testing for blt\n", __FUNCTION__));
+
+		if (sna_drawable_use_gpu_bo(draw, &region.extents) &&
+		    sna_poly_fill_rect_stippled_blt(draw,
+						    priv->gpu_bo,
+						    priv->gpu_only ? NULL : reduce_damage(draw, &priv->gpu_damage, &region.extents),
+						    gc, n, rect,
+						    &region.extents, flags & 2))
+			return;
+
+		if (sna_drawable_use_cpu_bo(draw, &region.extents) &&
+		    sna_poly_fill_rect_stippled_blt(draw,
+						    priv->cpu_bo,
+						    reduce_damage(draw, &priv->cpu_damage, &region.extents),
+						    gc, n, rect,
+						    &region.extents, flags & 2))
+			return;
 	}
 
 fallback:
@@ -5368,21 +5836,6 @@ fallback:
 	fbPolyFillRect(draw, gc, n, rect);
 }
 
-static uint8_t byte_reverse(uint8_t b)
-{
-	return ((b * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
-}
-
-static uint8_t blt_depth(int depth)
-{
-	switch (depth) {
-	case 8: return 0;
-	case 15: return 0x2;
-	case 16: return 0x1;
-	default: return 0x3;
-	}
-}
-
 static bool
 sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
 	      int _x, int _y, unsigned int _n,
@@ -5453,7 +5906,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
 	b[1] = priv->gpu_bo->pitch;
 	if (sna->kgem.gen >= 40) {
 		if (priv->gpu_bo->tiling)
-			b[0] |= 1 << 11;
+			b[0] |= BLT_DST_TILED;
 		b[1] >>= 2;
 	}
 	b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
@@ -5508,7 +5961,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
 				b[1] = priv->gpu_bo->pitch;
 				if (sna->kgem.gen >= 40) {
 					if (priv->gpu_bo->tiling)
-						b[0] |= 1 << 11;
+						b[0] |= BLT_DST_TILED;
 					b[1] >>= 2;
 				}
 				b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
@@ -5531,7 +5984,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
 
 			b[0] = XY_TEXT_IMMEDIATE_BLT | (1 + len);
 			if (priv->gpu_bo->tiling && sna->kgem.gen >= 40)
-				b[0] |= 1 << 11;
+				b[0] |= BLT_DST_TILED;
 			b[1] = (uint16_t)y1 << 16 | (uint16_t)x1;
 			b[2] = (uint16_t)(y1+h) << 16 | (uint16_t)(x1+w);
 
@@ -5688,6 +6141,124 @@ fallback:
 	fbPolyGlyphBlt(drawable, gc, x, y, n, info, base);
 }
 
+static bool
+sna_push_pixels_solid_blt(GCPtr gc,
+			  PixmapPtr bitmap,
+			  DrawablePtr drawable,
+			  RegionPtr region)
+{
+	struct sna *sna = to_sna_from_drawable(drawable);
+	PixmapPtr pixmap = get_drawable_pixmap(drawable);
+	struct sna_pixmap *priv = sna_pixmap(pixmap);
+	BoxRec *box;
+	int16_t dx, dy;
+	int n;
+	uint8_t rop = copy_ROP[gc->alu];
+
+	if (priv->gpu_bo->tiling == I915_TILING_Y)
+		return false;
+
+	if (!sna_drawable_use_gpu_bo(drawable, &region->extents))
+		return false;
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	RegionTranslate(region, dx, dy);
+
+	assert_pixmap_contains_box(pixmap, RegionExtents(region));
+	if (!priv->gpu_only)
+		sna_damage_add(&priv->gpu_damage, region);
+
+	DBG(("%s: upload(%d, %d, %d, %d)\n", __FUNCTION__,
+	     region->extents.x1, region->extents.y1,
+	     region->extents.x2, region->extents.y2));
+
+	kgem_set_mode(&sna->kgem, KGEM_BLT);
+
+	/* Region is pre-clipped and translated into pixmap space */
+	box = REGION_RECTS(region);
+	n = REGION_NUM_RECTS(region);
+	do {
+		int bx1 = (box->x1 - region->extents.x1) & ~7;
+		int bx2 = (box->x2 - region->extents.x1 + 7) & ~7;
+		int bw = (bx2 - bx1)/8;
+		int bh = box->y2 - box->y1;
+		int bstride = ALIGN(bw, 2);
+		int src_stride;
+		uint8_t *dst, *src;
+		uint32_t *b;
+		struct kgem_bo *upload;
+		void *ptr;
+
+		if (!kgem_check_batch(&sna->kgem, 8) ||
+		    !kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL) ||
+		    !kgem_check_reloc(&sna->kgem, 2)) {
+			_kgem_submit(&sna->kgem);
+			_kgem_set_mode(&sna->kgem, KGEM_BLT);
+		}
+
+		upload = kgem_create_buffer(&sna->kgem,
+					    bstride*bh,
+					    KGEM_BUFFER_WRITE,
+					    &ptr);
+		if (!upload)
+			break;
+
+		dst = ptr;
+		bstride -= bw;
+
+		src_stride = bitmap->devKind;
+		src = (uint8_t*)bitmap->devPrivate.ptr;
+		src += (box->y1 - region->extents.y1) * src_stride + bx1/8;
+		src_stride -= bw;
+		do {
+			int i = bw;
+			do {
+				*dst++ = byte_reverse(*src++);
+			} while (--i);
+			dst += bstride;
+			src += src_stride;
+		} while (--bh);
+
+		b = sna->kgem.batch + sna->kgem.nbatch;
+		b[0] = XY_MONO_SRC_COPY;
+		if (drawable->bitsPerPixel == 32)
+			b[0] |= 3 << 20;
+		b[0] |= ((box->x1 - region->extents.x1) & 7) << 17;
+		b[1] = priv->gpu_bo->pitch;
+		if (sna->kgem.gen >= 40) {
+			if (priv->gpu_bo->tiling)
+				b[0] |= BLT_DST_TILED;
+			b[1] >>= 2;
+		}
+		b[1] |= 1 << 29;
+		b[1] |= blt_depth(drawable->depth) << 24;
+		b[1] |= rop << 16;
+		b[2] = box->y1 << 16 | box->x1;
+		b[3] = box->y2 << 16 | box->x2;
+		b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
+				      priv->gpu_bo,
+				      I915_GEM_DOMAIN_RENDER << 16 |
+				      I915_GEM_DOMAIN_RENDER |
+				      KGEM_RELOC_FENCED,
+				      0);
+		b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
+				      upload,
+				      I915_GEM_DOMAIN_RENDER << 16 |
+				      KGEM_RELOC_FENCED,
+				      0);
+		b[6] = gc->bgPixel;
+		b[7] = gc->fgPixel;
+
+		sna->kgem.nbatch += 8;
+		kgem_bo_destroy(&sna->kgem, upload);
+
+		box++;
+	} while (--n);
+
+	sna->blt_state.fill_bo = 0;
+	return true;
+}
+
 static void
 sna_push_pixels(GCPtr gc, PixmapPtr bitmap, DrawablePtr drawable,
 		int w, int h,
@@ -5722,11 +6293,23 @@ sna_push_pixels(GCPtr gc, PixmapPtr bitmap, DrawablePtr drawable,
 	if (!RegionNotEmpty(&region))
 		return;
 
+	switch (gc->fillStyle) {
+	case FillSolid:
+		if (sna_push_pixels_solid_blt(gc, bitmap, drawable, &region))
+			return;
+		break;
+	default:
+		break;
+	}
+
+	DBG(("%s: fallback\n", __FUNCTION__));
 	sna_gc_move_to_cpu(gc);
 	sna_pixmap_move_to_cpu(bitmap, false);
 	sna_drawable_move_region_to_cpu(drawable, &region, true);
 	RegionUninit(&region);
 
+	DBG(("%s: fallback, fbPushPixels(%d, %d, %d %d)\n",
+	     __FUNCTION__, w, h, x, y));
 	fbPushPixels(gc, bitmap, drawable, w, h, x, y);
 }
 
diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h
index 75f35c4..9af3b50 100644
--- a/src/sna/sna_reg.h
+++ b/src/sna/sna_reg.h
@@ -37,57 +37,26 @@
 #define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB	(0x1<<20)
 
 /* BLT commands */
-#define COLOR_BLT_CMD		((2<<29)|(0x40<<22)|(0x3))
-#define COLOR_BLT_WRITE_ALPHA	(1<<21)
-#define COLOR_BLT_WRITE_RGB	(1<<20)
-
-#define XY_COLOR_BLT_CMD		((2<<29)|(0x50<<22)|(0x4))
-#define XY_COLOR_BLT_WRITE_ALPHA	(1<<21)
-#define XY_COLOR_BLT_WRITE_RGB		(1<<20)
-#define XY_COLOR_BLT_TILED		(1<<11)
-
-#define XY_SETUP_CLIP			((2<<29)|(3<<22)|1)
-
-#define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
-#define XY_SRC_COPY_BLT_WRITE_ALPHA	(1<<21)
-#define XY_SRC_COPY_BLT_WRITE_RGB	(1<<20)
-#define XY_SRC_COPY_BLT_SRC_TILED	(1<<15)
-#define XY_SRC_COPY_BLT_DST_TILED	(1<<11)
-
-#define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|0x4)
-#define SRC_COPY_BLT_WRITE_ALPHA	(1<<21)
-#define SRC_COPY_BLT_WRITE_RGB		(1<<20)
-
-#define XY_PAT_BLT_IMMEDIATE		((2<<29)|(0x72<<22))
-
-#define XY_MONO_PAT_BLT_CMD		((0x2<<29)|(0x52<<22)|0x7)
-#define XY_MONO_PAT_VERT_SEED		((1<<10)|(1<<9)|(1<<8))
-#define XY_MONO_PAT_HORT_SEED		((1<<14)|(1<<13)|(1<<12))
-#define XY_MONO_PAT_BLT_WRITE_ALPHA	(1<<21)
-#define XY_MONO_PAT_BLT_WRITE_RGB	(1<<20)
-
-#define XY_MONO_SRC_BLT_CMD		((0x2<<29)|(0x54<<22)|(0x6))
-#define XY_MONO_SRC_BLT_WRITE_ALPHA	(1<<21)
-#define XY_MONO_SRC_BLT_WRITE_RGB	(1<<20)
-
-/* BLT commands */
 #define BLT_WRITE_ALPHA		(1<<21)
 #define BLT_WRITE_RGB		(1<<20)
 #define BLT_SRC_TILED		(1<<15)
 #define BLT_DST_TILED		(1<<11)
 
-#define COLOR_BLT_CMD		((2<<29)|(0x40<<22)|(0x3))
+#define COLOR_BLT_CMD			((2<<29)|(0x40<<22)|(0x3))
 #define XY_COLOR_BLT_CMD		((2<<29)|(0x50<<22)|(0x4))
 #define XY_SETUP_BLT			((2<<29)|(1<<22)|6)
 #define XY_SETUP_MONO_PATTERN_SL_BLT	((2<<29)|(0x11<<22)|7)
-#define XY_SETUP_CLIP_BLT_CMD		((2<<29)|(3<<22)|1)
+#define XY_SETUP_CLIP			((2<<29)|(3<<22)|1)
 #define XY_SCANLINE_BLT			((2<<29)|(0x25<<22)|1)
 #define XY_TEXT_IMMEDIATE_BLT		((2<<29)|(0x31<<22)|(1<<16))
 #define XY_SRC_COPY_BLT_CMD		((2<<29)|(0x53<<22)|6)
 #define SRC_COPY_BLT_CMD		((2<<29)|(0x43<<22)|0x4)
 #define XY_PAT_BLT_IMMEDIATE		((2<<29)|(0x72<<22))
 #define XY_MONO_PAT_BLT_CMD		((0x2<<29)|(0x52<<22)|0x7)
-#define XY_MONO_SRC_BLT_CMD		((0x2<<29)|(0x54<<22)|(0x6))
+#define XY_MONO_PAT_VERT_SEED		((1<<10)|(1<<9)|(1<<8))
+#define XY_MONO_PAT_HORT_SEED		((1<<14)|(1<<13)|(1<<12))
+#define XY_MONO_SRC_COPY		((0x2<<29)|(0x54<<22)|(0x6))
+#define XY_MONO_SRC_COPY_IMM		((0x2<<29)|(0x71<<22))
 
 /* FLUSH commands */
 #define BRW_3D(Pipeline,Opcode,Subopcode) \

