xf86-video-intel: 5 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/kgem_debug_gen2.c src/sna/sna_accel.c src/sna/sna.h src/sna/sna_trapezoids.c

Chris Wilson ickle at kemper.freedesktop.org
Wed Sep 7 12:29:27 PDT 2011


 src/sna/gen2_render.c     |   13 
 src/sna/gen3_render.c     |   97 +---
 src/sna/kgem_debug_gen2.c |  631 +++++++++++++++++++++++++++
 src/sna/sna.h             |   20 
 src/sna/sna_accel.c       |    5 
 src/sna/sna_trapezoids.c  | 1062 ++++++++++++++++++++++++++++++++++++++++++----
 6 files changed, 1695 insertions(+), 133 deletions(-)

New commits:
commit afdb8aa89a88c62ccd5e26a3ed9adb70fec310a8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Sep 7 20:10:02 2011 +0100

    sna/gen3: Do not assume video updates are always vsync'ed
    
    If the video is running async, there may be subsequent instructions
    within the batch, and so we do need to mark the clobbered state as
    dirty when setting up the video frame.
    
    Reported-by: Paul Neumann <paul104x at yahoo.de>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=40693
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 9f4bcc9..46a5ddd 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -1138,49 +1138,63 @@ gen3_get_batch(struct sna *sna,
 #undef MAX_OBJECTS
 }
 
-static void gen3_emit_composite_state(struct sna *sna,
-				      const struct sna_composite_op *op)
+static void gen3_emit_target(struct sna *sna,
+			     struct kgem_bo *bo,
+			     int width,
+			     int height,
+			     int format)
 {
 	struct gen3_render_state *state = &sna->render_state.gen3;
-	uint32_t map[4];
-	uint32_t sampler[4];
-	struct kgem_bo *bo[2];
-	int tex_count, n;
-	uint32_t ss2;
-
-	gen3_get_batch(sna, op);
 
 	/* BUF_INFO is an implicit flush, so skip if the target is unchanged. */
-	if (op->dst.bo->unique_id != state->current_dst) {
+	if (bo->unique_id != state->current_dst) {
 		uint32_t v;
 
 		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
 		OUT_BATCH(BUF_3D_ID_COLOR_BACK |
-			  gen3_buf_tiling(op->dst.bo->tiling) |
-			  op->dst.bo->pitch);
+			  gen3_buf_tiling(bo->tiling) |
+			  bo->pitch);
 		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
-					 op->dst.bo,
+					 bo,
 					 I915_GEM_DOMAIN_RENDER << 16 |
 					 I915_GEM_DOMAIN_RENDER,
 					 0));
 
 		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
-		OUT_BATCH(gen3_get_dst_format(op->dst.format));
+		OUT_BATCH(gen3_get_dst_format(format));
 
-		v = (DRAW_YMAX(op->dst.height - 1) |
-		     DRAW_XMAX(op->dst.width - 1));
+		v = DRAW_YMAX(height - 1) | DRAW_XMAX(width - 1);
 		if (v != state->last_drawrect_limit) {
 			OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
-			OUT_BATCH(0);
+			OUT_BATCH(0); /* XXX dither origin? */
 			OUT_BATCH(0);
 			OUT_BATCH(v);
 			OUT_BATCH(0);
 			state->last_drawrect_limit = v;
 		}
 
-		state->current_dst = op->dst.bo->unique_id;
+		state->current_dst = bo->unique_id;
 	}
-	kgem_bo_mark_dirty(op->dst.bo);
+	kgem_bo_mark_dirty(bo);
+}
+
+static void gen3_emit_composite_state(struct sna *sna,
+				      const struct sna_composite_op *op)
+{
+	struct gen3_render_state *state = &sna->render_state.gen3;
+	uint32_t map[4];
+	uint32_t sampler[4];
+	struct kgem_bo *bo[2];
+	int tex_count, n;
+	uint32_t ss2;
+
+	gen3_get_batch(sna, op);
+
+	gen3_emit_target(sna,
+			 op->dst.bo,
+			 op->dst.width,
+			 op->dst.height,
+			 op->dst.format);
 
 	ss2 = ~0;
 	tex_count = 0;
@@ -2770,20 +2784,15 @@ gen3_emit_video_state(struct sna *sna,
 		      int width, int height)
 {
 	uint32_t shader_offset;
-	uint32_t ms3, s5;
+	uint32_t ms3;
 
-	/* draw rect -- just clipping */
-	OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
-	OUT_BATCH(DRAW_DITHER_OFS_X(pixmap->drawable.x & 3) |
-		  DRAW_DITHER_OFS_Y(pixmap->drawable.y & 3));
-	OUT_BATCH(0x00000000);	/* ymin, xmin */
-	/* ymax, xmax */
-	OUT_BATCH((width - 1) | (height - 1) << 16);
-	OUT_BATCH(0x00000000);	/* yorigin, xorigin */
+	gen3_emit_target(sna, dst_bo, width, height,
+			 sna_format_for_depth(pixmap->drawable.depth));
 
+	/* XXX share with composite? Is it worth the effort? */
 	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
-		  I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(5) | I1_LOAD_S(6) |
-		  3);
+		  I1_LOAD_S(1) | I1_LOAD_S(2) | I1_LOAD_S(6) |
+		  2);
 	OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT));
 	OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) |
 		  S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) |
@@ -2793,31 +2802,14 @@ gen3_emit_video_state(struct sna *sna,
 		  S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) |
 		  S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) |
 		  S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT));
-	s5 = 0x0;
-	if (pixmap->drawable.depth < 24)
-		s5 |= S5_COLOR_DITHER_ENABLE;
-	OUT_BATCH(s5);
 	OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) |
 		  (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) |
 		  (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) |
 		  S6_COLOR_WRITE_ENABLE | (2 << S6_TRISTRIP_PV_SHIFT));
 
-	OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD);
-	OUT_BATCH(0x00000000);
-
-	OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
-	OUT_BATCH(gen3_get_dst_format(sna_format_for_depth(pixmap->drawable.depth)));
-
-	/* front buffer, pitch, offset */
-	OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
-	OUT_BATCH(BUF_3D_ID_COLOR_BACK |
-		  gen3_buf_tiling(dst_bo->tiling) |
-		  dst_bo->pitch);
-	OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
-				 dst_bo,
-				 I915_GEM_DOMAIN_RENDER << 16 |
-				 I915_GEM_DOMAIN_RENDER,
-				 0));
+	sna->render_state.gen3.last_blend = 0;
+	sna->render_state.gen3.last_sampler = 0;
+	sna->render_state.gen3.floats_per_vertex = 4;
 
 	if (!is_planar_fourcc(frame->id)) {
 		OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4);
@@ -3054,11 +3046,6 @@ gen3_emit_video_state(struct sna *sna,
 	sna->kgem.batch[shader_offset] =
 		_3DSTATE_PIXEL_SHADER_PROGRAM |
 		(sna->kgem.nbatch - shader_offset - 2);
-
-	/* video is the last operation in the batch, so state gets reset
-	 * afterwards automatically
-	 * gen3_reset();
-	 */
 }
 
 static void
commit 6aee152cb8444bf63c0009a75c6537e9886b6070
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Sep 4 12:57:17 2011 +0100

    sna/gen2: Flush the batch when we run out of vertex space
    
    Unlike the later gen, we do not yet use a separate vertex buffer and so
    when we can no longer fit a rectangle (and its CA ghost) we must flush the
    batch. Due to the duplication required for the CA pass, the normal
    checks to see whether we had sufficient space to add the new command
    were passing as they failed to take into account the need to submit the
    whole primitive again.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 021ce79..ff20032 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -566,6 +566,8 @@ gen2_get_batch(struct sna *sna,
 
 static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op)
 {
+	assert (sna->render_state.gen2.vertex_offset == 0);
+
 	if (sna->render_state.gen2.target == op->dst.bo->unique_id) {
 		kgem_bo_mark_dirty(op->dst.bo);
 		return;
@@ -905,6 +907,9 @@ inline static int gen2_get_rectangles(struct sna *sna,
 	struct gen2_render_state *state = &sna->render_state.gen2;
 	int rem = batch_space(sna), size, need;
 
+	DBG(("%s: want=%d, floats_per_vertex=%d, rem=%d\n",
+	     __FUNCTION__, want, op->floats_per_vertex, rem));
+
 	assert(op->floats_per_vertex);
 
 	need = 1;
@@ -912,8 +917,12 @@ inline static int gen2_get_rectangles(struct sna *sna,
 	if (op->need_magic_ca_pass)
 		need += 6 + size*sna->render.vertex_index, size *= 2;
 
-	if (rem < need + size)
+	DBG(("%s: want=%d, need=%d,size=%d, rem=%d\n",
+	     __FUNCTION__, want, need, size, rem));
+	if (rem < need + size) {
+		kgem_submit (&sna->kgem);
 		return 0;
+	}
 
 	rem -= need;
 	if (state->vertex_offset == 0) {
@@ -1564,7 +1573,7 @@ gen2_render_composite_spans_box(struct sna *sna,
 				const struct sna_composite_spans_op *op,
 				const BoxRec *box, float opacity)
 {
-	DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
+	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
 	     __FUNCTION__,
 	     op->base.src.offset[0], op->base.src.offset[1],
 	     opacity,
commit 48bfe4e6ded746f32080b83079235a7576f50d7a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Sep 4 12:46:32 2011 +0100

    sna/gen2: Improve batch decoder.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem_debug_gen2.c b/src/sna/kgem_debug_gen2.c
index b5f61a3..e00cd81 100644
--- a/src/sna/kgem_debug_gen2.c
+++ b/src/sna/kgem_debug_gen2.c
@@ -44,14 +44,641 @@ static struct state {
 	int vertex_format;
 } state;
 
+static inline float int_as_float(uint32_t dw)
+{
+	union {
+		float f;
+		uint32_t dw;
+	} u;
+	u.dw = dw;
+	return u.f;
+}
+
+static int
+decode_3d_primitive(struct kgem *kgem, uint32_t offset)
+{
+    uint32_t *data = kgem->batch + offset;
+    char immediate = (data[0] & (1 << 23)) == 0;
+    unsigned int len;
+    const char *primtype;
+
+    switch ((data[0] >> 18) & 0xf) {
+    case 0x0: primtype = "TRILIST"; break;
+    case 0x1: primtype = "TRISTRIP"; break;
+    case 0x2: primtype = "TRISTRIP_REVERSE"; break;
+    case 0x3: primtype = "TRIFAN"; break;
+    case 0x4: primtype = "POLYGON"; break;
+    case 0x5: primtype = "LINELIST"; break;
+    case 0x6: primtype = "LINESTRIP"; break;
+    case 0x7: primtype = "RECTLIST"; break;
+    case 0x8: primtype = "POINTLIST"; break;
+    case 0x9: primtype = "DIB"; break;
+    case 0xa: primtype = "CLEAR_RECT"; break;
+    default: primtype = "unknown"; break;
+    }
+
+    /* XXX: 3DPRIM_DIB not supported */
+    if (immediate) {
+	len = (data[0] & 0x0003ffff) + 2;
+	kgem_debug_print(data, offset, 0, "3DPRIMITIVE inline %s\n", primtype);
+#if 0
+	if (!saved_s2_set || !saved_s4_set) {
+	    fprintf(out, "unknown vertex format\n");
+	    for (i = 1; i < len; i++) {
+		kgem_debug_print(data, offset, i,
+			  "           vertex data (%f float)\n",
+			  int_as_float(data[i]));
+	    }
+	} else {
+	    unsigned int vertex = 0;
+	    for (i = 1; i < len;) {
+		unsigned int tc;
+
+#define VERTEX_OUT(fmt, ...) do {					\
+    if (i < len)							\
+	kgem_debug_print(data, offset, i, " V%d."fmt"\n", vertex, __VA_ARGS__); \
+    else								\
+	fprintf(out, " missing data in V%d\n", vertex);			\
+    i++;								\
+} while (0)
+
+		VERTEX_OUT("X = %f", int_as_float(data[i]));
+		VERTEX_OUT("Y = %f", int_as_float(data[i]));
+	        switch (saved_s4 >> 6 & 0x7) {
+		case 0x1:
+		    VERTEX_OUT("Z = %f", int_as_float(data[i]));
+		    break;
+		case 0x2:
+		    VERTEX_OUT("Z = %f", int_as_float(data[i]));
+		    VERTEX_OUT("W = %f", int_as_float(data[i]));
+		    break;
+		case 0x3:
+		    break;
+		case 0x4:
+		    VERTEX_OUT("W = %f", int_as_float(data[i]));
+		    break;
+		default:
+		    fprintf(out, "bad S4 position mask\n");
+		}
+
+		if (saved_s4 & (1 << 10)) {
+		    VERTEX_OUT("color = (A=0x%02x, R=0x%02x, G=0x%02x, "
+			       "B=0x%02x)",
+			       data[i] >> 24,
+			       (data[i] >> 16) & 0xff,
+			       (data[i] >> 8) & 0xff,
+			       data[i] & 0xff);
+		}
+		if (saved_s4 & (1 << 11)) {
+		    VERTEX_OUT("spec = (A=0x%02x, R=0x%02x, G=0x%02x, "
+			       "B=0x%02x)",
+			       data[i] >> 24,
+			       (data[i] >> 16) & 0xff,
+			       (data[i] >> 8) & 0xff,
+			       data[i] & 0xff);
+		}
+		if (saved_s4 & (1 << 12))
+		    VERTEX_OUT("width = 0x%08x)", data[i]);
+
+		for (tc = 0; tc <= 7; tc++) {
+		    switch ((saved_s2 >> (tc * 4)) & 0xf) {
+		    case 0x0:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x1:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x2:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Y = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.Z = %f", tc, int_as_float(data[i]));
+			VERTEX_OUT("T%d.W = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x3:
+			VERTEX_OUT("T%d.X = %f", tc, int_as_float(data[i]));
+			break;
+		    case 0x4:
+			VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+			break;
+		    case 0x5:
+			VERTEX_OUT("T%d.XY = 0x%08x half-float", tc, data[i]);
+			VERTEX_OUT("T%d.ZW = 0x%08x half-float", tc, data[i]);
+			break;
+		    case 0xf:
+			break;
+		    default:
+			fprintf(out, "bad S2.T%d format\n", tc);
+		    }
+		}
+		vertex++;
+	    }
+	}
+#endif
+    } else {
+	/* indirect vertices */
+	len = data[0] & 0x0000ffff; /* index count */
+#if 0
+	if (data[0] & (1 << 17)) {
+	    /* random vertex access */
+	    kgem_debug_print(data, offset, 0,
+		      "3DPRIMITIVE random indirect %s (%d)\n", primtype, len);
+	    if (len == 0) {
+		/* vertex indices continue until 0xffff is found */
+		for (i = 1; i < count; i++) {
+		    if ((data[i] & 0xffff) == 0xffff) {
+			kgem_debug_print(data, offset, i,
+				  "    indices: (terminator)\n");
+			ret = i;
+			goto out;
+		    } else if ((data[i] >> 16) == 0xffff) {
+			kgem_debug_print(data, offset, i,
+				  "    indices: 0x%04x, (terminator)\n",
+				  data[i] & 0xffff);
+			ret = i;
+			goto out;
+		    } else {
+			kgem_debug_print(data, offset, i,
+				  "    indices: 0x%04x, 0x%04x\n",
+				  data[i] & 0xffff, data[i] >> 16);
+		    }
+		}
+		fprintf(out,
+			"3DPRIMITIVE: no terminator found in index buffer\n");
+		ret = count;
+		goto out;
+	    } else {
+		/* fixed size vertex index buffer */
+		for (j = 1, i = 0; i < len; i += 2, j++) {
+		    if (i * 2 == len - 1) {
+			kgem_debug_print(data, offset, j,
+				  "    indices: 0x%04x\n",
+				  data[j] & 0xffff);
+		    } else {
+			kgem_debug_print(data, offset, j,
+				  "    indices: 0x%04x, 0x%04x\n",
+				  data[j] & 0xffff, data[j] >> 16);
+		    }
+		}
+	    }
+	    ret = (len + 1) / 2 + 1;
+	    goto out;
+	} else {
+	    /* sequential vertex access */
+	    kgem_debug_print(data, offset, 0,
+		      "3DPRIMITIVE sequential indirect %s, %d starting from "
+		      "%d\n", primtype, len, data[1] & 0xffff);
+	    kgem_debug_print(data, offset, 1, "           start\n");
+	    ret = 2;
+	    goto out;
+	}
+#endif
+    }
+
+    return len;
+}
+
+static int
+decode_3d_1d(struct kgem *kgem, uint32_t offset)
+{
+    uint32_t *data = kgem->batch + offset;
+    unsigned int len, i, idx, word, map;
+    char *format, *zformat, *type;
+    uint32_t opcode;
+
+    static const struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	char *name;
+    } opcodes_3d_1d[] = {
+	{ 0x86, 4, 4, "3DSTATE_CHROMA_KEY" },
+	{ 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
+	{ 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
+	{ 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
+	{ 0x98, 2, 2, "3DSTATE_DEFAULT_Z" },
+	{ 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
+	{ 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
+	{ 0x9e, 4, 4, "3DSTATE_MONO_FILTER" },
+	{ 0x89, 4, 4, "3DSTATE_FOG_MODE" },
+	{ 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
+	{ 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" },
+	{ 0x8c, 2, 2, "3DSTATE_MAP_COORD_TRANSFORM" },
+	{ 0x8b, 2, 2, "3DSTATE_MAP_VERTEX_TRANSFORM" },
+	{ 0x8d, 3, 3, "3DSTATE_W_STATE" },
+	{ 0x01, 2, 2, "3DSTATE_COLOR_FACTOR" },
+	{ 0x02, 2, 2, "3DSTATE_MAP_COORD_SETBIND" },
+    }, *opcode_3d_1d;
+
+    opcode = (data[0] & 0x00ff0000) >> 16;
+
+    switch (opcode) {
+    case 0x07:
+	/* This instruction is unusual.  A 0 length means just 1 DWORD instead of
+	 * 2.  The 0 length is specified in one place to be unsupported, but
+	 * stated to be required in another, and 0 length LOAD_INDIRECTs appear
+	 * to cause no harm at least.
+	 */
+	kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_INDIRECT\n");
+	len = (data[0] & 0x000000ff) + 1;
+	i = 1;
+	if (data[0] & (0x01 << 8)) {
+	    kgem_debug_print(data, offset, i++, "SIS.0\n");
+	    kgem_debug_print(data, offset, i++, "SIS.1\n");
+	}
+	if (data[0] & (0x02 << 8)) {
+	    kgem_debug_print(data, offset, i++, "DIS.0\n");
+	}
+	if (data[0] & (0x04 << 8)) {
+	    kgem_debug_print(data, offset, i++, "SSB.0\n");
+	    kgem_debug_print(data, offset, i++, "SSB.1\n");
+	}
+	if (data[0] & (0x08 << 8)) {
+	    kgem_debug_print(data, offset, i++, "MSB.0\n");
+	    kgem_debug_print(data, offset, i++, "MSB.1\n");
+	}
+	if (data[0] & (0x10 << 8)) {
+	    kgem_debug_print(data, offset, i++, "PSP.0\n");
+	    kgem_debug_print(data, offset, i++, "PSP.1\n");
+	}
+	if (data[0] & (0x20 << 8)) {
+	    kgem_debug_print(data, offset, i++, "PSC.0\n");
+	    kgem_debug_print(data, offset, i++, "PSC.1\n");
+	}
+	assert(len == i);
+	return len;
+    case 0x04:
+	kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
+	len = (data[0] & 0x0000000f) + 2;
+	i = 1;
+	for (word = 0; word <= 8; word++) {
+	    if (data[0] & (1 << (4 + word))) {
+		kgem_debug_print(data, offset, i, "S%d: 0x%08x\n", i, data[i]);
+		i++;
+	    }
+	}
+	assert (len ==i);
+	return len;
+    case 0x03:
+	kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
+	len = (data[0] & 0x0000000f) + 2;
+	i = 1;
+	for (word = 6; word <= 14; word++) {
+	    if (data[0] & (1 << word)) {
+		if (word == 6)
+		    kgem_debug_print(data, offset, i++, "TBCF\n");
+		else if (word >= 7 && word <= 10) {
+		    kgem_debug_print(data, offset, i++, "TB%dC\n", word - 7);
+		    kgem_debug_print(data, offset, i++, "TB%dA\n", word - 7);
+		} else if (word >= 11 && word <= 14) {
+		    kgem_debug_print(data, offset, i, "TM%dS0: offset=0x%08x, %s\n",
+			      word - 11,
+			      data[i]&0xfffffffe,
+			      data[i]&1?"use fence":"");
+		    i++;
+		    kgem_debug_print(data, offset, i, "TM%dS1: height=%i, width=%i, %s\n",
+			      word - 11,
+			      data[i]>>21, (data[i]>>10)&0x3ff,
+			      data[i]&2?(data[i]&1?"y-tiled":"x-tiled"):"");
+		    i++;
+		    kgem_debug_print(data, offset, i, "TM%dS2: pitch=%i, \n",
+			      word - 11,
+			      ((data[i]>>21) + 1)*4);
+		    i++;
+		    kgem_debug_print(data, offset, i++, "TM%dS3\n", word - 11);
+		    kgem_debug_print(data, offset, i++, "TM%dS4: dflt color\n", word - 11);
+		}
+	    }
+	}
+	assert (len == i);
+	return len;
+    case 0x00:
+	kgem_debug_print(data, offset, 0, "3DSTATE_MAP_STATE\n");
+	len = (data[0] & 0x0000003f) + 2;
+	kgem_debug_print(data, offset, 1, "mask\n");
+
+	i = 2;
+	for (map = 0; map <= 15; map++) {
+	    if (data[1] & (1 << map)) {
+		int width, height, pitch, dword;
+		const char *tiling;
+
+		dword = data[i];
+		kgem_debug_print(data, offset, i++, "map %d MS2 %s%s%s\n", map,
+			  dword&(1<<31)?"untrusted surface, ":"",
+			  dword&(1<<1)?"vertical line stride enable, ":"",
+			  dword&(1<<0)?"vertical ofs enable, ":"");
+
+		dword = data[i];
+		width = ((dword >> 10) & ((1 << 11) - 1))+1;
+		height = ((dword >> 21) & ((1 << 11) - 1))+1;
+
+		tiling = "none";
+		if (dword & (1 << 2))
+			tiling = "fenced";
+		else if (dword & (1 << 1))
+			tiling = dword & (1 << 0) ? "Y" : "X";
+		type = " BAD";
+		format = "BAD";
+		switch ((dword>>7) & 0x7) {
+		case 1:
+		    type = "8b";
+		    switch ((dword>>3) & 0xf) {
+		    case 0: format = "I"; break;
+		    case 1: format = "L"; break;
+		    case 2: format = "A"; break;
+		    case 3: format = " mono"; break; }
+		    break;
+		case 2:
+		    type = "16b";
+		    switch ((dword>>3) & 0xf) {
+		    case 0: format = " rgb565"; break;
+		    case 1: format = " argb1555"; break;
+		    case 2: format = " argb4444"; break;
+		    case 5: format = " ay88"; break;
+		    case 6: format = " bump655"; break;
+		    case 7: format = "I"; break;
+		    case 8: format = "L"; break;
+		    case 9: format = "A"; break; }
+		    break;
+		case 3:
+		    type = "32b";
+		    switch ((dword>>3) & 0xf) {
+		    case 0: format = " argb8888"; break;
+		    case 1: format = " abgr8888"; break;
+		    case 2: format = " xrgb8888"; break;
+		    case 3: format = " xbgr8888"; break;
+		    case 4: format = " qwvu8888"; break;
+		    case 5: format = " axvu8888"; break;
+		    case 6: format = " lxvu8888"; break;
+		    case 7: format = " xlvu8888"; break;
+		    case 8: format = " argb2101010"; break;
+		    case 9: format = " abgr2101010"; break;
+		    case 10: format = " awvu2101010"; break;
+		    case 11: format = " gr1616"; break;
+		    case 12: format = " vu1616"; break;
+		    case 13: format = " xI824"; break;
+		    case 14: format = " xA824"; break;
+		    case 15: format = " xL824"; break; }
+		    break;
+		case 5:
+		    type = "422";
+		    switch ((dword>>3) & 0xf) {
+		    case 0: format = " yuv_swapy"; break;
+		    case 1: format = " yuv"; break;
+		    case 2: format = " yuv_swapuv"; break;
+		    case 3: format = " yuv_swapuvy"; break; }
+		    break;
+		case 6:
+		    type = "compressed";
+		    switch ((dword>>3) & 0x7) {
+		    case 0: format = " dxt1"; break;
+		    case 1: format = " dxt2_3"; break;
+		    case 2: format = " dxt4_5"; break;
+		    case 3: format = " fxt1"; break;
+		    case 4: format = " dxt1_rb"; break; }
+		    break;
+		case 7:
+		    type = "4b indexed";
+		    switch ((dword>>3) & 0xf) {
+		    case 7: format = " argb8888"; break; }
+		    break;
+		}
+		dword = data[i];
+		kgem_debug_print(data, offset, i++, "map %d MS3 [width=%d, height=%d, format=%s%s, tiling=%s%s]\n",
+			  map, width, height, type, format, tiling,
+			  dword&(1<<9)?" palette select":"");
+
+		dword = data[i];
+		pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1);
+		kgem_debug_print(data, offset, i++, "map %d MS4 [pitch=%d, max_lod=%i, vol_depth=%i, cube_face_ena=%x, %s]\n",
+			  map, pitch,
+			  (dword>>9)&0x3f, dword&0xff, (dword>>15)&0x3f,
+			  dword&(1<<8)?"miplayout legacy":"miplayout right");
+	    }
+	}
+	assert (len == i);
+	return len;
+    case 0x85:
+	len = (data[0] & 0x0000000f) + 2;
+	assert (len == 2);
+	kgem_debug_print(data, offset, 0,
+		  "3DSTATE_DEST_BUFFER_VARIABLES\n");
+
+	switch ((data[1] >> 8) & 0xf) {
+	case 0x0: format = "g8"; break;
+	case 0x1: format = "x1r5g5b5"; break;
+	case 0x2: format = "r5g6b5"; break;
+	case 0x3: format = "a8r8g8b8"; break;
+	case 0x4: format = "ycrcb_swapy"; break;
+	case 0x5: format = "ycrcb_normal"; break;
+	case 0x6: format = "ycrcb_swapuv"; break;
+	case 0x7: format = "ycrcb_swapuvy"; break;
+	case 0x8: format = "a4r4g4b4"; break;
+	case 0x9: format = "a1r5g5b5"; break;
+	case 0xa: format = "a2r10g10b10"; break;
+	default: format = "BAD"; break;
+	}
+	switch ((data[1] >> 2) & 0x3) {
+	case 0x0: zformat = "u16"; break;
+	case 0x1: zformat = "f16"; break;
+	case 0x2: zformat = "u24x8"; break;
+	default: zformat = "BAD"; break;
+	}
+	kgem_debug_print(data, offset, 1, "%s format, %s depth format, early Z %sabled\n",
+		  format, zformat,
+		  (data[1] & (1 << 31)) ? "en" : "dis");
+	return len;
+
+    case 0x8e:
+	{
+	    const char *name, *tiling;
+
+	    len = (data[0] & 0x0000000f) + 2;
+	    assert (len == 3);
+
+	    switch((data[1] >> 24) & 0x7) {
+	    case 0x3: name = "color"; break;
+	    case 0x7: name = "depth"; break;
+	    default: name = "unknown"; break;
+	    }
+
+	    tiling = "none";
+	    if (data[1] & (1 << 23))
+		tiling = "fenced";
+	    else if (data[1] & (1 << 22))
+		tiling = data[1] & (1 << 21) ? "Y" : "X";
+
+	    kgem_debug_print(data, offset, 0, "3DSTATE_BUFFER_INFO\n");
+	    kgem_debug_print(data, offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff);
+
+	    kgem_debug_print(data, offset, 2, "address\n");
+	    return len;
+	}
+
+    case 0x81:
+	len = (data[0] & 0x0000000f) + 2;
+	assert (len == 3);
+
+	kgem_debug_print(data, offset, 0,
+		  "3DSTATE_SCISSOR_RECTANGLE\n");
+	kgem_debug_print(data, offset, 1, "(%d,%d)\n",
+		  data[1] & 0xffff, data[1] >> 16);
+	kgem_debug_print(data, offset, 2, "(%d,%d)\n",
+		  data[2] & 0xffff, data[2] >> 16);
+	return len;
+
+    case 0x80:
+	len = (data[0] & 0x0000000f) + 2;
+	assert (len == 5);
+
+	kgem_debug_print(data, offset, 0,
+		  "3DSTATE_DRAWING_RECTANGLE\n");
+	kgem_debug_print(data, offset, 1, "%s\n",
+		  data[1]&(1<<30)?"depth ofs disabled ":"");
+	kgem_debug_print(data, offset, 2, "(%d,%d)\n",
+		  data[2] & 0xffff, data[2] >> 16);
+	kgem_debug_print(data, offset, 3, "(%d,%d)\n",
+		  data[3] & 0xffff, data[3] >> 16);
+	kgem_debug_print(data, offset, 4, "(%d,%d)\n",
+		  data[4] & 0xffff, data[4] >> 16);
+	return len;
+
+    case 0x9c:
+	len = (data[0] & 0x0000000f) + 2;
+	assert (len == 7);
+
+	kgem_debug_print(data, offset, 0,
+		  "3DSTATE_CLEAR_PARAMETERS\n");
+	kgem_debug_print(data, offset, 1, "prim_type=%s, clear=%s%s%s\n",
+		  data[1]&(1<<16)?"CLEAR_RECT":"ZONE_INIT",
+		  data[1]&(1<<2)?"color,":"",
+		  data[1]&(1<<1)?"depth,":"",
+		  data[1]&(1<<0)?"stencil,":"");
+	kgem_debug_print(data, offset, 2, "clear color\n");
+	kgem_debug_print(data, offset, 3, "clear depth/stencil\n");
+	kgem_debug_print(data, offset, 4, "color value (rgba8888)\n");
+	kgem_debug_print(data, offset, 5, "depth value %f\n",
+		  int_as_float(data[5]));
+	kgem_debug_print(data, offset, 6, "clear stencil\n");
+	return len;
+    }
+
+    for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++) {
+	opcode_3d_1d = &opcodes_3d_1d[idx];
+	if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) {
+	    len = 1;
+
+	    kgem_debug_print(data, offset, 0, "%s\n", opcode_3d_1d->name);
+	    if (opcode_3d_1d->max_len > 1) {
+		len = (data[0] & 0x0000ffff) + 2;
+		assert (len >= opcode_3d_1d->min_len &&
+			len <= opcode_3d_1d->max_len);
+	    }
+
+	    for (i = 1; i < len; i++)
+		kgem_debug_print(data, offset, i, "dword %d\n", i);
+
+	    return len;
+	}
+    }
+
+    kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode);
+    return 1;
+}
+
+static int
+decode_3d_1c(struct kgem *kgem, uint32_t offset)
+{
+    uint32_t *data = kgem->batch + offset;
+    uint32_t opcode;
+
+    opcode = (data[0] & 0x00f80000) >> 19;
+
+    switch (opcode) {
+    case 0x11:
+	kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n");
+	return 1;
+    case 0x10:
+	kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_ENABLE %s\n",
+		data[0]&1?"enabled":"disabled");
+	return 1;
+    case 0x01:
+	kgem_debug_print(data, offset, 0, "3DSTATE_MAP_COORD_SET_I830\n");
+	return 1;
+    case 0x0a:
+	kgem_debug_print(data, offset, 0, "3DSTATE_MAP_CUBE_I830\n");
+	return 1;
+    case 0x05:
+	kgem_debug_print(data, offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n");
+	return 1;
+    }
+
+    kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n",
+	      opcode);
+    return 1;
+}
+
 int kgem_gen2_decode_3d(struct kgem *kgem, uint32_t offset)
 {
+    const static struct {
+	uint32_t opcode;
+	int min_len;
+	int max_len;
+	const char *name;
+    } opcodes[] = {
+	{ 0x02, 1, 1, "3DSTATE_MODES_3" },
+	{ 0x03, 1, 1, "3DSTATE_ENABLES_1"},
+	{ 0x04, 1, 1, "3DSTATE_ENABLES_2"},
+	{ 0x05, 1, 1, "3DSTATE_VFT0"},
+	{ 0x06, 1, 1, "3DSTATE_AA"},
+	{ 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" },
+	{ 0x08, 1, 1, "3DSTATE_MODES_1" },
+	{ 0x09, 1, 1, "3DSTATE_STENCIL_TEST" },
+	{ 0x0a, 1, 1, "3DSTATE_VFT1"},
+	{ 0x0b, 1, 1, "3DSTATE_INDPT_ALPHA_BLEND" },
+	{ 0x0c, 1, 1, "3DSTATE_MODES_5" },
+	{ 0x0d, 1, 1, "3DSTATE_MAP_BLEND_OP" },
+	{ 0x0e, 1, 1, "3DSTATE_MAP_BLEND_ARG" },
+	{ 0x0f, 1, 1, "3DSTATE_MODES_2" },
+	{ 0x15, 1, 1, "3DSTATE_FOG_COLOR" },
+	{ 0x16, 1, 1, "3DSTATE_MODES_4" },
+    };
     uint32_t *data = kgem->batch + offset;
     uint32_t opcode = (data[0] & 0x1f000000) >> 24;
-    uint32_t len = (data[0] & 0xff) + 2;
+    uint32_t idx;
+
+    switch (opcode) {
+    case 0x1f:
+	return decode_3d_primitive(kgem, offset);
+    case 0x1d:
+	return decode_3d_1d(kgem, offset);
+    case 0x1c:
+	return decode_3d_1c(kgem, offset);
+    }
+
+    /* Catch the known instructions */
+    for (idx = 0; idx < ARRAY_SIZE(opcodes); idx++) {
+	if (opcode == opcodes[idx].opcode) {
+	    unsigned int len = 1, i;
+
+	    kgem_debug_print(data, offset, 0, "%s\n", opcodes[idx].name);
+	    if (opcodes[idx].max_len > 1) {
+		len = (data[0] & 0xf) + 2;
+		assert(len >= opcodes[idx].min_len &&
+		       len <= opcodes[idx].max_len);
+	    }
+
+	    for (i = 1; i < len; i++)
+		kgem_debug_print(data, offset, i, "dword %d\n", i);
+	    return len;
+	}
+    }
 
     kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode);
-    return len;
+    return 1;
 }
 
 void kgem_gen2_finish_state(struct kgem *kgem)
commit 2cda0aaf397de1a0ca049508c6fa76f2dd4e61e8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sat Sep 3 14:46:57 2011 +0100

    sna/trapezoids: Check for alignment after projection
    
    If after projection onto the Imprecise fast sample grid, the trapezoid
    becomes a pixel-aligned box, treat it as such and send it down the fast
    paths.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index cb228e8..46c1e57 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -63,11 +63,10 @@
 #define SAMPLES_X 17
 #define SAMPLES_Y 15
 
-#define FAST_SAMPLES_X_shift 2
-#define FAST_SAMPLES_Y_shift 2
-
-#define FAST_SAMPLES_X (1<<FAST_SAMPLES_X_shift)
-#define FAST_SAMPLES_Y (1<<FAST_SAMPLES_Y_shift)
+#define FAST_SAMPLES_shift 2
+#define FAST_SAMPLES_X (1<<FAST_SAMPLES_shift)
+#define FAST_SAMPLES_Y (1<<FAST_SAMPLES_shift)
+#define FAST_SAMPLES_mask ((1<<FAST_SAMPLES_shift)-1)
 
 typedef void (*span_func_t)(struct sna *sna,
 			    struct sna_composite_spans_op *op,
@@ -129,11 +128,14 @@ typedef int grid_scaled_x_t;
 typedef int grid_scaled_y_t;
 
 #define FAST_SAMPLES_X_TO_INT_FRAC(x, i, f) \
-	_GRID_TO_INT_FRAC_shift(x, i, f, FAST_SAMPLES_X_shift)
+	_GRID_TO_INT_FRAC_shift(x, i, f, FAST_SAMPLES_shift)
+
+#define FAST_SAMPLES_INT(x) ((x) >> (FAST_SAMPLES_shift))
+#define FAST_SAMPLES_FRAC(x) ((x) & (FAST_SAMPLES_mask))
 
 #define _GRID_TO_INT_FRAC_shift(t, i, f, b) do {	\
-    (f) = (t) & ((1 << (b)) - 1);			\
-    (i) = (t) >> (b);					\
+    (f) = FAST_SAMPLES_FRAC(t);				\
+    (i) = FAST_SAMPLES_INT(t);				\
 } while (0)
 
 /* A grid area is a real in [0,1] scaled by 2*SAMPLES_X*SAMPLES_Y.  We want
@@ -673,8 +675,15 @@ polygon_add_edge(struct polygon *polygon,
 	grid_scaled_y_t ymin = polygon->ymin;
 	grid_scaled_y_t ymax = polygon->ymax;
 
-	DBG(("%s: edge=(%d, %d), (%d, %d), top=%d, bottom=%d, dir=%d\n",
-	     __FUNCTION__, x1, y1, x2, y2, top, bottom, dir));
+	DBG(("%s: edge=(%d [%d.%d], %d [%d.%d]), (%d [%d.%d], %d [%d.%d]), top=%d [%d.%d], bottom=%d [%d.%d], dir=%d\n",
+	     __FUNCTION__,
+	     x1, FAST_SAMPLES_INT(x1), FAST_SAMPLES_FRAC(x1),
+	     y1, FAST_SAMPLES_INT(y1), FAST_SAMPLES_FRAC(y1),
+	     x2, FAST_SAMPLES_INT(x2), FAST_SAMPLES_FRAC(x2),
+	     y2, FAST_SAMPLES_INT(y2), FAST_SAMPLES_FRAC(y2),
+	     top, FAST_SAMPLES_INT(top), FAST_SAMPLES_FRAC(top),
+	     bottom, FAST_SAMPLES_INT(bottom), FAST_SAMPLES_FRAC(bottom),
+	     dir));
 	assert (dy > 0);
 
 	e->dy = dy;
@@ -2040,23 +2049,28 @@ composite_unaligned_boxes(CARD8 op,
 	return true;
 }
 
+static inline int pixman_fixed_to_grid (pixman_fixed_t v)
+{
+	return (v + FAST_SAMPLES_mask/2) >> (16 - FAST_SAMPLES_shift);
+}
+
 static inline bool
 project_trapezoid_onto_grid(const xTrapezoid *in,
 			    int dx, int dy,
 			    xTrapezoid *out)
 {
-	out->left.p1.x = dx + (in->left.p1.x >> (16 - FAST_SAMPLES_X_shift));
-	out->left.p1.y = dy + (in->left.p1.y >> (16 - FAST_SAMPLES_Y_shift));
-	out->left.p2.x = dx + (in->left.p2.x >> (16 - FAST_SAMPLES_X_shift));
-	out->left.p2.y = dy + (in->left.p2.y >> (16 - FAST_SAMPLES_Y_shift));
+	out->left.p1.x = dx + pixman_fixed_to_grid(in->left.p1.x);
+	out->left.p1.y = dy + pixman_fixed_to_grid(in->left.p1.y);
+	out->left.p2.x = dx + pixman_fixed_to_grid(in->left.p2.x);
+	out->left.p2.y = dy + pixman_fixed_to_grid(in->left.p2.y);
 
-	out->right.p1.x = dx + (in->right.p1.x >> (16 - FAST_SAMPLES_X_shift));
-	out->right.p1.y = dy + (in->right.p1.y >> (16 - FAST_SAMPLES_Y_shift));
-	out->right.p2.x = dx + (in->right.p2.x >> (16 - FAST_SAMPLES_X_shift));
-	out->right.p2.y = dy + (in->right.p2.y >> (16 - FAST_SAMPLES_Y_shift));
+	out->right.p1.x = dx + pixman_fixed_to_grid(in->right.p1.x);
+	out->right.p1.y = dy + pixman_fixed_to_grid(in->right.p1.y);
+	out->right.p2.x = dx + pixman_fixed_to_grid(in->right.p2.x);
+	out->right.p2.y = dy + pixman_fixed_to_grid(in->right.p2.y);
 
-	out->top = dy + (in->top >> (16 - FAST_SAMPLES_Y_shift));
-	out->bottom = dy + (in->bottom >> (16 - FAST_SAMPLES_Y_shift));
+	out->top = dy + pixman_fixed_to_grid(in->top);
+	out->bottom = dy + pixman_fixed_to_grid(in->bottom);
 
 	return xTrapezoidValid(out);
 }
@@ -2375,8 +2389,7 @@ sna_composite_trapezoids(CARD8 op,
 			 int ntrap, xTrapezoid *traps)
 {
 	struct sna *sna = to_sna_from_drawable(dst->pDrawable);
-	bool rectilinear = true;
-	bool pixel_aligned = true;
+	bool rectilinear, pixel_aligned;
 	int n;
 
 	DBG(("%s(op=%d, src=(%d, %d), mask=%08x, ntrap=%d)\n", __FUNCTION__,
@@ -2411,21 +2424,41 @@ sna_composite_trapezoids(CARD8 op,
 	}
 
 	/* scan through for fast rectangles */
-	for (n = 0; n < ntrap && rectilinear; n++) {
-		rectilinear &=
-			traps[n].left.p1.x == traps[n].left.p2.x &&
-			traps[n].right.p1.x == traps[n].right.p2.x;
-		pixel_aligned &=
-			((traps[n].top | traps[n].bottom |
-			  traps[n].left.p1.x | traps[n].left.p2.x |
-			  traps[n].right.p1.x | traps[n].right.p2.x)
-			 & pixman_fixed_1_minus_e) == 0;
+	rectilinear = pixel_aligned = true;
+	if (maskFormat ? maskFormat->depth == 1 : dst->polyEdge == PolyEdgeSharp) {
+		for (n = 0; n < ntrap && rectilinear; n++) { /* sharp edges: compare each edge's endpoints after rounding to whole pixels */
+			int lx1 = pixman_fixed_to_int(traps[n].left.p1.x + pixman_fixed_1_minus_e/2);
+			int lx2 = pixman_fixed_to_int(traps[n].left.p2.x + pixman_fixed_1_minus_e/2);
+			int rx1 = pixman_fixed_to_int(traps[n].right.p1.x + pixman_fixed_1_minus_e/2); /* right edge, not left */
+			int rx2 = pixman_fixed_to_int(traps[n].right.p2.x + pixman_fixed_1_minus_e/2); /* right edge, not left */
+			rectilinear &= lx1 == lx2 && rx1 == rx2;
+		}
+	} else if (dst->polyMode != PolyModePrecise) {
+		for (n = 0; n < ntrap && rectilinear; n++) { /* imprecise mode: compare on the sample grid */
+			int lx1 = pixman_fixed_to_grid(traps[n].left.p1.x);
+			int lx2 = pixman_fixed_to_grid(traps[n].left.p2.x); /* left edge: p1 vs p2 of the same line */
+			int rx1 = pixman_fixed_to_grid(traps[n].right.p1.x); /* right edge: p1 vs p2 of the same line */
+			int rx2 = pixman_fixed_to_grid(traps[n].right.p2.x);
+			int top = pixman_fixed_to_grid(traps[n].top);
+			int bot = pixman_fixed_to_grid(traps[n].bottom);
+
+			rectilinear &= lx1 == lx2 && rx1 == rx2;
+			pixel_aligned &= ((top | bot | lx1 | lx2 | rx1 | rx2) & FAST_SAMPLES_mask) == 0;
+		}
+	} else {
+		for (n = 0; n < ntrap && rectilinear; n++) { /* precise mode: require exact fixed-point equality */
+			rectilinear &=
+				traps[n].left.p1.x == traps[n].left.p2.x &&
+				traps[n].right.p1.x == traps[n].right.p2.x;
+			pixel_aligned &=
+				((traps[n].top | traps[n].bottom |
+				  traps[n].left.p1.x | traps[n].left.p2.x |
+				  traps[n].right.p1.x | traps[n].right.p2.x)
+				 & pixman_fixed_1_minus_e) == 0;
+		}
 	}
 
 	if (rectilinear) {
-		pixel_aligned |= maskFormat ?
-			maskFormat->depth == 1 :
-			dst->polyEdge == PolyEdgeSharp;
 		if (pixel_aligned) {
 			if (composite_aligned_boxes(op, src, dst,
 						    maskFormat,
@@ -2462,8 +2495,8 @@ project_point_onto_grid(const xPointFixed *in,
 			int dx, int dy,
 			xPointFixed *out)
 {
-	out->x = dx + (in->x >> (16 - FAST_SAMPLES_X_shift));
-	out->y = dy + (in->y >> (16 - FAST_SAMPLES_Y_shift));
+	out->x = dx + pixman_fixed_to_grid(in->x);
+	out->y = dy + pixman_fixed_to_grid(in->y);
 }
 
 static inline bool
commit db0fb368c135d4fef4ae993df67ed4610a80fd52
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Sep 2 13:13:38 2011 +0100

    sna: Add missing implementation for Triangles
    
    Feed both into spans and as a mask fallback.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna.h b/src/sna/sna.h
index f32395d..3620173 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -523,6 +523,26 @@ void sna_composite_trapezoids(CARD8 op,
 			      PictFormatPtr maskFormat,
 			      INT16 xSrc, INT16 ySrc,
 			      int ntrap, xTrapezoid *traps);
+void sna_composite_triangles(CARD8 op,
+			     PicturePtr src,
+			     PicturePtr dst,
+			     PictFormatPtr maskFormat,
+			     INT16 xSrc, INT16 ySrc,
+			     int ntri, xTriangle *tri);
+
+void sna_composite_tristrip(CARD8 op,
+			    PicturePtr src,
+			    PicturePtr dst,
+			    PictFormatPtr maskFormat,
+			    INT16 xSrc, INT16 ySrc,
+			    int npoints, xPointFixed *points);
+
+void sna_composite_trifan(CARD8 op,
+			  PicturePtr src,
+			  PicturePtr dst,
+			  PictFormatPtr maskFormat,
+			  INT16 xSrc, INT16 ySrc,
+			  int npoints, xPointFixed *points);
 
 Bool sna_gradients_create(struct sna *sna);
 void sna_gradients_close(struct sna *sna);
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 41da573..f83d8dc 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3346,6 +3346,11 @@ Bool sna_accel_init(ScreenPtr screen, struct sna *sna)
 			ps->UnrealizeGlyph = sna_glyph_unrealize;
 			ps->AddTraps = sna_add_traps;
 			ps->Trapezoids = sna_composite_trapezoids;
+			ps->Triangles = sna_composite_triangles;
+#if PICTURE_SCREEN_VERSION >= 2
+			ps->TriStrip = sna_composite_tristrip;
+			ps->TriFan = sna_composite_trifan;
+#endif
 		}
 	}
 #endif
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 4698f7f..cb228e8 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -52,6 +52,14 @@
 
 #define unlikely(x) x
 
+#ifndef MAX
+#define MAX(x,y) ((x) >= (y) ? (x) : (y))
+#endif
+
+#ifndef MIN
+#define MIN(x,y) ((x) <= (y) ? (x) : (y))
+#endif
+
 #define SAMPLES_X 17
 #define SAMPLES_Y 15
 
@@ -61,6 +69,12 @@
 #define FAST_SAMPLES_X (1<<FAST_SAMPLES_X_shift)
 #define FAST_SAMPLES_Y (1<<FAST_SAMPLES_Y_shift)
 
+typedef void (*span_func_t)(struct sna *sna,
+			    struct sna_composite_spans_op *op,
+			    pixman_region16_t *clip,
+			    const BoxRec *box,
+			    int coverage);
+
 #if DEBUG_TRAPEZOIDS
 static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function)
 {
@@ -694,6 +708,87 @@ polygon_add_edge(struct polygon *polygon,
 	e->x.rem -= dy; /* Bias the remainder for faster edge advancement. */
 }
 
+inline static void
+polygon_add_line(struct polygon *polygon,
+		 const xPointFixed *p1,
+		 const xPointFixed *p2)
+{
+	struct edge *e = &polygon->edges[polygon->num_edges];
+	grid_scaled_x_t dx = p2->x - p1->x;
+	grid_scaled_y_t dy = p2->y - p1->y;
+	grid_scaled_y_t top, bot;
+
+	if (dy == 0)
+		return;
+
+	DBG(("%s: line=(%d, %d), (%d, %d)\n",
+	     __FUNCTION__, (int)p1->x, (int)p1->y, (int)p2->x, (int)p2->y));
+
+	e->dir = 1;
+	if (dy < 0) {
+		const xPointFixed *t;
+
+		dx = -dx;
+		dy = -dy;
+
+		e->dir = -1;
+
+		t = p1;
+		p1 = p2;
+		p2 = t;
+	}
+	assert (dy > 0);
+	e->dy = dy;
+
+	top = MAX(p1->y, polygon->ymin);
+	bot = MIN(p2->y, polygon->ymax);
+	if (bot <= top)
+		return;
+
+	e->ytop = top;
+	e->height_left = bot - top;
+
+	if (dx == 0) {
+		e->vertical = true;
+		e->x.quo = p1->x;
+		e->x.rem = 0;
+		e->dxdy.quo = 0;
+		e->dxdy.rem = 0;
+	} else {
+		e->vertical = false;
+		e->dxdy = floored_divrem(dx, dy);
+		if (top == p1->y) {
+			e->x.quo = p1->x;
+			e->x.rem = -dy;
+		} else {
+			e->x = floored_muldivrem(top - p1->y, dx, dy);
+			e->x.quo += p1->x;
+			e->x.rem -= dy;
+		}
+	}
+
+	if (polygon->num_edges > 0) {
+		struct edge *prev = &polygon->edges[polygon->num_edges-1];
+		/* detect degenerate triangles inserted into tristrips */
+		if (e->dir == -prev->dir &&
+		    e->ytop == prev->ytop &&
+		    e->height_left == prev->height_left &&
+		    e->x.quo == prev->x.quo &&
+		    e->x.rem == prev->x.rem &&
+		    e->dxdy.quo == prev->dxdy.quo &&
+		    e->dxdy.rem == prev->dxdy.rem) {
+			unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop,
+							  polygon->ymin);
+			polygon->y_buckets[ix] = prev->next;
+			polygon->num_edges--;
+			return;
+		}
+	}
+
+	_polygon_insert_edge_into_its_y_bucket(polygon, e);
+	polygon->num_edges++;
+}
+
 static void
 active_list_reset(struct active_list *active)
 {
@@ -789,7 +884,6 @@ sort_edges(struct edge  *list,
 	return remaining;
 }
 
-
 static struct edge *
 merge_unsorted_edges (struct edge *head, struct edge *unsorted)
 {
@@ -961,10 +1055,11 @@ tor_fini(struct tor *converter)
 static int
 tor_init(struct tor *converter, const BoxRec *box, int num_edges)
 {
-	DBG(("%s: (%d, %d),(%d, %d) x (%d, %d)\n",
+	DBG(("%s: (%d, %d),(%d, %d) x (%d, %d), num_edges=%d\n",
 	     __FUNCTION__,
 	     box->x1, box->y1, box->x2, box->y2,
-	     FAST_SAMPLES_X, FAST_SAMPLES_Y));
+	     FAST_SAMPLES_X, FAST_SAMPLES_Y,
+	     num_edges));
 
 	converter->xmin = box->x1;
 	converter->ymin = box->y1;
@@ -1966,19 +2061,43 @@ project_trapezoid_onto_grid(const xTrapezoid *in,
 	return xTrapezoidValid(out);
 }
 
+static span_func_t
+choose_span(PicturePtr dst,
+	    PictFormatPtr maskFormat,
+	    uint8_t op,
+	    RegionPtr clip)
+{
+	span_func_t span;
+
+	if (maskFormat ? maskFormat->depth<8 : dst->polyEdge==PolyEdgeSharp) {
+		/* XXX An imprecise approximation */
+		if (maskFormat && !operator_is_bounded(op)) {
+			span = tor_blt_span_mono_unbounded;
+			if (REGION_NUM_RECTS(clip) > 1)
+				span = tor_blt_span_mono_unbounded_clipped;
+		} else {
+			span = tor_blt_span_mono;
+			if (REGION_NUM_RECTS(clip) > 1)
+				span = tor_blt_span_mono_clipped;
+		}
+	} else {
+		span = tor_blt_span;
+		if (REGION_NUM_RECTS(clip) > 1)
+			span = tor_blt_span_clipped;
+	}
+
+	return span;
+}
+
+
 static bool
-tor_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
-		   PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
-		   int ntrap, xTrapezoid *traps)
+trap_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
+		    PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+		    int ntrap, xTrapezoid *traps)
 {
 	struct sna *sna;
 	struct sna_composite_spans_op tmp;
 	struct tor tor;
-	void (*span)(struct sna *sna,
-		     struct sna_composite_spans_op *op,
-		     pixman_region16_t *clip,
-		     const BoxRec *box,
-		     int coverage);
 	BoxRec extents;
 	pixman_region16_t clip;
 	int16_t dst_x, dst_y;
@@ -2077,24 +2196,8 @@ tor_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 		tor_add_edge(&tor, &t, &t.right, -1);
 	}
 
-	if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp) {
-		/* XXX An imprecise approximation */
-		if (maskFormat && !operator_is_bounded(op)) {
-			span = tor_blt_span_mono_unbounded;
-			if (REGION_NUM_RECTS(&clip) > 1)
-				span = tor_blt_span_mono_unbounded_clipped;
-		} else {
-			span = tor_blt_span_mono;
-			if (REGION_NUM_RECTS(&clip) > 1)
-				span = tor_blt_span_mono_clipped;
-		}
-	} else {
-		span = tor_blt_span;
-		if (REGION_NUM_RECTS(&clip) > 1)
-			span = tor_blt_span_clipped;
-	}
-
-	tor_render(sna, &tor, &tmp, &clip, span,
+	tor_render(sna, &tor, &tmp, &clip,
+		   choose_span(dst, maskFormat, op, &clip),
 		   maskFormat && !operator_is_bounded(op));
 
 skip:
@@ -2148,16 +2251,12 @@ tor_blt_mask_mono(struct sna *sna,
 }
 
 static bool
-tor_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
-		   PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
-		   int ntrap, xTrapezoid *traps)
+trap_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
+		    PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+		    int ntrap, xTrapezoid *traps)
 {
 	struct tor tor;
-	void (*span)(struct sna *sna,
-		     struct sna_composite_spans_op *op,
-		     pixman_region16_t *clip,
-		     const BoxRec *box,
-		     int coverage);
+	span_func_t span;
 	ScreenPtr screen = dst->pDrawable->pScreen;
 	PixmapPtr scratch;
 	PicturePtr mask;
@@ -2342,12 +2441,12 @@ sna_composite_trapezoids(CARD8 op,
 		}
 	}
 
-	if (tor_span_converter(op, src, dst, maskFormat,
-			       xSrc, ySrc, ntrap, traps))
+	if (trap_span_converter(op, src, dst, maskFormat,
+				xSrc, ySrc, ntrap, traps))
 		return;
 
-	if (tor_mask_converter(op, src, dst, maskFormat,
-			       xSrc, ySrc, ntrap, traps))
+	if (trap_mask_converter(op, src, dst, maskFormat,
+				xSrc, ySrc, ntrap, traps))
 		return;
 
 fallback:
@@ -2357,3 +2456,785 @@ fallback:
 			    xSrc, ySrc,
 			    ntrap, traps);
 }
+
+static inline void
+project_point_onto_grid(const xPointFixed *in,
+			int dx, int dy,
+			xPointFixed *out)
+{
+	out->x = dx + (in->x >> (16 - FAST_SAMPLES_X_shift));
+	out->y = dy + (in->y >> (16 - FAST_SAMPLES_Y_shift));
+}
+
+static inline bool
+xTriangleValid(const xTriangle *t) /* true iff the triangle encloses a non-zero area */
+{
+	xPointFixed v1, v2;
+
+	v1.x = t->p2.x - t->p1.x;
+	v1.y = t->p2.y - t->p1.y;
+
+	v2.x = t->p3.x - t->p1.x;
+	v2.y = t->p3.y - t->p1.y;
+
+	/* if the length of any edge is zero, the area must be zero */
+	if (v1.x == 0 && v1.y == 0)
+		return FALSE;
+	if (v2.x == 0 && v2.y == 0)
+		return FALSE;
+
+	/* if the cross-product is zero, so is the size; widen so the products cannot overflow int */
+	return (long long)v2.y * v1.x != (long long)v1.y * v2.x;
+}
+
+static inline bool
+project_triangle_onto_grid(const xTriangle *in,
+			   int dx, int dy,
+			   xTriangle *out)
+{
+	project_point_onto_grid(&in->p1, dx, dy, &out->p1);
+	project_point_onto_grid(&in->p2, dx, dy, &out->p2);
+	project_point_onto_grid(&in->p3, dx, dy, &out->p3);
+
+	return xTriangleValid(out);
+}
+
+static bool
+triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
+			 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+			 int count, xTriangle *tri)
+{
+	struct sna *sna;
+	struct sna_composite_spans_op tmp;
+	struct tor tor;
+	BoxRec extents;
+	pixman_region16_t clip;
+	int16_t dst_x, dst_y;
+	int16_t dx, dy;
+	int n;
+
+	if (NO_SCAN_CONVERTER)
+		return false;
+
+	/* XXX strict adherence to the Render specification */
+	if (dst->polyMode == PolyModePrecise) {
+		DBG(("%s: fallback -- precise rasterisation requested\n",
+		     __FUNCTION__));
+		return false;
+	}
+
+	sna = to_sna_from_drawable(dst->pDrawable);
+	if (!sna->render.composite_spans) {
+		DBG(("%s: fallback -- composite spans not supported\n",
+		     __FUNCTION__));
+		return false;
+	}
+
+	dst_x = pixman_fixed_to_int(tri[0].p1.x);
+	dst_y = pixman_fixed_to_int(tri[0].p1.y);
+
+	miTriangleBounds(count, tri, &extents);
+	DBG(("%s: extents (%d, %d), (%d, %d)\n",
+	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
+
+	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
+		return true;
+
+#if 0
+	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
+		DBG(("%s: fallback -- traps extents too small %dx%d\n",
+		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
+		return false;
+	}
+#endif
+
+	if (!sna_compute_composite_region(&clip,
+					  src, NULL, dst,
+					  src_x + extents.x1 - dst_x,
+					  src_y + extents.y1 - dst_y,
+					  0, 0,
+					  extents.x1, extents.y1,
+					  extents.x2 - extents.x1,
+					  extents.y2 - extents.y1)) {
+		DBG(("%s: triangles do not intersect drawable clips\n",
+		     __FUNCTION__)) ;
+		return true;
+	}
+
+	extents = *RegionExtents(&clip);
+	dx = dst->pDrawable->x;
+	dy = dst->pDrawable->y;
+
+	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
+	     __FUNCTION__,
+	     extents.x1, extents.y1,
+	     extents.x2, extents.y2,
+	     dx, dy,
+	     src_x + extents.x1 - dst_x - dx,
+	     src_y + extents.y1 - dst_y - dy));
+
+	memset(&tmp, 0, sizeof(tmp));
+	if (!sna->render.composite_spans(sna, op, src, dst,
+					 src_x + extents.x1 - dst_x - dx,
+					 src_y + extents.y1 - dst_y - dy,
+					 extents.x1,  extents.y1,
+					 extents.x2 - extents.x1,
+					 extents.y2 - extents.y1,
+					 &tmp)) {
+		DBG(("%s: fallback -- composite spans render op not supported\n",
+		     __FUNCTION__));
+		return false;
+	}
+
+	dx *= FAST_SAMPLES_X;
+	dy *= FAST_SAMPLES_Y;
+	if (tor_init(&tor, &extents, 3*count))
+		goto skip;
+
+	for (n = 0; n < count; n++) {
+		xTriangle t;
+
+		if (!project_triangle_onto_grid(&tri[n], dx, dy, &t))
+			continue;
+
+		polygon_add_line(tor.polygon, &t.p1, &t.p2);
+		polygon_add_line(tor.polygon, &t.p2, &t.p3);
+		polygon_add_line(tor.polygon, &t.p3, &t.p1);
+	}
+
+	tor_render(sna, &tor, &tmp, &clip,
+		   choose_span(dst, maskFormat, op, &clip),
+		   maskFormat && !operator_is_bounded(op));
+
+skip:
+	tor_fini(&tor);
+	tmp.done(sna, &tmp);
+
+	REGION_UNINIT(NULL, &clip);
+	return true;
+}
+
+static bool
+triangles_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
+			 PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+			 int count, xTriangle *tri)
+{
+	struct tor tor;
+	void (*span)(struct sna *sna,
+		     struct sna_composite_spans_op *op,
+		     pixman_region16_t *clip,
+		     const BoxRec *box,
+		     int coverage);
+	ScreenPtr screen = dst->pDrawable->pScreen;
+	PixmapPtr scratch;
+	PicturePtr mask;
+	BoxRec extents;
+	int16_t dst_x, dst_y;
+	int16_t dx, dy;
+	int error;
+	int n;
+
+	if (NO_SCAN_CONVERTER)
+		return false;
+
+	if (dst->polyMode == PolyModePrecise) {
+		DBG(("%s: fallback -- precise rasterisation requested\n",
+		     __FUNCTION__));
+		return false;
+	}
+
+	if (maskFormat == NULL && count > 1) {
+		DBG(("%s: fallback -- individual rasterisation requested\n",
+		     __FUNCTION__));
+		return false;
+	}
+
+	miTriangleBounds(count, tri, &extents);
+	DBG(("%s: extents (%d, %d), (%d, %d)\n",
+	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
+
+	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
+		return true;
+
+	if (!sna_compute_composite_extents(&extents,
+					   src, NULL, dst,
+					   src_x, src_y,
+					   0, 0,
+					   extents.x1, extents.y1,
+					   extents.x2 - extents.x1,
+					   extents.y2 - extents.y1))
+		return true;
+
+	DBG(("%s: extents (%d, %d), (%d, %d)\n",
+	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
+
+	extents.y2 -= extents.y1;
+	extents.x2 -= extents.x1;
+	extents.x1 -= dst->pDrawable->x;
+	extents.y1 -= dst->pDrawable->y;
+	dst_x = extents.x1;
+	dst_y = extents.y1;
+	dx = -extents.x1 * FAST_SAMPLES_X;
+	dy = -extents.y1 * FAST_SAMPLES_Y;
+	extents.x1 = extents.y1 = 0;
+
+	DBG(("%s: mask (%dx%d)\n",
+	     __FUNCTION__, extents.x2, extents.y2));
+	scratch = sna_pixmap_create_upload(screen, extents.x2, extents.y2, 8);
+	if (!scratch)
+		return true;
+
+	DBG(("%s: created buffer %p, stride %d\n",
+	     __FUNCTION__, scratch->devPrivate.ptr, scratch->devKind));
+
+	if (tor_init(&tor, &extents, 3*count)) {
+		screen->DestroyPixmap(scratch);
+		return true;
+	}
+
+	for (n = 0; n < count; n++) {
+		xTriangle t;
+
+		if (!project_triangle_onto_grid(&tri[n], dx, dy, &t))
+			continue;
+
+		polygon_add_line(tor.polygon, &t.p1, &t.p2);
+		polygon_add_line(tor.polygon, &t.p2, &t.p3);
+		polygon_add_line(tor.polygon, &t.p3, &t.p1);
+	}
+
+	if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp)
+		span = tor_blt_mask_mono;
+	else
+		span = tor_blt_mask;
+
+	tor_render(NULL, &tor,
+		   scratch->devPrivate.ptr,
+		   (void *)(intptr_t)scratch->devKind,
+		   span, true);
+
+	mask = CreatePicture(0, &scratch->drawable,
+			     PictureMatchFormat(screen, 8, PICT_a8),
+			     0, 0, serverClient, &error);
+	screen->DestroyPixmap(scratch);
+	if (mask) {
+		CompositePicture(op, src, mask, dst,
+				 src_x + dst_x - pixman_fixed_to_int(tri[0].p1.x),
+				 src_y + dst_y - pixman_fixed_to_int(tri[0].p1.y),
+				 0, 0,
+				 dst_x, dst_y,
+				 extents.x2, extents.y2);
+		FreePicture(mask, 0);
+	}
+	tor_fini(&tor);
+
+	return true;
+}
+
+static void
+triangles_fallback(CARD8 op,
+		   PicturePtr src,
+		   PicturePtr dst,
+		   PictFormatPtr maskFormat,
+		   INT16 xSrc, INT16 ySrc,
+		   int n, xTriangle *tri)
+{
+	ScreenPtr screen = dst->pDrawable->pScreen;
+
+	DBG(("%s op=%d, count=%d\n", __FUNCTION__, op, n));
+
+	if (maskFormat) {
+		PixmapPtr scratch;
+		PicturePtr mask;
+		INT16 dst_x, dst_y;
+		BoxRec bounds;
+		int width, height, depth;
+		pixman_image_t *image;
+		pixman_format_code_t format;
+		int error;
+
+		dst_x = pixman_fixed_to_int(tri[0].p1.x);
+		dst_y = pixman_fixed_to_int(tri[0].p1.y);
+
+		miTriangleBounds(n, tri, &bounds);
+		DBG(("%s: bounds (%d, %d), (%d, %d)\n",
+		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
+
+		if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2)
+			return;
+
+		if (!sna_compute_composite_extents(&bounds,
+						   src, NULL, dst,
+						   xSrc, ySrc,
+						   0, 0,
+						   bounds.x1, bounds.y1,
+						   bounds.x2 - bounds.x1,
+						   bounds.y2 - bounds.y1))
+			return;
+
+		DBG(("%s: extents (%d, %d), (%d, %d)\n",
+		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
+
+		width  = bounds.x2 - bounds.x1;
+		height = bounds.y2 - bounds.y1;
+		bounds.x1 -= dst->pDrawable->x;
+		bounds.y1 -= dst->pDrawable->y;
+		depth = maskFormat->depth;
+		format = maskFormat->format | (BitsPerPixel(depth) << 24);
+
+		DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
+		     __FUNCTION__, width, height, depth, format));
+		scratch = sna_pixmap_create_upload(screen,
+						   width, height, depth);
+		if (!scratch)
+			return;
+
+		memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);
+		image = pixman_image_create_bits(format, width, height,
+						 scratch->devPrivate.ptr,
+						 scratch->devKind);
+		if (image) {
+			pixman_add_triangles(image,
+					     -bounds.x1, -bounds.y1,
+					     n, (pixman_triangle_t *)tri);
+			pixman_image_unref(image);
+		}
+
+		mask = CreatePicture(0, &scratch->drawable,
+				     PictureMatchFormat(screen, depth, format),
+				     0, 0, serverClient, &error);
+		screen->DestroyPixmap(scratch);
+		if (!mask)
+			return;
+
+		CompositePicture(op, src, mask, dst,
+				 xSrc + bounds.x1 - dst_x,
+				 ySrc + bounds.y1 - dst_y,
+				 0, 0,
+				 bounds.x1, bounds.y1,
+				 width, height);
+		FreePicture(mask, 0);
+	} else {
+		if (dst->polyEdge == PolyEdgeSharp)
+			maskFormat = PictureMatchFormat(screen, 1, PICT_a1);
+		else
+			maskFormat = PictureMatchFormat(screen, 8, PICT_a8);
+
+		for (; n--; tri++)
+			triangles_fallback(op,
+					   src, dst, maskFormat,
+					   xSrc, ySrc, 1, tri);
+	}
+}
+
+void
+sna_composite_triangles(CARD8 op,
+			 PicturePtr src,
+			 PicturePtr dst,
+			 PictFormatPtr maskFormat,
+			 INT16 xSrc, INT16 ySrc,
+			 int n, xTriangle *tri)
+{
+	if (triangles_span_converter(op, src, dst, maskFormat,
+				     xSrc, ySrc,
+				     n, tri))
+		return;
+
+	if (triangles_mask_converter(op, src, dst, maskFormat,
+				     xSrc, ySrc,
+				     n, tri))
+		return;
+
+	triangles_fallback(op, src, dst, maskFormat, xSrc, ySrc, n, tri);
+}
+
+static bool
+tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
+			PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
+			int count, xPointFixed *points)
+{
+	struct sna *sna;
+	struct sna_composite_spans_op tmp;
+	struct tor tor;
+	BoxRec extents;
+	pixman_region16_t clip;
+	xPointFixed p[4];
+	int16_t dst_x, dst_y;
+	int16_t dx, dy;
+	int cw, ccw, n;
+
+	if (NO_SCAN_CONVERTER)
+		return false;
+
+	/* XXX strict adherence to the Render specification */
+	if (dst->polyMode == PolyModePrecise) {
+		DBG(("%s: fallback -- precise rasterisation requested\n",
+		     __FUNCTION__));
+		return false;
+	}
+
+	sna = to_sna_from_drawable(dst->pDrawable);
+	if (!sna->render.composite_spans) {
+		DBG(("%s: fallback -- composite spans not supported\n",
+		     __FUNCTION__));
+		return false;
+	}
+
+	dst_x = pixman_fixed_to_int(points[0].x);
+	dst_y = pixman_fixed_to_int(points[0].y);
+
+	miPointFixedBounds(count, points, &extents);
+	DBG(("%s: extents (%d, %d), (%d, %d)\n",
+	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
+
+	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
+		return true;
+
+#if 0
+	if (extents.y2 - extents.y1 < 64 && extents.x2 - extents.x1 < 64) {
+		DBG(("%s: fallback -- traps extents too small %dx%d\n",
+		     __FUNCTION__, extents.y2 - extents.y1, extents.x2 - extents.x1));
+		return false;
+	}
+#endif
+
+	if (!sna_compute_composite_region(&clip,
+					  src, NULL, dst,
+					  src_x + extents.x1 - dst_x,
+					  src_y + extents.y1 - dst_y,
+					  0, 0,
+					  extents.x1, extents.y1,
+					  extents.x2 - extents.x1,
+					  extents.y2 - extents.y1)) {
+		DBG(("%s: triangles do not intersect drawable clips\n",
+		     __FUNCTION__)) ;
+		return true;
+	}
+
+	extents = *RegionExtents(&clip);
+	dx = dst->pDrawable->x;
+	dy = dst->pDrawable->y;
+
+	DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n",
+	     __FUNCTION__,
+	     extents.x1, extents.y1,
+	     extents.x2, extents.y2,
+	     dx, dy,
+	     src_x + extents.x1 - dst_x - dx,
+	     src_y + extents.y1 - dst_y - dy));
+
+	memset(&tmp, 0, sizeof(tmp));
+	if (!sna->render.composite_spans(sna, op, src, dst,
+					 src_x + extents.x1 - dst_x - dx,
+					 src_y + extents.y1 - dst_y - dy,
+					 extents.x1,  extents.y1,
+					 extents.x2 - extents.x1,
+					 extents.y2 - extents.y1,
+					 &tmp)) {
+		DBG(("%s: fallback -- composite spans render op not supported\n",
+		     __FUNCTION__));
+		return false;
+	}
+
+	dx *= FAST_SAMPLES_X;
+	dy *= FAST_SAMPLES_Y;
+	if (tor_init(&tor, &extents, 2*count))
+		goto skip;
+
+	cw = ccw = 0;
+	project_point_onto_grid(&points[0], dx, dy, &p[cw]);
+	project_point_onto_grid(&points[1], dx, dy, &p[2+ccw]);
+	polygon_add_line(tor.polygon, &p[cw], &p[2+ccw]);
+	n = 2;
+	do {
+		cw = !cw;
+		project_point_onto_grid(&points[n], dx, dy, &p[cw]);
+		polygon_add_line(tor.polygon, &p[!cw], &p[cw]);
+		if (++n == count)
+			break;
+
+		ccw = !ccw;
+		project_point_onto_grid(&points[n], dx, dy, &p[2+ccw]);
+		polygon_add_line(tor.polygon, &p[2+ccw], &p[2+!ccw]);
+		if (++n == count)
+			break;
+	} while (1);
+	polygon_add_line(tor.polygon, &p[2+ccw], &p[cw]);
+	assert(tor.polygon->num_edges <= 2*count);
+
+	tor_render(sna, &tor, &tmp, &clip,
+		   choose_span(dst, maskFormat, op, &clip),
+		   maskFormat && !operator_is_bounded(op));
+
+skip:
+	tor_fini(&tor);
+	tmp.done(sna, &tmp);
+
+	REGION_UNINIT(NULL, &clip);
+	return true;
+}
+
+static void
+tristrip_fallback(CARD8 op,
+		  PicturePtr src,
+		  PicturePtr dst,
+		  PictFormatPtr maskFormat,
+		  INT16 xSrc, INT16 ySrc,
+		  int n, xPointFixed *points)
+{
+	ScreenPtr screen = dst->pDrawable->pScreen;
+
+	if (maskFormat) {
+		PixmapPtr scratch;
+		PicturePtr mask;
+		INT16 dst_x, dst_y;
+		BoxRec bounds;
+		int width, height, depth;
+		pixman_image_t *image;
+		pixman_format_code_t format;
+		int error;
+
+		dst_x = pixman_fixed_to_int(points->x);
+		dst_y = pixman_fixed_to_int(points->y);
+
+		miPointFixedBounds(n, points, &bounds);
+		DBG(("%s: bounds (%d, %d), (%d, %d)\n",
+		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
+
+		if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2)
+			return;
+
+		if (!sna_compute_composite_extents(&bounds,
+						   src, NULL, dst,
+						   xSrc, ySrc,
+						   0, 0,
+						   bounds.x1, bounds.y1,
+						   bounds.x2 - bounds.x1,
+						   bounds.y2 - bounds.y1))
+			return;
+
+		DBG(("%s: extents (%d, %d), (%d, %d)\n",
+		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
+
+		width  = bounds.x2 - bounds.x1;
+		height = bounds.y2 - bounds.y1;
+		bounds.x1 -= dst->pDrawable->x;
+		bounds.y1 -= dst->pDrawable->y;
+		depth = maskFormat->depth;
+		format = maskFormat->format | (BitsPerPixel(depth) << 24);
+
+		DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
+		     __FUNCTION__, width, height, depth, format));
+		scratch = sna_pixmap_create_upload(screen,
+						   width, height, depth);
+		if (!scratch)
+			return;
+
+		memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);
+		image = pixman_image_create_bits(format, width, height,
+						 scratch->devPrivate.ptr,
+						 scratch->devKind);
+		if (image) {
+			xTriangle tri;
+			xPointFixed *p[3] = { &tri.p1, &tri.p2, &tri.p3 };
+			int i;
+
+			*p[0] = points[0];
+			*p[1] = points[1];
+			*p[2] = points[2];
+			pixman_add_triangles(image,
+					     -bounds.x1, -bounds.y1,
+					     1, (pixman_triangle_t *)&tri);
+			for (i = 3; i < n; i++) {
+				*p[i%3] = points[i];
+				pixman_add_triangles(image,
+						     -bounds.x1, -bounds.y1,
+						     1, (pixman_triangle_t *)&tri);
+			}
+			pixman_image_unref(image);
+		}
+
+		mask = CreatePicture(0, &scratch->drawable,
+				     PictureMatchFormat(screen, depth, format),
+				     0, 0, serverClient, &error);
+		screen->DestroyPixmap(scratch);
+		if (!mask)
+			return;
+
+		CompositePicture(op, src, mask, dst,
+				 xSrc + bounds.x1 - dst_x,
+				 ySrc + bounds.y1 - dst_y,
+				 0, 0,
+				 bounds.x1, bounds.y1,
+				 width, height);
+		FreePicture(mask, 0);
+	} else {
+		xTriangle tri;
+		xPointFixed *p[3] = { &tri.p1, &tri.p2, &tri.p3 };
+		int i;
+
+		if (dst->polyEdge == PolyEdgeSharp)
+			maskFormat = PictureMatchFormat(screen, 1, PICT_a1);
+		else
+			maskFormat = PictureMatchFormat(screen, 8, PICT_a8);
+
+		*p[0] = points[0];
+		*p[1] = points[1];
+		*p[2] = points[2];
+		triangles_fallback(op,
+				   src, dst, maskFormat,
+				   xSrc, ySrc, 1, &tri);
+		for (i = 3; i < n; i++) {
+			*p[i%3] = points[i];
+			/* Should xSrc,ySrc be updated? */
+			triangles_fallback(op,
+					   src, dst, maskFormat,
+					   xSrc, ySrc, 1, &tri);
+		}
+	}
+}
+
+void
+sna_composite_tristrip(CARD8 op,
+		       PicturePtr src,
+		       PicturePtr dst,
+		       PictFormatPtr maskFormat,
+		       INT16 xSrc, INT16 ySrc,
+		       int n, xPointFixed *points)
+{
+	if (tristrip_span_converter(op, src, dst, maskFormat, xSrc, ySrc, n, points))
+		return;
+
+	tristrip_fallback(op, src, dst, maskFormat, xSrc, ySrc, n, points);
+}
+
+static void
+trifan_fallback(CARD8 op,
+		PicturePtr src,
+		PicturePtr dst,
+		PictFormatPtr maskFormat,
+		INT16 xSrc, INT16 ySrc,
+		int n, xPointFixed *points)
+{ /* Software path for a triangle fan: triangle i is (points[0], points[i-1], points[i]). */
+	ScreenPtr screen = dst->pDrawable->pScreen;
+
+	if (maskFormat) { /* accumulate every triangle into one mask, then composite once */
+		PixmapPtr scratch;
+		PicturePtr mask;
+		INT16 dst_x, dst_y;
+		BoxRec bounds;
+		int width, height, depth;
+		pixman_image_t *image;
+		pixman_format_code_t format;
+		int error;
+
+		dst_x = pixman_fixed_to_int(points->x);
+		dst_y = pixman_fixed_to_int(points->y);
+
+		miPointFixedBounds(n, points, &bounds);
+		DBG(("%s: bounds (%d, %d), (%d, %d)\n",
+		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
+
+		if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2)
+			return;
+
+		if (!sna_compute_composite_extents(&bounds,
+						   src, NULL, dst,
+						   xSrc, ySrc,
+						   0, 0,
+						   bounds.x1, bounds.y1,
+						   bounds.x2 - bounds.x1,
+						   bounds.y2 - bounds.y1))
+			return;
+
+		DBG(("%s: extents (%d, %d), (%d, %d)\n",
+		     __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2));
+
+		width  = bounds.x2 - bounds.x1;
+		height = bounds.y2 - bounds.y1;
+		bounds.x1 -= dst->pDrawable->x;
+		bounds.y1 -= dst->pDrawable->y;
+		depth = maskFormat->depth;
+		format = maskFormat->format | (BitsPerPixel(depth) << 24);
+
+		DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n",
+		     __FUNCTION__, width, height, depth, format));
+		scratch = sna_pixmap_create_upload(screen,
+						   width, height, depth);
+		if (!scratch)
+			return;
+
+		memset(scratch->devPrivate.ptr, 0, scratch->devKind*height);
+		image = pixman_image_create_bits(format, width, height,
+						 scratch->devPrivate.ptr,
+						 scratch->devKind);
+		if (image) {
+			xTriangle tri;
+			xPointFixed *p[3] = { &tri.p1, &tri.p2, &tri.p3 };
+			int i;
+
+			*p[0] = points[0];
+			*p[1] = points[1];
+			*p[2] = points[2];
+			pixman_add_triangles(image,
+					     -bounds.x1, -bounds.y1,
+					     1, (pixman_triangle_t *)&tri);
+			for (i = 3; i < n; i++) {
+				*p[2 - (i%2)] = points[i]; /* overwrite the older rim vertex (odd i -> p2, even i -> p3); p1 stays the hub */
+				pixman_add_triangles(image,
+						     -bounds.x1, -bounds.y1,
+						     1, (pixman_triangle_t *)&tri);
+			}
+			pixman_image_unref(image);
+		}
+
+		mask = CreatePicture(0, &scratch->drawable,
+				     PictureMatchFormat(screen, depth, format),
+				     0, 0, serverClient, &error);
+		screen->DestroyPixmap(scratch);
+		if (!mask)
+			return;
+
+		CompositePicture(op, src, mask, dst,
+				 xSrc + bounds.x1 - dst_x,
+				 ySrc + bounds.y1 - dst_y,
+				 0, 0,
+				 bounds.x1, bounds.y1,
+				 width, height);
+		FreePicture(mask, 0);
+	} else { /* no mask format: composite each triangle individually */
+		xTriangle tri;
+		xPointFixed *p[3] = { &tri.p1, &tri.p2, &tri.p3 };
+		int i;
+
+		if (dst->polyEdge == PolyEdgeSharp)
+			maskFormat = PictureMatchFormat(screen, 1, PICT_a1);
+		else
+			maskFormat = PictureMatchFormat(screen, 8, PICT_a8);
+
+		*p[0] = points[0];
+		*p[1] = points[1];
+		*p[2] = points[2];
+		triangles_fallback(op,
+				   src, dst, maskFormat,
+				   xSrc, ySrc, 1, &tri);
+		for (i = 3; i < n; i++) {
+			*p[2 - (i%2)] = points[i]; /* overwrite the older rim vertex (odd i -> p2, even i -> p3); p1 stays the hub */
+			/* Should xSrc,ySrc be updated? */
+			triangles_fallback(op,
+					   src, dst, maskFormat,
+					   xSrc, ySrc, 1, &tri);
+		}
+	}
+}
+
+void
+sna_composite_trifan(CARD8 op,
+		     PicturePtr src,
+		     PicturePtr dst,
+		     PictFormatPtr maskFormat,
+		     INT16 xSrc, INT16 ySrc,
+		     int n, xPointFixed *points)
+{
+	trifan_fallback(op, src, dst, maskFormat, xSrc, ySrc, n, points);
+}


More information about the xorg-commit mailing list