xf86-video-intel: src/sna/gen4_vertex.c

Chris Wilson ickle at kemper.freedesktop.org
Tue Mar 26 09:29:47 PDT 2013


 src/sna/gen4_vertex.c |  814 +++++++++++++++++++++++++-------------------------
 1 file changed, 407 insertions(+), 407 deletions(-)

New commits:
commit 1d6f4078c1e405749ff688058ef76c287ab36201
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Mar 26 16:28:26 2013 +0000

    sna/gen4+: Reorder code to compile on squeeze
    
    Remember to only use sse4_2, avx2 in their restricted sections that
    check for compiler support.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index 78d8972..cd366c9 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -739,10 +739,10 @@ emit_boxes_identity_mask(const struct sna_composite_op *op,
 	} while (--nbox);
 }
 
-sse4_2 fastcall static void
-emit_primitive_identity_mask__sse4_2(struct sna *sna,
-				     const struct sna_composite_op *op,
-				     const struct sna_composite_rectangles *r)
+sse2 fastcall static void
+emit_primitive_linear_identity_mask(struct sna *sna,
+				    const struct sna_composite_op *op,
+				    const struct sna_composite_rectangles *r)
 {
 	union {
 		struct sna_coordinate p;
@@ -779,13 +779,15 @@ emit_primitive_identity_mask__sse4_2(struct sna *sna,
 	v[8] = dst.f;
 	v[11] = msk_y * op->mask.scale[1];
 
-	v[9] = v[5] = v[1] = .5;
+	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
 }
 
-sse4_2 fastcall static void
-emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
-				 const BoxRec *box, int nbox,
-				 float *v)
+sse2 fastcall static void
+emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
+				const BoxRec *box, int nbox,
+				float *v)
 {
 	float msk_x = op->mask.offset[0];
 	float msk_y = op->mask.offset[1];
@@ -810,64 +812,261 @@ emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
 		v[8] = dst.f;
 		v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
-		v[9] = v[5] = v[1] = .5;
+		v[1] = compute_linear(&op->src, box->x2, box->y2);
+		v[5] = compute_linear(&op->src, box->x1, box->y2);
+		v[9] = compute_linear(&op->src, box->x1, box->y1);
+
 		v += 12;
 		box++;
 	} while (--nbox);
 }
 
-avx2 fastcall static void
-emit_primitive_identity_mask__avx2(struct sna *sna,
-				   const struct sna_composite_op *op,
-				   const struct sna_composite_rectangles *r)
+sse2 fastcall static void
+emit_primitive_identity_source_mask(struct sna *sna,
+				    const struct sna_composite_op *op,
+				    const struct sna_composite_rectangles *r)
 {
 	union {
 		struct sna_coordinate p;
 		float f;
 	} dst;
+	float src_x, src_y;
 	float msk_x, msk_y;
 	float w, h;
 	float *v;
 
+	src_x = r->src.x + op->src.offset[0];
+	src_y = r->src.y + op->src.offset[1];
 	msk_x = r->mask.x + op->mask.offset[0];
 	msk_y = r->mask.y + op->mask.offset[1];
 	w = r->width;
 	h = r->height;
 
-	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
-	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
+	assert(op->floats_per_rect == 15);
+	assert((sna->render.vertex_used % 5) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 15;
 
-	assert(op->floats_per_rect == 12);
-	assert((sna->render.vertex_used % 4) == 0);
+	dst.p.x = r->dst.x + r->width;
+	dst.p.y = r->dst.y + r->height;
+	v[0] = dst.f;
+	v[1] = (src_x + w) * op->src.scale[0];
+	v[2] = (src_y + h) * op->src.scale[1];
+	v[3] = (msk_x + w) * op->mask.scale[0];
+	v[4] = (msk_y + h) * op->mask.scale[1];
+
+	dst.p.x = r->dst.x;
+	v[5] = dst.f;
+	v[6] = src_x * op->src.scale[0];
+	v[7] = v[2];
+	v[8] = msk_x * op->mask.scale[0];
+	v[9] = v[4];
+
+	dst.p.y = r->dst.y;
+	v[10] = dst.f;
+	v[11] = v[6];
+	v[12] = src_y * op->src.scale[1];
+	v[13] = v[8];
+	v[14] = msk_y * op->mask.scale[1];
+}
+
+sse2 fastcall static void
+emit_primitive_simple_source_identity(struct sna *sna,
+				      const struct sna_composite_op *op,
+				      const struct sna_composite_rectangles *r)
+{
+	float *v;
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst;
+
+	float xx = op->src.transform->matrix[0][0];
+	float x0 = op->src.transform->matrix[0][2];
+	float yy = op->src.transform->matrix[1][1];
+	float y0 = op->src.transform->matrix[1][2];
+	float sx = op->src.scale[0];
+	float sy = op->src.scale[1];
+	int16_t tx = op->src.offset[0];
+	int16_t ty = op->src.offset[1];
+	float msk_x = r->mask.x + op->mask.offset[0];
+	float msk_y = r->mask.y + op->mask.offset[1];
+	float w = r->width, h = r->height;
+
+	assert(op->floats_per_rect == 15);
+	assert((sna->render.vertex_used % 5) == 0);
 	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 12;
+	sna->render.vertex_used += 3*5;
 
 	dst.p.x = r->dst.x + r->width;
 	dst.p.y = r->dst.y + r->height;
 	v[0] = dst.f;
-	v[2] = (msk_x + w) * op->mask.scale[0];
-	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
+	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
+	v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
+	v[3] = (msk_x + w) * op->mask.scale[0];
+	v[4] = (msk_y + h) * op->mask.scale[1];
 
 	dst.p.x = r->dst.x;
-	v[4] = dst.f;
-	v[10] = v[6] = msk_x * op->mask.scale[0];
+	v[5] = dst.f;
+	v[6] = ((r->src.x + tx) * xx + x0) * sx;
+	v[7] = v[2];
+	v[8] = msk_x * op->mask.scale[0];
+	v[9] = v[4];
 
 	dst.p.y = r->dst.y;
-	v[8] = dst.f;
-	v[11] = msk_y * op->mask.scale[1];
+	v[10] = dst.f;
+	v[11] = v[6];
+	v[12] = ((r->src.y + ty) * yy + y0) * sy;
+	v[13] = v[8];
+	v[14] = msk_y * op->mask.scale[1];
+}
 
-	v[9] = v[5] = v[1] = .5;
+sse2 fastcall static void
+emit_primitive_affine_source_identity(struct sna *sna,
+				      const struct sna_composite_op *op,
+				      const struct sna_composite_rectangles *r)
+{
+	float *v;
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst;
+	float msk_x = r->mask.x + op->mask.offset[0];
+	float msk_y = r->mask.y + op->mask.offset[1];
+	float w = r->width, h = r->height;
+
+	assert(op->floats_per_rect == 15);
+	assert((sna->render.vertex_used % 5) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 3*5;
+
+	dst.p.x = r->dst.x + r->width;
+	dst.p.y = r->dst.y + r->height;
+	v[0] = dst.f;
+	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
+				    op->src.offset[1] + r->src.y + r->height,
+				    op->src.transform, op->src.scale,
+				    &v[1], &v[2]);
+	v[3] = (msk_x + w) * op->mask.scale[0];
+	v[4] = (msk_y + h) * op->mask.scale[1];
+
+	dst.p.x = r->dst.x;
+	v[5] = dst.f;
+	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
+				    op->src.offset[1] + r->src.y + r->height,
+				    op->src.transform, op->src.scale,
+				    &v[6], &v[7]);
+	v[8] = msk_x * op->mask.scale[0];
+	v[9] = v[4];
+
+	dst.p.y = r->dst.y;
+	v[10] = dst.f;
+	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
+				    op->src.offset[1] + r->src.y,
+				    op->src.transform, op->src.scale,
+				    &v[11], &v[12]);
+	v[13] = v[8];
+	v[14] = msk_y * op->mask.scale[1];
 }
 
-avx2 fastcall static void
-emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
-			       const BoxRec *box, int nbox,
-			       float *v)
+/* SSE4_2 */
+#if defined(sse4_2)
+
+sse4_2 fastcall static void
+emit_primitive_linear__sse4_2(struct sna *sna,
+			      const struct sna_composite_op *op,
+			      const struct sna_composite_rectangles *r)
 {
-	float msk_x = op->mask.offset[0];
-	float msk_y = op->mask.offset[1];
+	float *v;
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst;
+
+	assert(op->floats_per_rect == 6);
+	assert((sna->render.vertex_used % 2) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 6;
+	assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+	dst.p.x = r->dst.x + r->width;
+	dst.p.y = r->dst.y + r->height;
+	v[0] = dst.f;
+	dst.p.x = r->dst.x;
+	v[2] = dst.f;
+	dst.p.y = r->dst.y;
+	v[4] = dst.f;
+
+	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+	v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+	v[5] = compute_linear(&op->src, r->src.x, r->src.y);
+}
+
+sse4_2 fastcall static void
+emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
+			  const BoxRec *box, int nbox,
+			  float *v)
+{
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst;
 
 	do {
+		dst.p.x = box->x2;
+		dst.p.y = box->y2;
+		v[0] = dst.f;
+		dst.p.x = box->x1;
+		v[2] = dst.f;
+		dst.p.y = box->y1;
+		v[4] = dst.f;
+
+		v[1] = compute_linear(&op->src, box->x2, box->y2);
+		v[3] = compute_linear(&op->src, box->x1, box->y2);
+		v[5] = compute_linear(&op->src, box->x1, box->y1);
+
+		v += 6;
+		box++;
+	} while (--nbox);
+}
+
+sse4_2 fastcall static void
+emit_primitive_identity_source__sse4_2(struct sna *sna,
+				       const struct sna_composite_op *op,
+				       const struct sna_composite_rectangles *r)
+{
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst;
+	float *v;
+
+	assert(op->floats_per_rect == 9);
+	assert((sna->render.vertex_used % 3) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 9;
+
+	dst.p.x = r->dst.x + r->width;
+	dst.p.y = r->dst.y + r->height;
+	v[0] = dst.f;
+	dst.p.x = r->dst.x;
+	v[3] = dst.f;
+	dst.p.y = r->dst.y;
+	v[6] = dst.f;
+
+	v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
+	v[1] = v[4] + r->width * op->src.scale[0];
+
+	v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
+	v[5] = v[2] = v[8] + r->height * op->src.scale[1];
+}
+
+sse4_2 fastcall static void
+emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
+				   const BoxRec *box, int nbox,
+				   float *v)
+{
+	do {
 		union {
 			struct sna_coordinate p;
 			float f;
@@ -876,75 +1075,75 @@ emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
 		dst.p.x = box->x2;
 		dst.p.y = box->y2;
 		v[0] = dst.f;
-		v[2] = (msk_x + box->x2) * op->mask.scale[0];
-		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
-
 		dst.p.x = box->x1;
-		v[4] = dst.f;
-		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
-
+		v[3] = dst.f;
 		dst.p.y = box->y1;
-		v[8] = dst.f;
-		v[11] = (msk_y + box->y1) * op->mask.scale[1];
+		v[6] = dst.f;
 
-		v[9] = v[5] = v[1] = .5;
-		v += 12;
+		v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
+		v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
+
+		v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
+		v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
+
+		v += 9;
 		box++;
 	} while (--nbox);
 }
 
-sse2 fastcall static void
-emit_primitive_linear_identity_mask(struct sna *sna,
-				    const struct sna_composite_op *op,
-				    const struct sna_composite_rectangles *r)
+sse4_2 fastcall static void
+emit_primitive_simple_source__sse4_2(struct sna *sna,
+				     const struct sna_composite_op *op,
+				     const struct sna_composite_rectangles *r)
 {
+	float *v;
 	union {
 		struct sna_coordinate p;
 		float f;
 	} dst;
-	float msk_x, msk_y;
-	float w, h;
-	float *v;
-
-	msk_x = r->mask.x + op->mask.offset[0];
-	msk_y = r->mask.y + op->mask.offset[1];
-	w = r->width;
-	h = r->height;
-
-	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
-	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
-	assert(op->floats_per_rect == 12);
-	assert((sna->render.vertex_used % 4) == 0);
+	float xx = op->src.transform->matrix[0][0];
+	float x0 = op->src.transform->matrix[0][2];
+	float yy = op->src.transform->matrix[1][1];
+	float y0 = op->src.transform->matrix[1][2];
+	float sx = op->src.scale[0];
+	float sy = op->src.scale[1];
+	int16_t tx = op->src.offset[0];
+	int16_t ty = op->src.offset[1];
+
+	assert(op->floats_per_rect == 9);
+	assert((sna->render.vertex_used % 3) == 0);
 	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 12;
+	sna->render.vertex_used += 3*3;
 
 	dst.p.x = r->dst.x + r->width;
 	dst.p.y = r->dst.y + r->height;
 	v[0] = dst.f;
-	v[2] = (msk_x + w) * op->mask.scale[0];
-	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
+	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
+	v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
 
 	dst.p.x = r->dst.x;
-	v[4] = dst.f;
-	v[10] = v[6] = msk_x * op->mask.scale[0];
+	v[3] = dst.f;
+	v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
 
 	dst.p.y = r->dst.y;
-	v[8] = dst.f;
-	v[11] = msk_y * op->mask.scale[1];
-
-	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
-	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
-	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
+	v[6] = dst.f;
+	v[8] = ((r->src.y + ty) * yy + y0) * sy;
 }
 
-sse2 fastcall static void
-emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
-				const BoxRec *box, int nbox,
-				float *v)
+sse4_2 fastcall static void
+emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
+				 const BoxRec *box, int nbox,
+				 float *v)
 {
-	float msk_x = op->mask.offset[0];
-	float msk_y = op->mask.offset[1];
+	float xx = op->src.transform->matrix[0][0];
+	float x0 = op->src.transform->matrix[0][2];
+	float yy = op->src.transform->matrix[1][1];
+	float y0 = op->src.transform->matrix[1][2];
+	float sx = op->src.scale[0];
+	float sy = op->src.scale[1];
+	int16_t tx = op->src.offset[0];
+	int16_t ty = op->src.offset[1];
 
 	do {
 		union {
@@ -955,30 +1154,26 @@ emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
 		dst.p.x = box->x2;
 		dst.p.y = box->y2;
 		v[0] = dst.f;
-		v[2] = (msk_x + box->x2) * op->mask.scale[0];
-		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
+		v[1] = ((box->x2 + tx) * xx + x0) * sx;
+		v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
 
 		dst.p.x = box->x1;
-		v[4] = dst.f;
-		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
+		v[3] = dst.f;
+		v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
 
 		dst.p.y = box->y1;
-		v[8] = dst.f;
-		v[11] = (msk_y + box->y1) * op->mask.scale[1];
-
-		v[1] = compute_linear(&op->src, box->x2, box->y2);
-		v[5] = compute_linear(&op->src, box->x1, box->y2);
-		v[9] = compute_linear(&op->src, box->x1, box->y1);
+		v[6] = dst.f;
+		v[8] = ((box->y1 + ty) * yy + y0) * sy;
 
-		v += 12;
+		v += 9;
 		box++;
 	} while (--nbox);
 }
 
 sse4_2 fastcall static void
-emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
-					    const struct sna_composite_op *op,
-					    const struct sna_composite_rectangles *r)
+emit_primitive_identity_mask__sse4_2(struct sna *sna,
+				     const struct sna_composite_op *op,
+				     const struct sna_composite_rectangles *r)
 {
 	union {
 		struct sna_coordinate p;
@@ -1015,15 +1210,13 @@ emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
 	v[8] = dst.f;
 	v[11] = msk_y * op->mask.scale[1];
 
-	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
-	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
-	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
+	v[9] = v[5] = v[1] = .5;
 }
 
 sse4_2 fastcall static void
-emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
-					const BoxRec *box, int nbox,
-					float *v)
+emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
+				 const BoxRec *box, int nbox,
+				 float *v)
 {
 	float msk_x = op->mask.offset[0];
 	float msk_y = op->mask.offset[1];
@@ -1048,19 +1241,16 @@ emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
 		v[8] = dst.f;
 		v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
-		v[1] = compute_linear(&op->src, box->x2, box->y2);
-		v[5] = compute_linear(&op->src, box->x1, box->y2);
-		v[9] = compute_linear(&op->src, box->x1, box->y1);
-
+		v[9] = v[5] = v[1] = .5;
 		v += 12;
 		box++;
 	} while (--nbox);
 }
 
-avx2 fastcall static void
-emit_primitive_linear_identity_mask__avx2(struct sna *sna,
-					  const struct sna_composite_op *op,
-					  const struct sna_composite_rectangles *r)
+sse4_2 fastcall static void
+emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
+					    const struct sna_composite_op *op,
+					    const struct sna_composite_rectangles *r)
 {
 	union {
 		struct sna_coordinate p;
@@ -1102,10 +1292,10 @@ emit_primitive_linear_identity_mask__avx2(struct sna *sna,
 	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
 }
 
-avx2 fastcall static void
-emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
-				      const BoxRec *box, int nbox,
-				      float *v)
+sse4_2 fastcall static void
+emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
+					const BoxRec *box, int nbox,
+					float *v)
 {
 	float msk_x = op->mask.offset[0];
 	float msk_y = op->mask.offset[1];
@@ -1139,161 +1329,15 @@ emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
 	} while (--nbox);
 }
 
-sse2 fastcall static void
-emit_primitive_identity_source_mask(struct sna *sna,
-				    const struct sna_composite_op *op,
-				    const struct sna_composite_rectangles *r)
-{
-	union {
-		struct sna_coordinate p;
-		float f;
-	} dst;
-	float src_x, src_y;
-	float msk_x, msk_y;
-	float w, h;
-	float *v;
-
-	src_x = r->src.x + op->src.offset[0];
-	src_y = r->src.y + op->src.offset[1];
-	msk_x = r->mask.x + op->mask.offset[0];
-	msk_y = r->mask.y + op->mask.offset[1];
-	w = r->width;
-	h = r->height;
-
-	assert(op->floats_per_rect == 15);
-	assert((sna->render.vertex_used % 5) == 0);
-	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 15;
-
-	dst.p.x = r->dst.x + r->width;
-	dst.p.y = r->dst.y + r->height;
-	v[0] = dst.f;
-	v[1] = (src_x + w) * op->src.scale[0];
-	v[2] = (src_y + h) * op->src.scale[1];
-	v[3] = (msk_x + w) * op->mask.scale[0];
-	v[4] = (msk_y + h) * op->mask.scale[1];
-
-	dst.p.x = r->dst.x;
-	v[5] = dst.f;
-	v[6] = src_x * op->src.scale[0];
-	v[7] = v[2];
-	v[8] = msk_x * op->mask.scale[0];
-	v[9] = v[4];
-
-	dst.p.y = r->dst.y;
-	v[10] = dst.f;
-	v[11] = v[6];
-	v[12] = src_y * op->src.scale[1];
-	v[13] = v[8];
-	v[14] = msk_y * op->mask.scale[1];
-}
-
-sse2 fastcall static void
-emit_primitive_simple_source_identity(struct sna *sna,
-				      const struct sna_composite_op *op,
-				      const struct sna_composite_rectangles *r)
-{
-	float *v;
-	union {
-		struct sna_coordinate p;
-		float f;
-	} dst;
-
-	float xx = op->src.transform->matrix[0][0];
-	float x0 = op->src.transform->matrix[0][2];
-	float yy = op->src.transform->matrix[1][1];
-	float y0 = op->src.transform->matrix[1][2];
-	float sx = op->src.scale[0];
-	float sy = op->src.scale[1];
-	int16_t tx = op->src.offset[0];
-	int16_t ty = op->src.offset[1];
-	float msk_x = r->mask.x + op->mask.offset[0];
-	float msk_y = r->mask.y + op->mask.offset[1];
-	float w = r->width, h = r->height;
-
-	assert(op->floats_per_rect == 15);
-	assert((sna->render.vertex_used % 5) == 0);
-	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 3*5;
-
-	dst.p.x = r->dst.x + r->width;
-	dst.p.y = r->dst.y + r->height;
-	v[0] = dst.f;
-	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
-	v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
-	v[3] = (msk_x + w) * op->mask.scale[0];
-	v[4] = (msk_y + h) * op->mask.scale[1];
-
-	dst.p.x = r->dst.x;
-	v[5] = dst.f;
-	v[6] = ((r->src.x + tx) * xx + x0) * sx;
-	v[7] = v[2];
-	v[8] = msk_x * op->mask.scale[0];
-	v[9] = v[4];
-
-	dst.p.y = r->dst.y;
-	v[10] = dst.f;
-	v[11] = v[6];
-	v[12] = ((r->src.y + ty) * yy + y0) * sy;
-	v[13] = v[8];
-	v[14] = msk_y * op->mask.scale[1];
-}
-
-sse2 fastcall static void
-emit_primitive_affine_source_identity(struct sna *sna,
-				      const struct sna_composite_op *op,
-				      const struct sna_composite_rectangles *r)
-{
-	float *v;
-	union {
-		struct sna_coordinate p;
-		float f;
-	} dst;
-	float msk_x = r->mask.x + op->mask.offset[0];
-	float msk_y = r->mask.y + op->mask.offset[1];
-	float w = r->width, h = r->height;
-
-	assert(op->floats_per_rect == 15);
-	assert((sna->render.vertex_used % 5) == 0);
-	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 3*5;
-
-	dst.p.x = r->dst.x + r->width;
-	dst.p.y = r->dst.y + r->height;
-	v[0] = dst.f;
-	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
-				    op->src.offset[1] + r->src.y + r->height,
-				    op->src.transform, op->src.scale,
-				    &v[1], &v[2]);
-	v[3] = (msk_x + w) * op->mask.scale[0];
-	v[4] = (msk_y + h) * op->mask.scale[1];
-
-	dst.p.x = r->dst.x;
-	v[5] = dst.f;
-	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
-				    op->src.offset[1] + r->src.y + r->height,
-				    op->src.transform, op->src.scale,
-				    &v[6], &v[7]);
-	v[8] = msk_x * op->mask.scale[0];
-	v[9] = v[4];
-
-	dst.p.y = r->dst.y;
-	v[10] = dst.f;
-	_sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
-				    op->src.offset[1] + r->src.y,
-				    op->src.transform, op->src.scale,
-				    &v[11], &v[12]);
-	v[13] = v[8];
-	v[14] = msk_y * op->mask.scale[1];
-}
+#endif
 
-/* SSE4_2 */
-#if defined(sse4_2)
+/* AVX2 */
+#if defined(avx2)
 
-sse4_2 fastcall static void
-emit_primitive_linear__sse4_2(struct sna *sna,
-			      const struct sna_composite_op *op,
-			      const struct sna_composite_rectangles *r)
+avx2 fastcall static void
+emit_primitive_linear__avx2(struct sna *sna,
+			    const struct sna_composite_op *op,
+			    const struct sna_composite_rectangles *r)
 {
 	float *v;
 	union {
@@ -1320,10 +1364,10 @@ emit_primitive_linear__sse4_2(struct sna *sna,
 	v[5] = compute_linear(&op->src, r->src.x, r->src.y);
 }
 
-sse4_2 fastcall static void
-emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
-			  const BoxRec *box, int nbox,
-			  float *v)
+avx2 fastcall static void
+emit_boxes_linear__avx2(const struct sna_composite_op *op,
+			const BoxRec *box, int nbox,
+			float *v)
 {
 	union {
 		struct sna_coordinate p;
@@ -1348,10 +1392,10 @@ emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
 	} while (--nbox);
 }
 
-sse4_2 fastcall static void
-emit_primitive_identity_source__sse4_2(struct sna *sna,
-				       const struct sna_composite_op *op,
-				       const struct sna_composite_rectangles *r)
+avx2 fastcall static void
+emit_primitive_identity_source__avx2(struct sna *sna,
+				     const struct sna_composite_op *op,
+				     const struct sna_composite_rectangles *r)
 {
 	union {
 		struct sna_coordinate p;
@@ -1379,10 +1423,10 @@ emit_primitive_identity_source__sse4_2(struct sna *sna,
 	v[5] = v[2] = v[8] + r->height * op->src.scale[1];
 }
 
-sse4_2 fastcall static void
-emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
-				   const BoxRec *box, int nbox,
-				   float *v)
+avx2 fastcall static void
+emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
+				 const BoxRec *box, int nbox,
+				 float *v)
 {
 	do {
 		union {
@@ -1409,10 +1453,10 @@ emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
 	} while (--nbox);
 }
 
-sse4_2 fastcall static void
-emit_primitive_simple_source__sse4_2(struct sna *sna,
-				     const struct sna_composite_op *op,
-				     const struct sna_composite_rectangles *r)
+avx2 fastcall static void
+emit_primitive_simple_source__avx2(struct sna *sna,
+				   const struct sna_composite_op *op,
+				   const struct sna_composite_rectangles *r)
 {
 	float *v;
 	union {
@@ -1449,10 +1493,10 @@ emit_primitive_simple_source__sse4_2(struct sna *sna,
 	v[8] = ((r->src.y + ty) * yy + y0) * sy;
 }
 
-sse4_2 fastcall static void
-emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
-				 const BoxRec *box, int nbox,
-				 float *v)
+avx2 fastcall static void
+emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
+			       const BoxRec *box, int nbox,
+			       float *v)
 {
 	float xx = op->src.transform->matrix[0][0];
 	float x0 = op->src.transform->matrix[0][2];
@@ -1488,105 +1532,57 @@ emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
 	} while (--nbox);
 }
 
-#endif
-
-/* AVX2 */
-#if defined(avx2)
-
-avx2 fastcall static void
-emit_primitive_linear__avx2(struct sna *sna,
-			    const struct sna_composite_op *op,
-			    const struct sna_composite_rectangles *r)
-{
-	float *v;
-	union {
-		struct sna_coordinate p;
-		float f;
-	} dst;
-
-	assert(op->floats_per_rect == 6);
-	assert((sna->render.vertex_used % 2) == 0);
-	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 6;
-	assert(sna->render.vertex_used <= sna->render.vertex_size);
-
-	dst.p.x = r->dst.x + r->width;
-	dst.p.y = r->dst.y + r->height;
-	v[0] = dst.f;
-	dst.p.x = r->dst.x;
-	v[2] = dst.f;
-	dst.p.y = r->dst.y;
-	v[4] = dst.f;
-
-	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
-	v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
-	v[5] = compute_linear(&op->src, r->src.x, r->src.y);
-}
-
 avx2 fastcall static void
-emit_boxes_linear__avx2(const struct sna_composite_op *op,
-			const BoxRec *box, int nbox,
-			float *v)
+emit_primitive_identity_mask__avx2(struct sna *sna,
+				   const struct sna_composite_op *op,
+				   const struct sna_composite_rectangles *r)
 {
 	union {
 		struct sna_coordinate p;
 		float f;
 	} dst;
+	float msk_x, msk_y;
+	float w, h;
+	float *v;
 
-	do {
-		dst.p.x = box->x2;
-		dst.p.y = box->y2;
-		v[0] = dst.f;
-		dst.p.x = box->x1;
-		v[2] = dst.f;
-		dst.p.y = box->y1;
-		v[4] = dst.f;
-
-		v[1] = compute_linear(&op->src, box->x2, box->y2);
-		v[3] = compute_linear(&op->src, box->x1, box->y2);
-		v[5] = compute_linear(&op->src, box->x1, box->y1);
-
-		v += 6;
-		box++;
-	} while (--nbox);
-}
+	msk_x = r->mask.x + op->mask.offset[0];
+	msk_y = r->mask.y + op->mask.offset[1];
+	w = r->width;
+	h = r->height;
 
-avx2 fastcall static void
-emit_primitive_identity_source__avx2(struct sna *sna,
-				     const struct sna_composite_op *op,
-				     const struct sna_composite_rectangles *r)
-{
-	union {
-		struct sna_coordinate p;
-		float f;
-	} dst;
-	float *v;
+	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
+	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
 
-	assert(op->floats_per_rect == 9);
-	assert((sna->render.vertex_used % 3) == 0);
+	assert(op->floats_per_rect == 12);
+	assert((sna->render.vertex_used % 4) == 0);
 	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 9;
+	sna->render.vertex_used += 12;
 
 	dst.p.x = r->dst.x + r->width;
 	dst.p.y = r->dst.y + r->height;
 	v[0] = dst.f;
+	v[2] = (msk_x + w) * op->mask.scale[0];
+	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
+
 	dst.p.x = r->dst.x;
-	v[3] = dst.f;
-	dst.p.y = r->dst.y;
-	v[6] = dst.f;
+	v[4] = dst.f;
+	v[10] = v[6] = msk_x * op->mask.scale[0];
 
-	v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
-	v[1] = v[4] + r->width * op->src.scale[0];
+	dst.p.y = r->dst.y;
+	v[8] = dst.f;
+	v[11] = msk_y * op->mask.scale[1];
 
-	v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
-	v[5] = v[2] = v[8] + r->height * op->src.scale[1];
+	v[9] = v[5] = v[1] = .5;
 }
 
 avx2 fastcall static void
-emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
-				 const BoxRec *box, int nbox,
-				 float *v)
+emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
+			       const BoxRec *box, int nbox,
+			       float *v)
 {
+	float msk_x = op->mask.offset[0];
+	float msk_y = op->mask.offset[1];
+
 	do {
 		union {
 			struct sna_coordinate p;
@@ -1596,75 +1592,75 @@ emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
 		dst.p.x = box->x2;
 		dst.p.y = box->y2;
 		v[0] = dst.f;
-		dst.p.x = box->x1;
-		v[3] = dst.f;
-		dst.p.y = box->y1;
-		v[6] = dst.f;
+		v[2] = (msk_x + box->x2) * op->mask.scale[0];
+		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
-		v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
-		v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
+		dst.p.x = box->x1;
+		v[4] = dst.f;
+		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
-		v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
-		v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
+		dst.p.y = box->y1;
+		v[8] = dst.f;
+		v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
-		v += 9;
+		v[9] = v[5] = v[1] = .5;
+		v += 12;
 		box++;
 	} while (--nbox);
 }
 
 avx2 fastcall static void
-emit_primitive_simple_source__avx2(struct sna *sna,
-				   const struct sna_composite_op *op,
-				   const struct sna_composite_rectangles *r)
+emit_primitive_linear_identity_mask__avx2(struct sna *sna,
+					  const struct sna_composite_op *op,
+					  const struct sna_composite_rectangles *r)
 {
-	float *v;
 	union {
 		struct sna_coordinate p;
 		float f;
 	} dst;
+	float msk_x, msk_y;
+	float w, h;
+	float *v;
 
-	float xx = op->src.transform->matrix[0][0];
-	float x0 = op->src.transform->matrix[0][2];
-	float yy = op->src.transform->matrix[1][1];
-	float y0 = op->src.transform->matrix[1][2];
-	float sx = op->src.scale[0];
-	float sy = op->src.scale[1];
-	int16_t tx = op->src.offset[0];
-	int16_t ty = op->src.offset[1];
+	msk_x = r->mask.x + op->mask.offset[0];
+	msk_y = r->mask.y + op->mask.offset[1];
+	w = r->width;
+	h = r->height;
 
-	assert(op->floats_per_rect == 9);
-	assert((sna->render.vertex_used % 3) == 0);
+	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
+	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
+
+	assert(op->floats_per_rect == 12);
+	assert((sna->render.vertex_used % 4) == 0);
 	v = sna->render.vertices + sna->render.vertex_used;
-	sna->render.vertex_used += 3*3;
+	sna->render.vertex_used += 12;
 
 	dst.p.x = r->dst.x + r->width;
 	dst.p.y = r->dst.y + r->height;
 	v[0] = dst.f;
-	v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
-	v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
+	v[2] = (msk_x + w) * op->mask.scale[0];
+	v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
 
 	dst.p.x = r->dst.x;
-	v[3] = dst.f;
-	v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
+	v[4] = dst.f;
+	v[10] = v[6] = msk_x * op->mask.scale[0];
 
 	dst.p.y = r->dst.y;
-	v[6] = dst.f;
-	v[8] = ((r->src.y + ty) * yy + y0) * sy;
+	v[8] = dst.f;
+	v[11] = msk_y * op->mask.scale[1];
+
+	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
 }
 
 avx2 fastcall static void
-emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
-			       const BoxRec *box, int nbox,
-			       float *v)
+emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
+				      const BoxRec *box, int nbox,
+				      float *v)
 {
-	float xx = op->src.transform->matrix[0][0];
-	float x0 = op->src.transform->matrix[0][2];
-	float yy = op->src.transform->matrix[1][1];
-	float y0 = op->src.transform->matrix[1][2];
-	float sx = op->src.scale[0];
-	float sy = op->src.scale[1];
-	int16_t tx = op->src.offset[0];
-	int16_t ty = op->src.offset[1];
+	float msk_x = op->mask.offset[0];
+	float msk_y = op->mask.offset[1];
 
 	do {
 		union {
@@ -1675,18 +1671,22 @@ emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
 		dst.p.x = box->x2;
 		dst.p.y = box->y2;
 		v[0] = dst.f;
-		v[1] = ((box->x2 + tx) * xx + x0) * sx;
-		v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
+		v[2] = (msk_x + box->x2) * op->mask.scale[0];
+		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
 
 		dst.p.x = box->x1;
-		v[3] = dst.f;
-		v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
+		v[4] = dst.f;
+		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
 
 		dst.p.y = box->y1;
-		v[6] = dst.f;
-		v[8] = ((box->y1 + ty) * yy + y0) * sy;
+		v[8] = dst.f;
+		v[11] = (msk_y + box->y1) * op->mask.scale[1];
 
-		v += 9;
+		v[1] = compute_linear(&op->src, box->x2, box->y2);
+		v[5] = compute_linear(&op->src, box->x1, box->y2);
+		v[9] = compute_linear(&op->src, box->x1, box->y1);
+
+		v += 12;
 		box++;
 	} while (--nbox);
 }


More information about the xorg-commit mailing list