xf86-video-intel: 2 commits - src/sna/gen4_vertex.c src/sna/sna_cpu.c

Tue Mar 26 08:32:22 PDT 2013

src/sna/gen4_vertex.c |  354 +++++++++++++++++++++++++++++++++++++++++++++++++-
 src/sna/sna_cpu.c     |    2 
 2 files changed, 351 insertions(+), 5 deletions(-)

New commits:
commit da2bd82b0e10f8a263f7ab4fb49b440349d97ace
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Mar 26 15:13:32 2013 +0000

    sna/gen4+: Add a couple more advanced SSE variants for composite vertices
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index d842585..78d8972 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -739,6 +739,160 @@ emit_boxes_identity_mask(const struct sna_composite_op *op,
 	} while (--nbox);
 }
 
+sse4_2 fastcall static void /* sse4_2: macro from outside this hunk, presumably a per-function target attribute -- confirm */
+emit_primitive_identity_mask__sse4_2(struct sna *sna,
+				     const struct sna_composite_op *op,
+				     const struct sna_composite_rectangles *r)
+{
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst; /* lets the dst coordinate pair be stored through a single float vertex slot */
+	float msk_x, msk_y;
+	float w, h;
+	float *v;
+	/* Emit rectangle r as 3 vertices of 4 floats each: { packed dst, .5, mask x * scale[0], mask y * scale[1] }. */
+	msk_x = r->mask.x + op->mask.offset[0];
+	msk_y = r->mask.y + op->mask.offset[1];
+	w = r->width;
+	h = r->height;
+
+	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
+	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
+	/* Claim the next 12 floats of the vertex buffer; vertex_used is expected to be a multiple of 4. */
+	assert(op->floats_per_rect == 12);
+	assert((sna->render.vertex_used % 4) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 12;
+	/* Vertex 0 (v[0..3]): the (x + width, y + height) corner. */
+	dst.p.x = r->dst.x + r->width;
+	dst.p.y = r->dst.y + r->height;
+	v[0] = dst.f;
+	v[2] = (msk_x + w) * op->mask.scale[0];
+	v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; /* vertex 1 shares this mask y */
+	/* Vertex 1 (v[4..7]): the (x, y + height) corner; dst.p.y is left unchanged. */
+	dst.p.x = r->dst.x;
+	v[4] = dst.f;
+	v[10] = v[6] = msk_x * op->mask.scale[0]; /* vertex 2 shares this mask x */
+	/* Vertex 2 (v[8..11]): the (x, y) corner. */
+	dst.p.y = r->dst.y;
+	v[8] = dst.f;
+	v[11] = msk_y * op->mask.scale[1];
+	/* The second float of every vertex is the constant .5. */
+	v[9] = v[5] = v[1] = .5;
+}
+
+sse4_2 fastcall static void /* sse4_2: macro from outside this hunk, presumably a per-function target attribute -- confirm */
+emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
+				 const BoxRec *box, int nbox,
+				 float *v)
+{
+	float msk_x = op->mask.offset[0];
+	float msk_y = op->mask.offset[1];
+	/* For each box write 12 floats at v: 3 vertices of { packed dst, .5, mask x * scale[0], mask y * scale[1] }. */
+	do {
+		union {
+			struct sna_coordinate p;
+			float f;
+		} dst; /* lets the dst coordinate pair be stored through a single float vertex slot */
+		/* Vertex 0 (v[0..3]): the (x2, y2) corner. */
+		dst.p.x = box->x2;
+		dst.p.y = box->y2;
+		v[0] = dst.f;
+		v[2] = (msk_x + box->x2) * op->mask.scale[0];
+		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; /* vertex 1 shares this mask y */
+		/* Vertex 1 (v[4..7]): the (x1, y2) corner; dst.p.y is left unchanged. */
+		dst.p.x = box->x1;
+		v[4] = dst.f;
+		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; /* vertex 2 shares this mask x */
+		/* Vertex 2 (v[8..11]): the (x1, y1) corner. */
+		dst.p.y = box->y1;
+		v[8] = dst.f;
+		v[11] = (msk_y + box->y1) * op->mask.scale[1];
+		/* The second float of every vertex is the constant .5. */
+		v[9] = v[5] = v[1] = .5;
+		v += 12; /* advance past the 12 floats just written */
+		box++;
+	} while (--nbox); /* the count is tested after the body, so nbox must be >= 1 on entry */
+}
+
+avx2 fastcall static void /* avx2: macro from outside this hunk, presumably a per-function target attribute -- confirm */
+emit_primitive_identity_mask__avx2(struct sna *sna,
+				   const struct sna_composite_op *op,
+				   const struct sna_composite_rectangles *r)
+{
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst; /* lets the dst coordinate pair be stored through a single float vertex slot */
+	float msk_x, msk_y;
+	float w, h;
+	float *v;
+	/* Emit rectangle r as 3 vertices of 4 floats each: { packed dst, .5, mask x * scale[0], mask y * scale[1] }. */
+	msk_x = r->mask.x + op->mask.offset[0];
+	msk_y = r->mask.y + op->mask.offset[1];
+	w = r->width;
+	h = r->height;
+
+	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
+	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
+	/* Claim the next 12 floats of the vertex buffer; vertex_used is expected to be a multiple of 4. */
+	assert(op->floats_per_rect == 12);
+	assert((sna->render.vertex_used % 4) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 12;
+	/* Vertex 0 (v[0..3]): the (x + width, y + height) corner. */
+	dst.p.x = r->dst.x + r->width;
+	dst.p.y = r->dst.y + r->height;
+	v[0] = dst.f;
+	v[2] = (msk_x + w) * op->mask.scale[0];
+	v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; /* vertex 1 shares this mask y */
+	/* Vertex 1 (v[4..7]): the (x, y + height) corner; dst.p.y is left unchanged. */
+	dst.p.x = r->dst.x;
+	v[4] = dst.f;
+	v[10] = v[6] = msk_x * op->mask.scale[0]; /* vertex 2 shares this mask x */
+	/* Vertex 2 (v[8..11]): the (x, y) corner. */
+	dst.p.y = r->dst.y;
+	v[8] = dst.f;
+	v[11] = msk_y * op->mask.scale[1];
+	/* The second float of every vertex is the constant .5. */
+	v[9] = v[5] = v[1] = .5;
+}
+
+avx2 fastcall static void /* avx2: macro from outside this hunk, presumably a per-function target attribute -- confirm */
+emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
+			       const BoxRec *box, int nbox,
+			       float *v)
+{
+	float msk_x = op->mask.offset[0];
+	float msk_y = op->mask.offset[1];
+	/* For each box write 12 floats at v: 3 vertices of { packed dst, .5, mask x * scale[0], mask y * scale[1] }. */
+	do {
+		union {
+			struct sna_coordinate p;
+			float f;
+		} dst; /* lets the dst coordinate pair be stored through a single float vertex slot */
+		/* Vertex 0 (v[0..3]): the (x2, y2) corner. */
+		dst.p.x = box->x2;
+		dst.p.y = box->y2;
+		v[0] = dst.f;
+		v[2] = (msk_x + box->x2) * op->mask.scale[0];
+		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; /* vertex 1 shares this mask y */
+		/* Vertex 1 (v[4..7]): the (x1, y2) corner; dst.p.y is left unchanged. */
+		dst.p.x = box->x1;
+		v[4] = dst.f;
+		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; /* vertex 2 shares this mask x */
+		/* Vertex 2 (v[8..11]): the (x1, y1) corner. */
+		dst.p.y = box->y1;
+		v[8] = dst.f;
+		v[11] = (msk_y + box->y1) * op->mask.scale[1];
+		/* The second float of every vertex is the constant .5. */
+		v[9] = v[5] = v[1] = .5;
+		v += 12; /* advance past the 12 floats just written */
+		box++;
+	} while (--nbox); /* the count is tested after the body, so nbox must be >= 1 on entry */
+}
+
 sse2 fastcall static void
 emit_primitive_linear_identity_mask(struct sna *sna,
 				    const struct sna_composite_op *op,
@@ -821,6 +975,170 @@ emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
 	} while (--nbox);
 }
 
+sse4_2 fastcall static void /* sse4_2: macro from outside this hunk, presumably a per-function target attribute -- confirm */
+emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
+					    const struct sna_composite_op *op,
+					    const struct sna_composite_rectangles *r)
+{
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst; /* lets the dst coordinate pair be stored through a single float vertex slot */
+	float msk_x, msk_y;
+	float w, h;
+	float *v;
+	/* Emit rectangle r as 3 vertices of 4 floats each: { packed dst, compute_linear() value, mask x * scale[0], mask y * scale[1] }. */
+	msk_x = r->mask.x + op->mask.offset[0];
+	msk_y = r->mask.y + op->mask.offset[1];
+	w = r->width;
+	h = r->height;
+
+	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
+	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
+	/* Claim the next 12 floats of the vertex buffer; vertex_used is expected to be a multiple of 4. */
+	assert(op->floats_per_rect == 12);
+	assert((sna->render.vertex_used % 4) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 12;
+	/* Vertex 0 (v[0..3]): the (x + width, y + height) corner. */
+	dst.p.x = r->dst.x + r->width;
+	dst.p.y = r->dst.y + r->height;
+	v[0] = dst.f;
+	v[2] = (msk_x + w) * op->mask.scale[0];
+	v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; /* vertex 1 shares this mask y */
+	/* Vertex 1 (v[4..7]): the (x, y + height) corner; dst.p.y is left unchanged. */
+	dst.p.x = r->dst.x;
+	v[4] = dst.f;
+	v[10] = v[6] = msk_x * op->mask.scale[0]; /* vertex 2 shares this mask x */
+	/* Vertex 2 (v[8..11]): the (x, y) corner. */
+	dst.p.y = r->dst.y;
+	v[8] = dst.f;
+	v[11] = msk_y * op->mask.scale[1];
+	/* Second float of each vertex: compute_linear() at the matching r->src corner (helper defined outside this hunk; presumably the linear-gradient coordinate -- confirm). */
+	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
+}
+
+sse4_2 fastcall static void /* sse4_2: macro from outside this hunk, presumably a per-function target attribute -- confirm */
+emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
+					const BoxRec *box, int nbox,
+					float *v)
+{
+	float msk_x = op->mask.offset[0];
+	float msk_y = op->mask.offset[1];
+	/* For each box write 12 floats at v: 3 vertices of { packed dst, compute_linear() value, mask x * scale[0], mask y * scale[1] }. */
+	do {
+		union {
+			struct sna_coordinate p;
+			float f;
+		} dst; /* lets the dst coordinate pair be stored through a single float vertex slot */
+		/* Vertex 0 (v[0..3]): the (x2, y2) corner. */
+		dst.p.x = box->x2;
+		dst.p.y = box->y2;
+		v[0] = dst.f;
+		v[2] = (msk_x + box->x2) * op->mask.scale[0];
+		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; /* vertex 1 shares this mask y */
+		/* Vertex 1 (v[4..7]): the (x1, y2) corner; dst.p.y is left unchanged. */
+		dst.p.x = box->x1;
+		v[4] = dst.f;
+		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; /* vertex 2 shares this mask x */
+		/* Vertex 2 (v[8..11]): the (x1, y1) corner. */
+		dst.p.y = box->y1;
+		v[8] = dst.f;
+		v[11] = (msk_y + box->y1) * op->mask.scale[1];
+		/* Second float of each vertex: compute_linear() at the same box corner (helper defined outside this hunk; presumably the linear-gradient coordinate -- confirm). */
+		v[1] = compute_linear(&op->src, box->x2, box->y2);
+		v[5] = compute_linear(&op->src, box->x1, box->y2);
+		v[9] = compute_linear(&op->src, box->x1, box->y1);
+
+		v += 12; /* advance past the 12 floats just written */
+		box++;
+	} while (--nbox); /* the count is tested after the body, so nbox must be >= 1 on entry */
+}
+
+avx2 fastcall static void /* avx2: macro from outside this hunk, presumably a per-function target attribute -- confirm */
+emit_primitive_linear_identity_mask__avx2(struct sna *sna,
+					  const struct sna_composite_op *op,
+					  const struct sna_composite_rectangles *r)
+{
+	union {
+		struct sna_coordinate p;
+		float f;
+	} dst; /* lets the dst coordinate pair be stored through a single float vertex slot */
+	float msk_x, msk_y;
+	float w, h;
+	float *v;
+	/* Emit rectangle r as 3 vertices of 4 floats each: { packed dst, compute_linear() value, mask x * scale[0], mask y * scale[1] }. */
+	msk_x = r->mask.x + op->mask.offset[0];
+	msk_y = r->mask.y + op->mask.offset[1];
+	w = r->width;
+	h = r->height;
+
+	DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
+	     __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
+	/* Claim the next 12 floats of the vertex buffer; vertex_used is expected to be a multiple of 4. */
+	assert(op->floats_per_rect == 12);
+	assert((sna->render.vertex_used % 4) == 0);
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 12;
+	/* Vertex 0 (v[0..3]): the (x + width, y + height) corner. */
+	dst.p.x = r->dst.x + r->width;
+	dst.p.y = r->dst.y + r->height;
+	v[0] = dst.f;
+	v[2] = (msk_x + w) * op->mask.scale[0];
+	v[7] = v[3] = (msk_y + h) * op->mask.scale[1]; /* vertex 1 shares this mask y */
+	/* Vertex 1 (v[4..7]): the (x, y + height) corner; dst.p.y is left unchanged. */
+	dst.p.x = r->dst.x;
+	v[4] = dst.f;
+	v[10] = v[6] = msk_x * op->mask.scale[0]; /* vertex 2 shares this mask x */
+	/* Vertex 2 (v[8..11]): the (x, y) corner. */
+	dst.p.y = r->dst.y;
+	v[8] = dst.f;
+	v[11] = msk_y * op->mask.scale[1];
+	/* Second float of each vertex: compute_linear() at the matching r->src corner (helper defined outside this hunk; presumably the linear-gradient coordinate -- confirm). */
+	v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+	v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+	v[9] = compute_linear(&op->src, r->src.x, r->src.y);
+}
+
+avx2 fastcall static void /* avx2: macro from outside this hunk, presumably a per-function target attribute -- confirm */
+emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
+				      const BoxRec *box, int nbox,
+				      float *v)
+{
+	float msk_x = op->mask.offset[0];
+	float msk_y = op->mask.offset[1];
+	/* For each box write 12 floats at v: 3 vertices of { packed dst, compute_linear() value, mask x * scale[0], mask y * scale[1] }. */
+	do {
+		union {
+			struct sna_coordinate p;
+			float f;
+		} dst; /* lets the dst coordinate pair be stored through a single float vertex slot */
+		/* Vertex 0 (v[0..3]): the (x2, y2) corner. */
+		dst.p.x = box->x2;
+		dst.p.y = box->y2;
+		v[0] = dst.f;
+		v[2] = (msk_x + box->x2) * op->mask.scale[0];
+		v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1]; /* vertex 1 shares this mask y */
+		/* Vertex 1 (v[4..7]): the (x1, y2) corner; dst.p.y is left unchanged. */
+		dst.p.x = box->x1;
+		v[4] = dst.f;
+		v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0]; /* vertex 2 shares this mask x */
+		/* Vertex 2 (v[8..11]): the (x1, y1) corner. */
+		dst.p.y = box->y1;
+		v[8] = dst.f;
+		v[11] = (msk_y + box->y1) * op->mask.scale[1];
+		/* Second float of each vertex: compute_linear() at the same box corner (helper defined outside this hunk; presumably the linear-gradient coordinate -- confirm). */
+		v[1] = compute_linear(&op->src, box->x2, box->y2);
+		v[5] = compute_linear(&op->src, box->x1, box->y2);
+		v[9] = compute_linear(&op->src, box->x1, box->y1);
+
+		v += 12; /* advance past the 12 floats just written */
+		box++;
+	} while (--nbox); /* the count is tested after the body, so nbox must be >= 1 on entry */
+}
+
 sse2 fastcall static void
 emit_primitive_identity_source_mask(struct sna *sna,
 				    const struct sna_composite_op *op,
@@ -1383,14 +1701,42 @@ unsigned gen4_choose_composite_emitter(struct sna *sna, struct sna_composite_op
 		if (tmp->mask.transform == NULL) {
 			if (tmp->src.is_solid) {
 				DBG(("%s: solid, identity mask\n", __FUNCTION__));
-				tmp->prim_emit = emit_primitive_identity_mask;
-				tmp->emit_boxes = emit_boxes_identity_mask;
+#if defined(avx2)
+				if (sna->cpu_features & AVX2) {
+					tmp->prim_emit = emit_primitive_identity_mask__avx2;
+					tmp->emit_boxes = emit_boxes_identity_mask__avx2;
+				} else
+#endif
+#if defined(sse4_2)
+				if (sna->cpu_features & SSE4_2) {
+					tmp->prim_emit = emit_primitive_identity_mask__sse4_2;
+					tmp->emit_boxes = emit_boxes_identity_mask__sse4_2;
+				} else
+#endif
+				{
+					tmp->prim_emit = emit_primitive_identity_mask;
+					tmp->emit_boxes = emit_boxes_identity_mask;
+				}
 				tmp->floats_per_vertex = 4;
 				vb = 1 | 2 << 2;
 			} else if (tmp->src.is_linear) {
 				DBG(("%s: linear, identity mask\n", __FUNCTION__));
-				tmp->prim_emit = emit_primitive_linear_identity_mask;
-				tmp->emit_boxes = emit_boxes_linear_identity_mask;
+#if defined(avx2)
+				if (sna->cpu_features & AVX2) {
+					tmp->prim_emit = emit_primitive_linear_identity_mask__avx2;
+					tmp->emit_boxes = emit_boxes_linear_identity_mask__avx2;
+				} else
+#endif
+#if defined(sse4_2)
+				if (sna->cpu_features & SSE4_2) {
+					tmp->prim_emit = emit_primitive_linear_identity_mask__sse4_2;
+					tmp->emit_boxes = emit_boxes_linear_identity_mask__sse4_2;
+				} else
+#endif
+				{
+					tmp->prim_emit = emit_primitive_linear_identity_mask;
+					tmp->emit_boxes = emit_boxes_linear_identity_mask;
+				}
 				tmp->floats_per_vertex = 4;
 				vb = 1 | 2 << 2;
 			} else if (tmp->src.transform == NULL) {
commit 86bcf4a9f5dff98eab076df5872c3c412c0c13af
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Mar 26 15:13:08 2013 +0000

    sna: AVX2 is in the ebx result of cpuid
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_cpu.c b/src/sna/sna_cpu.c
index ef6843f..1386988 100644
--- a/src/sna/sna_cpu.c
+++ b/src/sna/sna_cpu.c
@@ -71,7 +71,7 @@ unsigned sna_cpu_detect(void)
 	}
 
 	if (__get_cpuid(7, &eax, &ebx, &ecx, &edx)) {
-		if (eax & bit_AVX2)
+		if (ebx & bit_AVX2)
 			features |= AVX2;
 	}