xf86-video-intel: src/sna/gen4_vertex.c
Chris Wilson
ickle at kemper.freedesktop.org
Tue Mar 26 09:29:47 PDT 2013
src/sna/gen4_vertex.c | 814 +++++++++++++++++++++++++-------------------------
1 file changed, 407 insertions(+), 407 deletions(-)
New commits:
commit 1d6f4078c1e405749ff688058ef76c287ab36201
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Mar 26 16:28:26 2013 +0000
sna/gen4+: Reorder code to compile on squeeze
Remember to use the sse4_2 and avx2 function attributes only inside
their guarded sections (#if defined(sse4_2) / #if defined(avx2)), which
check for compiler support.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index 78d8972..cd366c9 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -739,10 +739,10 @@ emit_boxes_identity_mask(const struct sna_composite_op *op,
} while (--nbox);
}
-sse4_2 fastcall static void
-emit_primitive_identity_mask__sse4_2(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
+sse2 fastcall static void
+emit_primitive_linear_identity_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
@@ -779,13 +779,15 @@ emit_primitive_identity_mask__sse4_2(struct sna *sna,
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
- v[9] = v[5] = v[1] = .5;
+ v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+ v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+ v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
-sse4_2 fastcall static void
-emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
- const BoxRec *box, int nbox,
- float *v)
+sse2 fastcall static void
+emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
@@ -810,64 +812,261 @@ emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
- v[9] = v[5] = v[1] = .5;
+ v[1] = compute_linear(&op->src, box->x2, box->y2);
+ v[5] = compute_linear(&op->src, box->x1, box->y2);
+ v[9] = compute_linear(&op->src, box->x1, box->y1);
+
v += 12;
box++;
} while (--nbox);
}
-avx2 fastcall static void
-emit_primitive_identity_mask__avx2(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
+sse2 fastcall static void
+emit_primitive_identity_source_mask(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
+ float src_x, src_y;
float msk_x, msk_y;
float w, h;
float *v;
+ src_x = r->src.x + op->src.offset[0];
+ src_y = r->src.y + op->src.offset[1];
msk_x = r->mask.x + op->mask.offset[0];
msk_y = r->mask.y + op->mask.offset[1];
w = r->width;
h = r->height;
- DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
- __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
+ assert(op->floats_per_rect == 15);
+ assert((sna->render.vertex_used % 5) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 15;
- assert(op->floats_per_rect == 12);
- assert((sna->render.vertex_used % 4) == 0);
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ v[1] = (src_x + w) * op->src.scale[0];
+ v[2] = (src_y + h) * op->src.scale[1];
+ v[3] = (msk_x + w) * op->mask.scale[0];
+ v[4] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[5] = dst.f;
+ v[6] = src_x * op->src.scale[0];
+ v[7] = v[2];
+ v[8] = msk_x * op->mask.scale[0];
+ v[9] = v[4];
+
+ dst.p.y = r->dst.y;
+ v[10] = dst.f;
+ v[11] = v[6];
+ v[12] = src_y * op->src.scale[1];
+ v[13] = v[8];
+ v[14] = msk_y * op->mask.scale[1];
+}
+
+sse2 fastcall static void
+emit_primitive_simple_source_identity(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ float xx = op->src.transform->matrix[0][0];
+ float x0 = op->src.transform->matrix[0][2];
+ float yy = op->src.transform->matrix[1][1];
+ float y0 = op->src.transform->matrix[1][2];
+ float sx = op->src.scale[0];
+ float sy = op->src.scale[1];
+ int16_t tx = op->src.offset[0];
+ int16_t ty = op->src.offset[1];
+ float msk_x = r->mask.x + op->mask.offset[0];
+ float msk_y = r->mask.y + op->mask.offset[1];
+ float w = r->width, h = r->height;
+
+ assert(op->floats_per_rect == 15);
+ assert((sna->render.vertex_used % 5) == 0);
v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 12;
+ sna->render.vertex_used += 3*5;
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
- v[2] = (msk_x + w) * op->mask.scale[0];
- v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
+ v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
+ v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
+ v[3] = (msk_x + w) * op->mask.scale[0];
+ v[4] = (msk_y + h) * op->mask.scale[1];
dst.p.x = r->dst.x;
- v[4] = dst.f;
- v[10] = v[6] = msk_x * op->mask.scale[0];
+ v[5] = dst.f;
+ v[6] = ((r->src.x + tx) * xx + x0) * sx;
+ v[7] = v[2];
+ v[8] = msk_x * op->mask.scale[0];
+ v[9] = v[4];
dst.p.y = r->dst.y;
- v[8] = dst.f;
- v[11] = msk_y * op->mask.scale[1];
+ v[10] = dst.f;
+ v[11] = v[6];
+ v[12] = ((r->src.y + ty) * yy + y0) * sy;
+ v[13] = v[8];
+ v[14] = msk_y * op->mask.scale[1];
+}
- v[9] = v[5] = v[1] = .5;
+sse2 fastcall static void
+emit_primitive_affine_source_identity(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float msk_x = r->mask.x + op->mask.offset[0];
+ float msk_y = r->mask.y + op->mask.offset[1];
+ float w = r->width, h = r->height;
+
+ assert(op->floats_per_rect == 15);
+ assert((sna->render.vertex_used % 5) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 3*5;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform, op->src.scale,
+ &v[1], &v[2]);
+ v[3] = (msk_x + w) * op->mask.scale[0];
+ v[4] = (msk_y + h) * op->mask.scale[1];
+
+ dst.p.x = r->dst.x;
+ v[5] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y + r->height,
+ op->src.transform, op->src.scale,
+ &v[6], &v[7]);
+ v[8] = msk_x * op->mask.scale[0];
+ v[9] = v[4];
+
+ dst.p.y = r->dst.y;
+ v[10] = dst.f;
+ _sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
+ op->src.offset[1] + r->src.y,
+ op->src.transform, op->src.scale,
+ &v[11], &v[12]);
+ v[13] = v[8];
+ v[14] = msk_y * op->mask.scale[1];
}
-avx2 fastcall static void
-emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
- const BoxRec *box, int nbox,
- float *v)
+/* SSE4_2 */
+#if defined(sse4_2)
+
+sse4_2 fastcall static void
+emit_primitive_linear__sse4_2(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
{
- float msk_x = op->mask.offset[0];
- float msk_y = op->mask.offset[1];
+ float *v;
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+
+ assert(op->floats_per_rect == 6);
+ assert((sna->render.vertex_used % 2) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 6;
+ assert(sna->render.vertex_used <= sna->render.vertex_size);
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ dst.p.x = r->dst.x;
+ v[2] = dst.f;
+ dst.p.y = r->dst.y;
+ v[4] = dst.f;
+
+ v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+ v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+ v[5] = compute_linear(&op->src, r->src.x, r->src.y);
+}
+
+sse4_2 fastcall static void
+emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
do {
+ dst.p.x = box->x2;
+ dst.p.y = box->y2;
+ v[0] = dst.f;
+ dst.p.x = box->x1;
+ v[2] = dst.f;
+ dst.p.y = box->y1;
+ v[4] = dst.f;
+
+ v[1] = compute_linear(&op->src, box->x2, box->y2);
+ v[3] = compute_linear(&op->src, box->x1, box->y2);
+ v[5] = compute_linear(&op->src, box->x1, box->y1);
+
+ v += 6;
+ box++;
+ } while (--nbox);
+}
+
+sse4_2 fastcall static void
+emit_primitive_identity_source__sse4_2(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ union {
+ struct sna_coordinate p;
+ float f;
+ } dst;
+ float *v;
+
+ assert(op->floats_per_rect == 9);
+ assert((sna->render.vertex_used % 3) == 0);
+ v = sna->render.vertices + sna->render.vertex_used;
+ sna->render.vertex_used += 9;
+
+ dst.p.x = r->dst.x + r->width;
+ dst.p.y = r->dst.y + r->height;
+ v[0] = dst.f;
+ dst.p.x = r->dst.x;
+ v[3] = dst.f;
+ dst.p.y = r->dst.y;
+ v[6] = dst.f;
+
+ v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
+ v[1] = v[4] + r->width * op->src.scale[0];
+
+ v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
+ v[5] = v[2] = v[8] + r->height * op->src.scale[1];
+}
+
+sse4_2 fastcall static void
+emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
+{
+ do {
union {
struct sna_coordinate p;
float f;
@@ -876,75 +1075,75 @@ emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
- v[2] = (msk_x + box->x2) * op->mask.scale[0];
- v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
-
dst.p.x = box->x1;
- v[4] = dst.f;
- v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
-
+ v[3] = dst.f;
dst.p.y = box->y1;
- v[8] = dst.f;
- v[11] = (msk_y + box->y1) * op->mask.scale[1];
+ v[6] = dst.f;
- v[9] = v[5] = v[1] = .5;
- v += 12;
+ v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
+ v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
+
+ v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
+ v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
+
+ v += 9;
box++;
} while (--nbox);
}
-sse2 fastcall static void
-emit_primitive_linear_identity_mask(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
+sse4_2 fastcall static void
+emit_primitive_simple_source__sse4_2(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
{
+ float *v;
union {
struct sna_coordinate p;
float f;
} dst;
- float msk_x, msk_y;
- float w, h;
- float *v;
-
- msk_x = r->mask.x + op->mask.offset[0];
- msk_y = r->mask.y + op->mask.offset[1];
- w = r->width;
- h = r->height;
-
- DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
- __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
- assert(op->floats_per_rect == 12);
- assert((sna->render.vertex_used % 4) == 0);
+ float xx = op->src.transform->matrix[0][0];
+ float x0 = op->src.transform->matrix[0][2];
+ float yy = op->src.transform->matrix[1][1];
+ float y0 = op->src.transform->matrix[1][2];
+ float sx = op->src.scale[0];
+ float sy = op->src.scale[1];
+ int16_t tx = op->src.offset[0];
+ int16_t ty = op->src.offset[1];
+
+ assert(op->floats_per_rect == 9);
+ assert((sna->render.vertex_used % 3) == 0);
v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 12;
+ sna->render.vertex_used += 3*3;
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
- v[2] = (msk_x + w) * op->mask.scale[0];
- v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
+ v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
+ v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
dst.p.x = r->dst.x;
- v[4] = dst.f;
- v[10] = v[6] = msk_x * op->mask.scale[0];
+ v[3] = dst.f;
+ v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
dst.p.y = r->dst.y;
- v[8] = dst.f;
- v[11] = msk_y * op->mask.scale[1];
-
- v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
- v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
- v[9] = compute_linear(&op->src, r->src.x, r->src.y);
+ v[6] = dst.f;
+ v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
-sse2 fastcall static void
-emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
- const BoxRec *box, int nbox,
- float *v)
+sse4_2 fastcall static void
+emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
{
- float msk_x = op->mask.offset[0];
- float msk_y = op->mask.offset[1];
+ float xx = op->src.transform->matrix[0][0];
+ float x0 = op->src.transform->matrix[0][2];
+ float yy = op->src.transform->matrix[1][1];
+ float y0 = op->src.transform->matrix[1][2];
+ float sx = op->src.scale[0];
+ float sy = op->src.scale[1];
+ int16_t tx = op->src.offset[0];
+ int16_t ty = op->src.offset[1];
do {
union {
@@ -955,30 +1154,26 @@ emit_boxes_linear_identity_mask(const struct sna_composite_op *op,
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
- v[2] = (msk_x + box->x2) * op->mask.scale[0];
- v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
+ v[1] = ((box->x2 + tx) * xx + x0) * sx;
+ v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
dst.p.x = box->x1;
- v[4] = dst.f;
- v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
+ v[3] = dst.f;
+ v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
dst.p.y = box->y1;
- v[8] = dst.f;
- v[11] = (msk_y + box->y1) * op->mask.scale[1];
-
- v[1] = compute_linear(&op->src, box->x2, box->y2);
- v[5] = compute_linear(&op->src, box->x1, box->y2);
- v[9] = compute_linear(&op->src, box->x1, box->y1);
+ v[6] = dst.f;
+ v[8] = ((box->y1 + ty) * yy + y0) * sy;
- v += 12;
+ v += 9;
box++;
} while (--nbox);
}
sse4_2 fastcall static void
-emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
+emit_primitive_identity_mask__sse4_2(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
@@ -1015,15 +1210,13 @@ emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
v[8] = dst.f;
v[11] = msk_y * op->mask.scale[1];
- v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
- v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
- v[9] = compute_linear(&op->src, r->src.x, r->src.y);
+ v[9] = v[5] = v[1] = .5;
}
sse4_2 fastcall static void
-emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
- const BoxRec *box, int nbox,
- float *v)
+emit_boxes_identity_mask__sse4_2(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
@@ -1048,19 +1241,16 @@ emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
v[8] = dst.f;
v[11] = (msk_y + box->y1) * op->mask.scale[1];
- v[1] = compute_linear(&op->src, box->x2, box->y2);
- v[5] = compute_linear(&op->src, box->x1, box->y2);
- v[9] = compute_linear(&op->src, box->x1, box->y1);
-
+ v[9] = v[5] = v[1] = .5;
v += 12;
box++;
} while (--nbox);
}
-avx2 fastcall static void
-emit_primitive_linear_identity_mask__avx2(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
+sse4_2 fastcall static void
+emit_primitive_linear_identity_mask__sse4_2(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
@@ -1102,10 +1292,10 @@ emit_primitive_linear_identity_mask__avx2(struct sna *sna,
v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
-avx2 fastcall static void
-emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
- const BoxRec *box, int nbox,
- float *v)
+sse4_2 fastcall static void
+emit_boxes_linear_identity_mask__sse4_2(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
{
float msk_x = op->mask.offset[0];
float msk_y = op->mask.offset[1];
@@ -1139,161 +1329,15 @@ emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
} while (--nbox);
}
-sse2 fastcall static void
-emit_primitive_identity_source_mask(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float src_x, src_y;
- float msk_x, msk_y;
- float w, h;
- float *v;
-
- src_x = r->src.x + op->src.offset[0];
- src_y = r->src.y + op->src.offset[1];
- msk_x = r->mask.x + op->mask.offset[0];
- msk_y = r->mask.y + op->mask.offset[1];
- w = r->width;
- h = r->height;
-
- assert(op->floats_per_rect == 15);
- assert((sna->render.vertex_used % 5) == 0);
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 15;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = (src_x + w) * op->src.scale[0];
- v[2] = (src_y + h) * op->src.scale[1];
- v[3] = (msk_x + w) * op->mask.scale[0];
- v[4] = (msk_y + h) * op->mask.scale[1];
-
- dst.p.x = r->dst.x;
- v[5] = dst.f;
- v[6] = src_x * op->src.scale[0];
- v[7] = v[2];
- v[8] = msk_x * op->mask.scale[0];
- v[9] = v[4];
-
- dst.p.y = r->dst.y;
- v[10] = dst.f;
- v[11] = v[6];
- v[12] = src_y * op->src.scale[1];
- v[13] = v[8];
- v[14] = msk_y * op->mask.scale[1];
-}
-
-sse2 fastcall static void
-emit_primitive_simple_source_identity(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- float xx = op->src.transform->matrix[0][0];
- float x0 = op->src.transform->matrix[0][2];
- float yy = op->src.transform->matrix[1][1];
- float y0 = op->src.transform->matrix[1][2];
- float sx = op->src.scale[0];
- float sy = op->src.scale[1];
- int16_t tx = op->src.offset[0];
- int16_t ty = op->src.offset[1];
- float msk_x = r->mask.x + op->mask.offset[0];
- float msk_y = r->mask.y + op->mask.offset[1];
- float w = r->width, h = r->height;
-
- assert(op->floats_per_rect == 15);
- assert((sna->render.vertex_used % 5) == 0);
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 3*5;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
- v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
- v[3] = (msk_x + w) * op->mask.scale[0];
- v[4] = (msk_y + h) * op->mask.scale[1];
-
- dst.p.x = r->dst.x;
- v[5] = dst.f;
- v[6] = ((r->src.x + tx) * xx + x0) * sx;
- v[7] = v[2];
- v[8] = msk_x * op->mask.scale[0];
- v[9] = v[4];
-
- dst.p.y = r->dst.y;
- v[10] = dst.f;
- v[11] = v[6];
- v[12] = ((r->src.y + ty) * yy + y0) * sy;
- v[13] = v[8];
- v[14] = msk_y * op->mask.scale[1];
-}
-
-sse2 fastcall static void
-emit_primitive_affine_source_identity(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float msk_x = r->mask.x + op->mask.offset[0];
- float msk_y = r->mask.y + op->mask.offset[1];
- float w = r->width, h = r->height;
-
- assert(op->floats_per_rect == 15);
- assert((sna->render.vertex_used % 5) == 0);
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 3*5;
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- _sna_get_transformed_scaled(op->src.offset[0] + r->src.x + r->width,
- op->src.offset[1] + r->src.y + r->height,
- op->src.transform, op->src.scale,
- &v[1], &v[2]);
- v[3] = (msk_x + w) * op->mask.scale[0];
- v[4] = (msk_y + h) * op->mask.scale[1];
-
- dst.p.x = r->dst.x;
- v[5] = dst.f;
- _sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
- op->src.offset[1] + r->src.y + r->height,
- op->src.transform, op->src.scale,
- &v[6], &v[7]);
- v[8] = msk_x * op->mask.scale[0];
- v[9] = v[4];
-
- dst.p.y = r->dst.y;
- v[10] = dst.f;
- _sna_get_transformed_scaled(op->src.offset[0] + r->src.x,
- op->src.offset[1] + r->src.y,
- op->src.transform, op->src.scale,
- &v[11], &v[12]);
- v[13] = v[8];
- v[14] = msk_y * op->mask.scale[1];
-}
+#endif
-/* SSE4_2 */
-#if defined(sse4_2)
+/* AVX2 */
+#if defined(avx2)
-sse4_2 fastcall static void
-emit_primitive_linear__sse4_2(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
+avx2 fastcall static void
+emit_primitive_linear__avx2(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
{
float *v;
union {
@@ -1320,10 +1364,10 @@ emit_primitive_linear__sse4_2(struct sna *sna,
v[5] = compute_linear(&op->src, r->src.x, r->src.y);
}
-sse4_2 fastcall static void
-emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
- const BoxRec *box, int nbox,
- float *v)
+avx2 fastcall static void
+emit_boxes_linear__avx2(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
{
union {
struct sna_coordinate p;
@@ -1348,10 +1392,10 @@ emit_boxes_linear__sse4_2(const struct sna_composite_op *op,
} while (--nbox);
}
-sse4_2 fastcall static void
-emit_primitive_identity_source__sse4_2(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
+avx2 fastcall static void
+emit_primitive_identity_source__avx2(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
@@ -1379,10 +1423,10 @@ emit_primitive_identity_source__sse4_2(struct sna *sna,
v[5] = v[2] = v[8] + r->height * op->src.scale[1];
}
-sse4_2 fastcall static void
-emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
- const BoxRec *box, int nbox,
- float *v)
+avx2 fastcall static void
+emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
{
do {
union {
@@ -1409,10 +1453,10 @@ emit_boxes_identity_source__sse4_2(const struct sna_composite_op *op,
} while (--nbox);
}
-sse4_2 fastcall static void
-emit_primitive_simple_source__sse4_2(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
+avx2 fastcall static void
+emit_primitive_simple_source__avx2(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
{
float *v;
union {
@@ -1449,10 +1493,10 @@ emit_primitive_simple_source__sse4_2(struct sna *sna,
v[8] = ((r->src.y + ty) * yy + y0) * sy;
}
-sse4_2 fastcall static void
-emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
- const BoxRec *box, int nbox,
- float *v)
+avx2 fastcall static void
+emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
{
float xx = op->src.transform->matrix[0][0];
float x0 = op->src.transform->matrix[0][2];
@@ -1488,105 +1532,57 @@ emit_boxes_simple_source__sse4_2(const struct sna_composite_op *op,
} while (--nbox);
}
-#endif
-
-/* AVX2 */
-#if defined(avx2)
-
-avx2 fastcall static void
-emit_primitive_linear__avx2(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- float *v;
- union {
- struct sna_coordinate p;
- float f;
- } dst;
-
- assert(op->floats_per_rect == 6);
- assert((sna->render.vertex_used % 2) == 0);
- v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 6;
- assert(sna->render.vertex_used <= sna->render.vertex_size);
-
- dst.p.x = r->dst.x + r->width;
- dst.p.y = r->dst.y + r->height;
- v[0] = dst.f;
- dst.p.x = r->dst.x;
- v[2] = dst.f;
- dst.p.y = r->dst.y;
- v[4] = dst.f;
-
- v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
- v[3] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
- v[5] = compute_linear(&op->src, r->src.x, r->src.y);
-}
-
avx2 fastcall static void
-emit_boxes_linear__avx2(const struct sna_composite_op *op,
- const BoxRec *box, int nbox,
- float *v)
+emit_primitive_identity_mask__avx2(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
{
union {
struct sna_coordinate p;
float f;
} dst;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
- do {
- dst.p.x = box->x2;
- dst.p.y = box->y2;
- v[0] = dst.f;
- dst.p.x = box->x1;
- v[2] = dst.f;
- dst.p.y = box->y1;
- v[4] = dst.f;
-
- v[1] = compute_linear(&op->src, box->x2, box->y2);
- v[3] = compute_linear(&op->src, box->x1, box->y2);
- v[5] = compute_linear(&op->src, box->x1, box->y1);
-
- v += 6;
- box++;
- } while (--nbox);
-}
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
-avx2 fastcall static void
-emit_primitive_identity_source__avx2(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
-{
- union {
- struct sna_coordinate p;
- float f;
- } dst;
- float *v;
+ DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
+ __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
- assert(op->floats_per_rect == 9);
- assert((sna->render.vertex_used % 3) == 0);
+ assert(op->floats_per_rect == 12);
+ assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 9;
+ sna->render.vertex_used += 12;
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
+ v[2] = (msk_x + w) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
+
dst.p.x = r->dst.x;
- v[3] = dst.f;
- dst.p.y = r->dst.y;
- v[6] = dst.f;
+ v[4] = dst.f;
+ v[10] = v[6] = msk_x * op->mask.scale[0];
- v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0];
- v[1] = v[4] + r->width * op->src.scale[0];
+ dst.p.y = r->dst.y;
+ v[8] = dst.f;
+ v[11] = msk_y * op->mask.scale[1];
- v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1];
- v[5] = v[2] = v[8] + r->height * op->src.scale[1];
+ v[9] = v[5] = v[1] = .5;
}
avx2 fastcall static void
-emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
- const BoxRec *box, int nbox,
- float *v)
+emit_boxes_identity_mask__avx2(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
{
+ float msk_x = op->mask.offset[0];
+ float msk_y = op->mask.offset[1];
+
do {
union {
struct sna_coordinate p;
@@ -1596,75 +1592,75 @@ emit_boxes_identity_source__avx2(const struct sna_composite_op *op,
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
- dst.p.x = box->x1;
- v[3] = dst.f;
- dst.p.y = box->y1;
- v[6] = dst.f;
+ v[2] = (msk_x + box->x2) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
- v[7] = v[4] = (box->x1 + op->src.offset[0]) * op->src.scale[0];
- v[1] = (box->x2 + op->src.offset[0]) * op->src.scale[0];
+ dst.p.x = box->x1;
+ v[4] = dst.f;
+ v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
- v[8] = (box->y1 + op->src.offset[1]) * op->src.scale[1];
- v[2] = v[5] = (box->y2 + op->src.offset[1]) * op->src.scale[1];
+ dst.p.y = box->y1;
+ v[8] = dst.f;
+ v[11] = (msk_y + box->y1) * op->mask.scale[1];
- v += 9;
+ v[9] = v[5] = v[1] = .5;
+ v += 12;
box++;
} while (--nbox);
}
avx2 fastcall static void
-emit_primitive_simple_source__avx2(struct sna *sna,
- const struct sna_composite_op *op,
- const struct sna_composite_rectangles *r)
+emit_primitive_linear_identity_mask__avx2(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
{
- float *v;
union {
struct sna_coordinate p;
float f;
} dst;
+ float msk_x, msk_y;
+ float w, h;
+ float *v;
- float xx = op->src.transform->matrix[0][0];
- float x0 = op->src.transform->matrix[0][2];
- float yy = op->src.transform->matrix[1][1];
- float y0 = op->src.transform->matrix[1][2];
- float sx = op->src.scale[0];
- float sy = op->src.scale[1];
- int16_t tx = op->src.offset[0];
- int16_t ty = op->src.offset[1];
+ msk_x = r->mask.x + op->mask.offset[0];
+ msk_y = r->mask.y + op->mask.offset[1];
+ w = r->width;
+ h = r->height;
- assert(op->floats_per_rect == 9);
- assert((sna->render.vertex_used % 3) == 0);
+ DBG(("%s: dst=(%d, %d), mask=(%f, %f) x (%f, %f)\n",
+ __FUNCTION__, r->dst.x, r->dst.y, msk_x, msk_y, w, h));
+
+ assert(op->floats_per_rect == 12);
+ assert((sna->render.vertex_used % 4) == 0);
v = sna->render.vertices + sna->render.vertex_used;
- sna->render.vertex_used += 3*3;
+ sna->render.vertex_used += 12;
dst.p.x = r->dst.x + r->width;
dst.p.y = r->dst.y + r->height;
v[0] = dst.f;
- v[1] = ((r->src.x + r->width + tx) * xx + x0) * sx;
- v[5] = v[2] = ((r->src.y + r->height + ty) * yy + y0) * sy;
+ v[2] = (msk_x + w) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + h) * op->mask.scale[1];
dst.p.x = r->dst.x;
- v[3] = dst.f;
- v[7] = v[4] = ((r->src.x + tx) * xx + x0) * sx;
+ v[4] = dst.f;
+ v[10] = v[6] = msk_x * op->mask.scale[0];
dst.p.y = r->dst.y;
- v[6] = dst.f;
- v[8] = ((r->src.y + ty) * yy + y0) * sy;
+ v[8] = dst.f;
+ v[11] = msk_y * op->mask.scale[1];
+
+ v[1] = compute_linear(&op->src, r->src.x+r->width, r->src.y+r->height);
+ v[5] = compute_linear(&op->src, r->src.x, r->src.y+r->height);
+ v[9] = compute_linear(&op->src, r->src.x, r->src.y);
}
avx2 fastcall static void
-emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
- const BoxRec *box, int nbox,
- float *v)
+emit_boxes_linear_identity_mask__avx2(const struct sna_composite_op *op,
+ const BoxRec *box, int nbox,
+ float *v)
{
- float xx = op->src.transform->matrix[0][0];
- float x0 = op->src.transform->matrix[0][2];
- float yy = op->src.transform->matrix[1][1];
- float y0 = op->src.transform->matrix[1][2];
- float sx = op->src.scale[0];
- float sy = op->src.scale[1];
- int16_t tx = op->src.offset[0];
- int16_t ty = op->src.offset[1];
+ float msk_x = op->mask.offset[0];
+ float msk_y = op->mask.offset[1];
do {
union {
@@ -1675,18 +1671,22 @@ emit_boxes_simple_source__avx2(const struct sna_composite_op *op,
dst.p.x = box->x2;
dst.p.y = box->y2;
v[0] = dst.f;
- v[1] = ((box->x2 + tx) * xx + x0) * sx;
- v[5] = v[2] = ((box->y2 + ty) * yy + y0) * sy;
+ v[2] = (msk_x + box->x2) * op->mask.scale[0];
+ v[7] = v[3] = (msk_y + box->y2) * op->mask.scale[1];
dst.p.x = box->x1;
- v[3] = dst.f;
- v[7] = v[4] = ((box->x1 + tx) * xx + x0) * sx;
+ v[4] = dst.f;
+ v[10] = v[6] = (msk_x + box->x1) * op->mask.scale[0];
dst.p.y = box->y1;
- v[6] = dst.f;
- v[8] = ((box->y1 + ty) * yy + y0) * sy;
+ v[8] = dst.f;
+ v[11] = (msk_y + box->y1) * op->mask.scale[1];
- v += 9;
+ v[1] = compute_linear(&op->src, box->x2, box->y2);
+ v[5] = compute_linear(&op->src, box->x1, box->y2);
+ v[9] = compute_linear(&op->src, box->x1, box->y1);
+
+ v += 12;
box++;
} while (--nbox);
}
More information about the xorg-commit
mailing list