pixman: Branch 'master' - 2 commits
Søren Sandmann Pedersen
sandmann at kemper.freedesktop.org
Sat Feb 13 15:10:23 PST 2010
pixman/pixman-fast-path.c | 248 ++++++++++++++++++++++++++++++----------------
1 file changed, 162 insertions(+), 86 deletions(-)
New commits:
commit 97a12457394b36b5b052927af65ac3944ceccf09
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date: Sun May 31 15:06:33 2009 -0400
Once-unrolled version of fast_composite_scaled_nearest()
Separate out the fetching and combining code into two inline
functions. Then do two pixels per iteration.
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 37a9719..170e9d6 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1431,6 +1431,44 @@ repeat (pixman_repeat_t repeat, int *c, int size)
return TRUE;
}
+static force_inline uint32_t
+fetch_nearest (pixman_repeat_t src_repeat,
+ pixman_format_code_t format,
+ uint32_t *src, int x, int src_width)
+{
+ if (repeat (src_repeat, &x, src_width))
+ {
+ if (format == PIXMAN_x8r8g8b8)
+ return *(src + x) | 0xff000000;
+ else
+ return *(src + x);
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+static force_inline void
+combine_over (uint32_t s, uint32_t *dst)
+{
+ if (s)
+ {
+ uint8_t ia = 0xff - (s >> 24);
+
+ if (ia)
+ UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
+ else
+ *dst = s;
+ }
+}
+
+static force_inline void
+combine_src (uint32_t s, uint32_t *dst)
+{
+ *dst = s;
+}
+
static void
fast_composite_scaled_nearest (pixman_implementation_t *imp,
pixman_op_t op,
@@ -1446,10 +1484,9 @@ fast_composite_scaled_nearest (pixman_implementation_t *imp,
int32_t width,
int32_t height)
{
- uint32_t *dst;
- uint32_t *src;
- int dst_stride, src_stride;
- int i;
+ uint32_t *dst_line;
+ uint32_t *src_line;
+ int dst_stride, src_stride;
int src_width, src_height;
pixman_repeat_t src_repeat;
pixman_fixed_t unit_x, unit_y;
@@ -1457,11 +1494,11 @@ fast_composite_scaled_nearest (pixman_implementation_t *imp,
pixman_vector_t v;
pixman_fixed_t vy;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst, 1);
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
/* pass in 0 instead of src_x and src_y because src_x and src_y need to be
* transformed from destination space to source space
*/
- PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src, 1);
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
/* reference point is the center of the pixel */
v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
@@ -1487,8 +1524,14 @@ fast_composite_scaled_nearest (pixman_implementation_t *imp,
while (height--)
{
pixman_fixed_t vx = v.vector[0];
- uint32_t *src_line;
int y = pixman_fixed_to_int (vy);
+ uint32_t *dst = dst_line;
+
+ dst_line += dst_stride;
+
+ /* adjust the y location by a unit vector in the y direction
+ * this is equivalent to transforming y+1 of the destination point to source space */
+ vy += unit_y;
if (!repeat (src_repeat, &y, src_height))
{
@@ -1497,56 +1540,54 @@ fast_composite_scaled_nearest (pixman_implementation_t *imp,
}
else
{
- src_line = src + y * src_stride;
+ int w = width;
+
+ uint32_t *src = src_line + y * src_stride;
- for (i = 0; i < width; ++i)
+ while (w >= 2)
{
- uint32_t s, d;
- int x;
- x = pixman_fixed_to_int (vx);
+ uint32_t s1, s2;
+ int x1, x2;
+
+ x1 = pixman_fixed_to_int (vx);
+ vx += unit_x;
+
+ x2 = pixman_fixed_to_int (vx);
+ vx += unit_x;
- if (!repeat (src_repeat, &x, src_width))
+ w -= 2;
+
+ s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
+ s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
+
+ if (op == PIXMAN_OP_OVER)
{
- s = 0;
+ combine_over (s1, dst++);
+ combine_over (s2, dst++);
}
else
{
- s = *(src_line + x);
-
- if (src_format == PIXMAN_x8r8g8b8)
- s |= 0xff000000;
+ combine_src (s1, dst++);
+ combine_src (s2, dst++);
}
+ }
- d = s;
- if (op == PIXMAN_OP_OVER)
- {
- uint8_t ia;
-
- if (!s)
- goto skip_write;
-
- ia = 0xff - (s >> 24);
+ while (w--)
+ {
+ uint32_t s;
+ int x;
- if (ia)
- {
- d = *(dst + i);
+ x = pixman_fixed_to_int (vx);
+ vx += unit_x;
- UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
- }
- }
- *(dst + i) = d;
+ s = fetch_nearest (src_repeat, src_format, src, x, src_width);
- skip_write:
- /* adjust the x location by a unit vector in the x direction:
- * this is equivalent to transforming x+1 of the destination point to source space */
- vx += unit_x;
+ if (op == PIXMAN_OP_OVER)
+ combine_over (s, dst++);
+ else
+ combine_src (s, dst++);
}
}
-
- /* adjust the y location by a unit vector in the y direction
- * this is equivalent to transforming y+1 of the destination point to source space */
- vy += unit_y;
- dst += dst_stride;
}
}
commit e5972110750b32929a474c35362f4639dbbd1222
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date: Sat May 30 02:50:38 2009 -0400
Generalize and optimize fast_composite_src_scale_nearest()
- Make it work for PIXMAN_OP_OVER
- Split repeat computation for x and y, and only do the x part in the
inner loop.
- Move stride multiplication outside of inner loop
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 54b7422..37a9719 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1403,30 +1403,64 @@ static const pixman_fast_path_t c_fast_paths[] =
{ PIXMAN_OP_NONE },
};
+static force_inline pixman_bool_t
+repeat (pixman_repeat_t repeat, int *c, int size)
+{
+ if (repeat == PIXMAN_REPEAT_NONE)
+ {
+ if (*c < 0 || *c >= size)
+ return FALSE;
+ }
+ else if (repeat == PIXMAN_REPEAT_NORMAL)
+ {
+ while (*c >= size)
+ *c -= size;
+ while (*c < 0)
+ *c += size;
+ }
+ else if (repeat == PIXMAN_REPEAT_PAD)
+ {
+ *c = CLIP (*c, 0, size - 1);
+ }
+ else /* REFLECT */
+ {
+ *c = MOD (*c, size * 2);
+ if (*c >= size)
+ *c = size * 2 - *c - 1;
+ }
+ return TRUE;
+}
+
static void
-fast_composite_src_scale_nearest (pixman_implementation_t *imp,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+fast_composite_scaled_nearest (pixman_implementation_t *imp,
+ pixman_op_t op,
+ pixman_image_t * src_image,
+ pixman_image_t * mask_image,
+ pixman_image_t * dst_image,
+ int32_t src_x,
+ int32_t src_y,
+ int32_t mask_x,
+ int32_t mask_y,
+ int32_t dest_x,
+ int32_t dest_y,
+ int32_t width,
+ int32_t height)
{
uint32_t *dst;
uint32_t *src;
int dst_stride, src_stride;
- int i, j;
+ int i;
+ int src_width, src_height;
+ pixman_repeat_t src_repeat;
+ pixman_fixed_t unit_x, unit_y;
+ pixman_format_code_t src_format;
pixman_vector_t v;
+ pixman_fixed_t vy;
PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst, 1);
/* pass in 0 instead of src_x and src_y because src_x and src_y need to be
- * transformed from destination space to source space */
+ * transformed from destination space to source space
+ */
PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src, 1);
/* reference point is the center of the pixel */
@@ -1437,81 +1471,82 @@ fast_composite_src_scale_nearest (pixman_implementation_t *imp,
if (!pixman_transform_point_3d (src_image->common.transform, &v))
return;
+ unit_x = src_image->common.transform->matrix[0][0];
+ unit_y = src_image->common.transform->matrix[1][1];
+
/* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
v.vector[0] -= pixman_fixed_e;
v.vector[1] -= pixman_fixed_e;
- for (j = 0; j < height; j++)
+ src_height = src_image->bits.height;
+ src_width = src_image->bits.width;
+ src_repeat = src_image->common.repeat;
+ src_format = src_image->bits.format;
+
+ vy = v.vector[1];
+ while (height--)
{
- pixman_fixed_t vx = v.vector[0];
- pixman_fixed_t vy = v.vector[1];
+ pixman_fixed_t vx = v.vector[0];
+ uint32_t *src_line;
+ int y = pixman_fixed_to_int (vy);
- for (i = 0; i < width; ++i)
+ if (!repeat (src_repeat, &y, src_height))
{
- pixman_bool_t inside_bounds;
- uint32_t result;
- int x, y;
- x = vx >> 16;
- y = vy >> 16;
-
- /* apply the repeat function */
- switch (src_image->common.repeat)
- {
- case PIXMAN_REPEAT_NORMAL:
- x = MOD (x, src_image->bits.width);
- y = MOD (y, src_image->bits.height);
- inside_bounds = TRUE;
- break;
-
- case PIXMAN_REPEAT_PAD:
- x = CLIP (x, 0, src_image->bits.width - 1);
- y = CLIP (y, 0, src_image->bits.height - 1);
- inside_bounds = TRUE;
- break;
-
- case PIXMAN_REPEAT_REFLECT:
- x = MOD (x, src_image->bits.width * 2);
- if (x >= src_image->bits.width)
- x = src_image->bits.width * 2 - x - 1;
- y = MOD (y, src_image->bits.height * 2);
- if (y >= src_image->bits.height)
- y = src_image->bits.height * 2 - y - 1;
- inside_bounds = TRUE;
- break;
-
- case PIXMAN_REPEAT_NONE:
- default:
- inside_bounds =
- (x >= 0 &&
- x < src_image->bits.width &&
- y >= 0 &&
- y < src_image->bits.height);
- break;
- }
+ if (op == PIXMAN_OP_SRC)
+ memset (dst, 0, sizeof (*dst) * width);
+ }
+ else
+ {
+ src_line = src + y * src_stride;
- if (inside_bounds)
- {
- /* XXX: we should move this multiplication out of the loop */
- result = *(src + y * src_stride + x);
- }
- else
+ for (i = 0; i < width; ++i)
{
- result = 0;
- }
- *(dst + i) = result;
+ uint32_t s, d;
+ int x;
+ x = pixman_fixed_to_int (vx);
- /* adjust the x location by a unit vector in the x direction:
- * this is equivalent to transforming x+1 of the destination
- * point to source space
- */
- vx += src_image->common.transform->matrix[0][0];
+ if (!repeat (src_repeat, &x, src_width))
+ {
+ s = 0;
+ }
+ else
+ {
+ s = *(src_line + x);
+
+ if (src_format == PIXMAN_x8r8g8b8)
+ s |= 0xff000000;
+ }
+
+ d = s;
+ if (op == PIXMAN_OP_OVER)
+ {
+ uint8_t ia;
+
+ if (!s)
+ goto skip_write;
+
+ ia = 0xff - (s >> 24);
+
+ if (ia)
+ {
+ d = *(dst + i);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+ }
+ }
+ *(dst + i) = d;
+
+ skip_write:
+ /* adjust the x location by a unit vector in the x direction:
+ * this is equivalent to transforming x+1 of the destination point to source space */
+ vx += unit_x;
+ }
}
- /* adjust the y location by a unit vector in the y direction
- * this is equivalent to transforming y+1 of the destination point
- * to source space
- */
- v.vector[1] += src_image->common.transform->matrix[1][1];
- dst += dst_stride;
+
+ /* adjust the y location by a unit vector in the y direction
+ * this is equivalent to transforming y+1 of the destination point to source space */
+ vy += unit_y;
+ dst += dst_stride;
}
}
@@ -1533,11 +1568,11 @@ fast_path_composite (pixman_implementation_t *imp,
if (src->type == BITS
&& src->common.transform
&& !mask
- && op == PIXMAN_OP_SRC
+ && (op == PIXMAN_OP_SRC || op == PIXMAN_OP_OVER)
&& !src->common.alpha_map && !dest->common.alpha_map
&& (src->common.filter == PIXMAN_FILTER_NEAREST)
- && PIXMAN_FORMAT_BPP (dest->bits.format) == 32
- && src->bits.format == dest->bits.format
+ && (dest->bits.format == PIXMAN_a8r8g8b8 || dest->bits.format == PIXMAN_x8r8g8b8)
+ && (src->bits.format == PIXMAN_a8r8g8b8 || src->bits.format == PIXMAN_x8r8g8b8)
&& !src->bits.read_func && !src->bits.write_func
&& !dest->bits.read_func && !dest->bits.write_func)
{
@@ -1554,7 +1589,7 @@ fast_path_composite (pixman_implementation_t *imp,
mask_x, mask_y,
dest_x, dest_y,
width, height,
- fast_composite_src_scale_nearest);
+ fast_composite_scaled_nearest);
return;
}
}
More information about the xorg-commit
mailing list