pixman: Branch 'master' - 12 commits
Søren Sandmann Pedersen
sandmann at kemper.freedesktop.org
Wed Mar 17 08:34:35 PDT 2010
configure.ac | 82 ++++++++++++++
pixman/Makefile.am | 2
pixman/pixman-access.c | 22 ++-
pixman/pixman-compiler.h | 63 ++++++++++-
pixman/pixman-fast-path.c | 255 ++++++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-image.c | 30 +++--
pixman/pixman-private.h | 10 +
pixman/pixman.c | 121 ++++++++++++++++-----
test/blitters-test.c | 2
9 files changed, 528 insertions(+), 59 deletions(-)
New commits:
commit 265ea1fb4d05a920323f23a02f9dc379312bbdae
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date: Wed Mar 17 10:50:42 2010 -0400
Specialize the fast_composite_scaled_nearest_* scalers to positive x units
This avoids a test in the inner loop, which improves performance
especially for tiled sources.
On x86-32, I get these results:
Before:
op=1, src_fmt=20028888, dst_fmt=20028888, speed=306.96 MPix/s (73.18 FPS)
op=1, src_fmt=20028888, dst_fmt=10020565, speed=102.67 MPix/s (24.48 FPS)
op=1, src_fmt=10020565, dst_fmt=10020565, speed=324.85 MPix/s (77.45 FPS)
After:
op=1, src_fmt=20028888, dst_fmt=20028888, speed=332.19 MPix/s (79.20 FPS)
op=1, src_fmt=20028888, dst_fmt=10020565, speed=110.41 MPix/s (26.32 FPS)
op=1, src_fmt=10020565, dst_fmt=10020565, speed=363.28 MPix/s (86.61 FPS)
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 5b8ff5c..bf5b298 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1485,13 +1485,21 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat
x1 = vx >> 16; \
vx += unit_x; \
if (do_repeat) \
- repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
s1 = src[x1]; \
\
x2 = vx >> 16; \
vx += unit_x; \
if (do_repeat) \
- repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
s2 = src[x2]; \
\
if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
@@ -1537,7 +1545,11 @@ fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementat
x1 = vx >> 16; \
vx += unit_x; \
if (do_repeat) \
- repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ { \
+ /* This works because we know that unit_x is positive */ \
+ while (vx >= max_vx) \
+ vx -= max_vx; \
+ } \
s1 = src[x1]; \
\
if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
@@ -1806,7 +1818,7 @@ static const pixman_fast_path_t c_fast_paths[] =
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, \
- SCALED_NEAREST_FLAGS | HAS_NORMAL_REPEAT_FLAGS | FAST_PATH_16BIT_SAFE, \
+ SCALED_NEAREST_FLAGS | HAS_NORMAL_REPEAT_FLAGS | FAST_PATH_16BIT_SAFE | FAST_PATH_X_UNIT_POSITIVE, \
PIXMAN_null, 0, \
PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
commit 9cd1051523493e0926b146f05cdde34158391602
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date: Wed Mar 17 10:35:34 2010 -0400
Add a FAST_PATH_X_UNIT_POSITIVE flag
This is the common case for a lot of transformed images. If the unit
were negative, the transformation would be a reflection which is
fairly rare.
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index df5b457..9b44aa9 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -301,15 +301,21 @@ compute_image_info (pixman_image_t *image)
/* Transform */
if (!image->common.transform)
{
- flags |= FAST_PATH_ID_TRANSFORM;
+ flags |= (FAST_PATH_ID_TRANSFORM | FAST_PATH_X_UNIT_POSITIVE);
}
- else if (image->common.transform->matrix[0][1] == 0 &&
- image->common.transform->matrix[1][0] == 0 &&
- image->common.transform->matrix[2][0] == 0 &&
- image->common.transform->matrix[2][1] == 0 &&
- image->common.transform->matrix[2][2] == pixman_fixed_1)
+ else
{
- flags |= FAST_PATH_SCALE_TRANSFORM;
+ if (image->common.transform->matrix[0][1] == 0 &&
+ image->common.transform->matrix[1][0] == 0 &&
+ image->common.transform->matrix[2][0] == 0 &&
+ image->common.transform->matrix[2][1] == 0 &&
+ image->common.transform->matrix[2][2] == pixman_fixed_1)
+ {
+ flags |= FAST_PATH_SCALE_TRANSFORM;
+ }
+
+ if (image->common.transform->matrix[0][0] > 0)
+ flags |= FAST_PATH_X_UNIT_POSITIVE;
}
/* Alpha map */
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 0cf9113..d5767af 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -582,6 +582,7 @@ _pixman_choose_implementation (void);
#define FAST_PATH_NO_NONE_REPEAT (1 << 15)
#define FAST_PATH_SAMPLES_COVER_CLIP (1 << 16)
#define FAST_PATH_16BIT_SAFE (1 << 17)
+#define FAST_PATH_X_UNIT_POSITIVE (1 << 18)
#define _FAST_PATH_STANDARD_FLAGS \
(FAST_PATH_ID_TRANSFORM | \
commit a5b51bb03c5c1258d7558efa13eca6c570e34ce6
Author: Alexander Larsson <alexl at redhat.com>
Date: Wed Mar 17 11:58:05 2010 +0100
Use the right format for the OVER_8888_565 fast path
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 6607a47..5b8ff5c 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1836,7 +1836,7 @@ static const pixman_fast_path_t c_fast_paths[] =
SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, x888_x888),
SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, x888_x888),
- SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, r5g6b5, 8888_565),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
#define NEAREST_FAST_PATH(op,s,d) \
{ PIXMAN_OP_ ## op, \
commit 3b92b711d031a7752e06d0a5f688f4c54f50a1e6
Author: Alexander Larsson <alexl at redhat.com>
Date: Fri Mar 12 15:45:04 2010 +0100
Add specialized fast nearest scalers
This is a macroized version of SRC/OVER repeat normal/unneeded nearest
neighbour scaling instantiated for some common 8888 and 565 formats.
Based on work by Siarhei Siamashka
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 4d26b0f..6607a47 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -27,6 +27,7 @@
#include <config.h>
#endif
#include <string.h>
+#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
@@ -1373,6 +1374,208 @@ repeat (pixman_repeat_t repeat, int *c, int size)
return TRUE;
}
+/* A macroified version of specialized nearest scalers for some
+ * common 8888 and 565 formats. It supports SRC and OVER ops.
+ *
+ * There are two repeat versions, one that handles repeat normal,
+ * and one without repeat handling that only works if the src region
+ * used is completely covered by the pre-repeated source samples.
+ *
+ * The loops are unrolled to process two pixels per iteration for better
+ * performance on most CPU architectures (superscalar processors
+ * can issue several operations simultaneously, other processors can hide
+ * instructions latencies by pipelining operations). Unrolling more
+ * does not make much sense because the compiler will start running out
+ * of spare registers soon.
+ */
+
+#define GET_8888_ALPHA(s) ((s) >> 24)
+ /* This is not actually used since we don't have an OVER with
+ 565 source, but it is needed to build. */
+#define GET_0565_ALPHA(s) 0xff
+
+#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
+ src_type_t, dst_type_t, OP, do_repeat) \
+static void \
+fast_composite_scaled_nearest_ ## scale_func_name ## _ ## OP (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dst_x, \
+ int32_t dst_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type_t *dst_line; \
+ src_type_t *src_first_line; \
+ uint32_t d; \
+ src_type_t s1, s2; \
+ uint8_t a1, a2; \
+ int w; \
+ int x1, x2, y; \
+ pixman_fixed_t orig_vx; \
+ pixman_fixed_t max_vx, max_vy; \
+ pixman_vector_t v; \
+ pixman_fixed_t vx, vy; \
+ pixman_fixed_t unit_x, unit_y; \
+ \
+ src_type_t *src; \
+ dst_type_t *dst; \
+ int src_stride, dst_stride; \
+ \
+ if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
+ abort(); \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
+ /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
+ * transformed from destination space to source space */ \
+ PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
+ \
+ /* reference point is the center of the pixel */ \
+ v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
+ v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
+ v.vector[2] = pixman_fixed_1; \
+ \
+ if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
+ return; \
+ \
+ unit_x = src_image->common.transform->matrix[0][0]; \
+ unit_y = src_image->common.transform->matrix[1][1]; \
+ \
+ /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
+ v.vector[0] -= pixman_fixed_e; \
+ v.vector[1] -= pixman_fixed_e; \
+ \
+ vx = v.vector[0]; \
+ vy = v.vector[1]; \
+ \
+ if (do_repeat) \
+ { \
+ /* Clamp repeating positions inside the actual samples */ \
+ max_vx = src_image->bits.width << 16; \
+ max_vy = src_image->bits.height << 16; \
+ \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ } \
+ \
+ orig_vx = vx; \
+ \
+ while (--height >= 0) \
+ { \
+ dst = dst_line; \
+ dst_line += dst_stride; \
+ \
+ y = vy >> 16; \
+ vy += unit_y; \
+ if (do_repeat) \
+ repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
+ \
+ src = src_first_line + src_stride * y; \
+ \
+ w = width; \
+ vx = orig_vx; \
+ while ((w -= 2) >= 0) \
+ { \
+ x1 = vx >> 16; \
+ vx += unit_x; \
+ if (do_repeat) \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ s1 = src[x1]; \
+ \
+ x2 = vx >> 16; \
+ vx += unit_x; \
+ if (do_repeat) \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ s2 = src[x2]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ \
+ if (a2 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ else if (s2) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ a2 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s2); \
+ } \
+ } \
+ \
+ if (w & 1) \
+ { \
+ x1 = vx >> 16; \
+ vx += unit_x; \
+ if (do_repeat) \
+ repeat (PIXMAN_REPEAT_NORMAL, &vx, max_vx); \
+ s1 = src[x1]; \
+ \
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
+ { \
+ a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
+ \
+ if (a1 == 0xff) \
+ { \
+ *dst = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ else if (s1) \
+ { \
+ d = CONVERT_## DST_FORMAT ## _TO_8888 (*dst); \
+ a1 ^= 0xff; \
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
+ *dst = CONVERT_8888_TO_ ## DST_FORMAT (d); \
+ } \
+ dst++; \
+ } \
+ else /* PIXMAN_OP_SRC */ \
+ { \
+ *dst++ = CONVERT_ ## SRC_FORMAT ## _TO_ ## DST_FORMAT (s1); \
+ } \
+ } \
+ } \
+}
+
+FAST_NEAREST(x888_x888_none, 8888, 8888, uint32_t, uint32_t, SRC, /*repeat: */ 0);
+FAST_NEAREST(x888_x888_normal, 8888, 8888, uint32_t, uint32_t, SRC, /*repeat: */ 1);
+FAST_NEAREST(x888_x888_none, 8888, 8888, uint32_t, uint32_t, OVER, /*repeat: */ 0);
+FAST_NEAREST(x888_x888_normal, 8888, 8888, uint32_t, uint32_t, OVER, /*repeat: */ 1);
+FAST_NEAREST(x888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, /*repeat: */ 0);
+FAST_NEAREST(x888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, /*repeat: */ 1);
+FAST_NEAREST(565_565_none, 0565, 0565, uint16_t, uint16_t, SRC, /*repeat: */ 0);
+FAST_NEAREST(565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, /*repeat: */ 1);
+FAST_NEAREST(8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, /*repeat: */ 0);
+FAST_NEAREST(8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, /*repeat: */ 1);
+
static force_inline uint32_t
fetch_nearest (pixman_repeat_t src_repeat,
pixman_format_code_t format,
@@ -1595,6 +1798,46 @@ static const pixman_fast_path_t c_fast_paths[] =
FAST_PATH_NO_ACCESSORS | \
FAST_PATH_NO_WIDE_FORMAT)
+#define HAS_NORMAL_REPEAT_FLAGS \
+ (FAST_PATH_NO_REFLECT_REPEAT | \
+ FAST_PATH_NO_PAD_REPEAT | \
+ FAST_PATH_NO_NONE_REPEAT)
+
+#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | HAS_NORMAL_REPEAT_FLAGS | FAST_PATH_16BIT_SAFE, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }, \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_null, 0, \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+ }
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, x888_x888),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, x888_x888),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, x888_565),
+ SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, x888_565),
+
+ SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
+
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, x888_x888),
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, x888_x888),
+
+ SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, r5g6b5, 8888_565),
+
#define NEAREST_FAST_PATH(op,s,d) \
{ PIXMAN_OP_ ## op, \
PIXMAN_ ## s, SCALED_NEAREST_FLAGS, \
commit 5750408e48259f42373a5233231104d9bd3eb35a
Author: Alexander Larsson <alexl at redhat.com>
Date: Fri Mar 12 15:41:01 2010 +0100
Add FAST_PATH_SAMPLES_COVER_CLIP and FAST_PATH_16BIT_SAFE
FAST_PATH_SAMPLES_COVER_CLIP:
This is set if the source sample grid, unrepeated but transformed,
completely covers the clip destination. If this is set
you can use a simple scaler that doesn't have to care about the repeat
mode.
FAST_PATH_16BIT_SAFE:
This signifies two things:
1) The size of the src/mask fits in a 16.16 fixed point, so something like:
max_vx = src_image->bits.width << 16;
Is allowed and is guaranteed to not overflow max_vx
2) When stepping the source space we're guaranteed to never overflow
a 16.16 fixed point variable, even if we take one extra step
in the destination space. This means that a loop doing:
x = vx >> 16;
vx += unit_x; d = src_row[x];
will never overflow vx causing x to be negative.
And additionally, if you track vx like above and apply NORMAL repeat
after the vx addition with something like:
while (vx >= max_vx) vx -= max_vx;
This will never overflow the vx even on the final increment that
takes vx one past the end of where we will read, which makes the
repeat loop safe.
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 65314b9..0cf9113 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -580,6 +580,8 @@ _pixman_choose_implementation (void);
#define FAST_PATH_IS_OPAQUE (1 << 13)
#define FAST_PATH_NEEDS_WORKAROUND (1 << 14)
#define FAST_PATH_NO_NONE_REPEAT (1 << 15)
+#define FAST_PATH_SAMPLES_COVER_CLIP (1 << 16)
+#define FAST_PATH_16BIT_SAFE (1 << 17)
#define _FAST_PATH_STANDARD_FLAGS \
(FAST_PATH_ID_TRANSFORM | \
diff --git a/pixman/pixman.c b/pixman/pixman.c
index 68483a0..56c9536 100644
--- a/pixman/pixman.c
+++ b/pixman/pixman.c
@@ -479,24 +479,75 @@ walk_region_internal (pixman_implementation_t *imp,
}
}
-static force_inline pixman_bool_t
-image_covers (pixman_image_t *image,
- pixman_box32_t *extents,
- int x,
- int y)
+#define IS_16BIT(x) (((x) >= INT16_MIN) && ((x) <= INT16_MAX))
+
+static force_inline uint32_t
+compute_src_extents_flags (pixman_image_t *image,
+ pixman_box32_t *extents,
+ int x,
+ int y)
{
- if (image->common.type == BITS &&
- image->common.repeat == PIXMAN_REPEAT_NONE)
+ pixman_box16_t extents16;
+ uint32_t flags;
+
+ flags = FAST_PATH_COVERS_CLIP;
+
+ if (image->common.type != BITS)
+ return flags;
+
+ if (image->common.repeat == PIXMAN_REPEAT_NONE &&
+ (x > extents->x1 || y > extents->y1 ||
+ x + image->bits.width < extents->x2 ||
+ y + image->bits.height < extents->y2))
+ {
+ flags &= ~FAST_PATH_COVERS_CLIP;
+ }
+
+ if (IS_16BIT (extents->x1 - x) &&
+ IS_16BIT (extents->y1 - y) &&
+ IS_16BIT (extents->x2 - x) &&
+ IS_16BIT (extents->y2 - y))
{
- if (x > extents->x1 || y > extents->y1 ||
- x + image->bits.width < extents->x2 ||
- y + image->bits.height < extents->y2)
+ extents16.x1 = extents->x1 - x;
+ extents16.y1 = extents->y1 - y;
+ extents16.x2 = extents->x2 - x;
+ extents16.y2 = extents->y2 - y;
+
+ if (!image->common.transform ||
+ pixman_transform_bounds (image->common.transform, &extents16))
{
- return FALSE;
+ if (extents16.x1 >= 0 && extents16.y1 >= 0 &&
+ extents16.x2 <= image->bits.width &&
+ extents16.y2 <= image->bits.height)
+ {
+ flags |= FAST_PATH_SAMPLES_COVER_CLIP;
+ }
}
}
- return TRUE;
+ if (IS_16BIT (extents->x1 - x - 1) &&
+ IS_16BIT (extents->y1 - y - 1) &&
+ IS_16BIT (extents->x2 - x + 1) &&
+ IS_16BIT (extents->y2 - y + 1))
+ {
+ extents16.x1 = extents->x1 - x - 1;
+ extents16.y1 = extents->y1 - y - 1;
+ extents16.x2 = extents->x2 - x + 1;
+ extents16.y2 = extents->y2 - y + 1;
+
+ if (/* src space expanded by one in dest space fits in 16 bit */
+ (!image->common.transform ||
+ pixman_transform_bounds (image->common.transform, &extents16)) &&
+ /* And src image size can be used as 16.16 fixed point */
+ image->bits.width < 0x7fff &&
+ image->bits.height < 0x7fff)
+ {
+ /* Then we're "16bit safe" */
+ flags |= FAST_PATH_16BIT_SAFE;
+ }
+ }
+
+ return flags;
}
#define N_CACHED_FAST_PATHS 8
@@ -588,11 +639,10 @@ do_composite (pixman_implementation_t *imp,
extents = pixman_region32_extents (&region);
- if (image_covers (src, extents, dest_x - src_x, dest_y - src_y))
- src_flags |= FAST_PATH_COVERS_CLIP;
-
- if (mask && image_covers (mask, extents, dest_x - mask_x, dest_y - mask_y))
- mask_flags |= FAST_PATH_COVERS_CLIP;
+ src_flags |= compute_src_extents_flags (src, extents, dest_x - src_x, dest_y - src_y);
+
+ if (mask)
+ mask_flags |= compute_src_extents_flags (mask, extents, dest_x - mask_x, dest_y - mask_y);
/*
* Check if we can replace our operator by a simpler one
commit cba6fbbddce5edfd8e28ef570c493b044761f870
Author: Alexander Larsson <alexl at redhat.com>
Date: Fri Mar 12 15:40:07 2010 +0100
Add FAST_PATH_NO_NONE_REPEAT flag
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index d09d193..df5b457 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -335,16 +335,20 @@ compute_image_info (pixman_image_t *image)
/* Repeat mode */
switch (image->common.repeat)
{
+ case PIXMAN_REPEAT_NONE:
+ flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_PAD_REPEAT;
+ break;
+
case PIXMAN_REPEAT_REFLECT:
- flags |= FAST_PATH_NO_PAD_REPEAT;
+ flags |= FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_NONE_REPEAT;
break;
case PIXMAN_REPEAT_PAD:
- flags |= FAST_PATH_NO_REFLECT_REPEAT;
+ flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_NONE_REPEAT;
break;
default:
- flags |= (FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_PAD_REPEAT);
+ flags |= FAST_PATH_NO_REFLECT_REPEAT | FAST_PATH_NO_PAD_REPEAT | FAST_PATH_NO_NONE_REPEAT;
break;
}
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index bc41249..65314b9 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -579,6 +579,7 @@ _pixman_choose_implementation (void);
#define FAST_PATH_SIMPLE_REPEAT (1 << 12)
#define FAST_PATH_IS_OPAQUE (1 << 13)
#define FAST_PATH_NEEDS_WORKAROUND (1 << 14)
+#define FAST_PATH_NO_NONE_REPEAT (1 << 15)
#define _FAST_PATH_STANDARD_FLAGS \
(FAST_PATH_ID_TRANSFORM | \
commit 7ec023ede155b9dacf574c4323740ef981802aa9
Author: Alexander Larsson <alexl at redhat.com>
Date: Tue Mar 16 14:18:29 2010 +0100
Add CONVERT_8888_TO_8888 and CONVERT_0565_TO_0565 macros
These are useful for macroization
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index d0bec39..bc41249 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -706,6 +706,10 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
#define CONVERT_0565_TO_8888(s) (CONVERT_0565_TO_0888(s) | 0xff000000)
+/* Trivial versions that are useful in macros */
+#define CONVERT_8888_TO_8888(s) (s)
+#define CONVERT_0565_TO_0565(s) (s)
+
#define PIXMAN_FORMAT_IS_WIDE(f) \
(PIXMAN_FORMAT_A (f) > 8 || \
PIXMAN_FORMAT_R (f) > 8 || \
commit c903d03052e1c34478556964338959b34928a388
Author: Alexander Larsson <alexl at redhat.com>
Date: Fri Mar 12 16:23:42 2010 +0100
Add CONVERT_0565_TO_8888 macro
This lets us simplify some fast paths since we get a consistent
naming that always has 8888 and gets some value for alpha.
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 9dcdca7..d0bec39 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -704,6 +704,8 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | \
((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000)))
+#define CONVERT_0565_TO_8888(s) (CONVERT_0565_TO_0888(s) | 0xff000000)
+
#define PIXMAN_FORMAT_IS_WIDE(f) \
(PIXMAN_FORMAT_A (f) > 8 || \
PIXMAN_FORMAT_R (f) > 8 || \
commit de27f45ddd46fc48ec9598f2f177155328d55580
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date: Mon Mar 15 11:51:09 2010 -0400
Ensure that only the low 4 bits of 4 bit pixels are stored.
In some cases we end up trying to use the STORE_4 macro with 8 bit
values, which resulted in other pixels getting overwritten. Fix this
by always masking the value down to its low 4 bits.
This fixes blitters-test on big-endian machines.
diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
index 389cf2a..fa0a267 100644
--- a/pixman/pixman-access.c
+++ b/pixman/pixman-access.c
@@ -2445,9 +2445,12 @@ store_scanline_x4a4 (bits_image_t * image,
do \
{ \
int bo = 4 * (o); \
- STORE_8 (img, l, bo, (bo & 4 ? \
- (FETCH_8 (img, l, bo) & 0xf0) | (v) : \
- (FETCH_8 (img, l, bo) & 0x0f) | ((v) << 4))); \
+ int v4 = (v) & 0x0f; \
+ \
+ STORE_8 (img, l, bo, ( \
+ bo & 4 ? \
+ (FETCH_8 (img, l, bo) & 0xf0) | (v4) : \
+ (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4))); \
} while (0)
#else
@@ -2455,9 +2458,12 @@ store_scanline_x4a4 (bits_image_t * image,
do \
{ \
int bo = 4 * (o); \
- STORE_8 (img, l, bo, (bo & 4 ? \
- (FETCH_8 (img, l, bo) & 0x0f) | ((v) << 4) : \
- (FETCH_8 (img, l, bo) & 0xf0) | (v))); \
+ int v4 = (v) & 0x0f; \
+ \
+ STORE_8 (img, l, bo, ( \
+ bo & 4 ? \
+ (FETCH_8 (img, l, bo) & 0x0f) | (v4 << 4) : \
+ (FETCH_8 (img, l, bo) & 0xf0) | (v4))); \
} while (0)
#endif
@@ -2484,11 +2490,11 @@ store_scanline_r1g2b1 (bits_image_t * image,
{
uint32_t *bits = image->bits + image->rowstride * y;
int i;
-
+
for (i = 0; i < width; ++i)
{
uint32_t pixel;
-
+
SPLIT (values[i]);
pixel = (((r >> 4) & 0x8) |
((g >> 5) & 0x6) |
diff --git a/test/blitters-test.c b/test/blitters-test.c
index c11917d..5e33031 100644
--- a/test/blitters-test.c
+++ b/test/blitters-test.c
@@ -482,7 +482,7 @@ main (int argc, char *argv[])
/* Predefined value for running with all the fastpath functions
disabled. It needs to be updated every time when changes are
introduced to this program or behavior of pixman changes! */
- if (crc == 0xEF7A1179)
+ if (crc == 0xA058F792)
{
printf ("blitters test passed\n");
}
commit 6532f8488abffb89501cb76de7d80b8ab2d49aed
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date: Tue Mar 16 08:17:10 2010 -0400
Fix contact address in configure.ac
diff --git a/configure.ac b/configure.ac
index 0bf5658..4dccfda 100644
--- a/configure.ac
+++ b/configure.ac
@@ -58,7 +58,7 @@ m4_define([pixman_micro], 11)
m4_define([pixman_version],[pixman_major.pixman_minor.pixman_micro])
-AC_INIT(pixman, pixman_version, "sandmann at daimi.au.dk", pixman)
+AC_INIT(pixman, pixman_version, "pixman at lists.freedesktop.org", pixman)
AM_INIT_AUTOMAKE([foreign dist-bzip2])
# Suppress verbose compile lines
commit 7c9f121efe7ee6afafad8b294974f5498054559b
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date: Tue Mar 16 12:23:50 2010 -0400
Add PIXMAN_DEFINE_THREAD_LOCAL() and PIXMAN_GET_THREAD_LOCAL() macros
These macros hide the various types of thread local support. On Linux
and Unix, they expand to just __thread. On Microsoft Visual C++, they
expand to __declspec(thread).
On OS X and other systems that don't have __thread, they expand to a
complicated concoction that uses pthread_once() and
pthread_get/set_specific() to get thread local variables.
diff --git a/pixman/pixman-compiler.h b/pixman/pixman-compiler.h
index 5aeef86..a4e3f88 100644
--- a/pixman/pixman-compiler.h
+++ b/pixman/pixman-compiler.h
@@ -70,11 +70,62 @@
#endif
/* TLS */
-#if (defined (__GNUC__) && ((__GNUC__ == 3 && __GNUC_MINOR >= 3) || __GNUC__ > 3)) || defined(__SUNPRO_C)
-# define THREAD_LOCAL __thread
-#elif defined (_MSC_VER)
-# define THREAD_LOCAL __declspec(thread)
+#if defined(TOOLCHAIN_SUPPORTS__THREAD)
+
+# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
+ static __thread type name
+# define PIXMAN_GET_THREAD_LOCAL(name) \
+ (&name)
+
+#elif defined(_MSC_VER)
+
+# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
+ static __declspec(thread) type name
+# define PIXMAN_GET_THREAD_LOCAL(name) \
+ (&name)
+
+#elif defined(HAVE_PTHREAD_SETSPECIFIC)
+
+#include <pthread.h>
+
+# define PIXMAN_DEFINE_THREAD_LOCAL(type, name) \
+ static pthread_once_t tls_ ## name ## _once_control = PTHREAD_ONCE_INIT; \
+ static pthread_key_t tls_ ## name ## _key; \
+ \
+ static void \
+ tls_ ## name ## _make_key (void) \
+ { \
+ pthread_key_create (&tls_ ## name ## _key, NULL); \
+ } \
+ \
+ static type * \
+ tls_ ## name ## _alloc (key) \
+ { \
+ type *value = malloc (sizeof (type)); \
+ if (value) \
+ pthread_setspecific (key, value); \
+ return value; \
+ } \
+ \
+ static force_inline type * \
+ tls_ ## name ## _get (key) \
+ { \
+ type *value = NULL; \
+ if (pthread_once (&tls_ ## name ## _once_control, \
+ tls_ ## name ## _make_key) == 0) \
+ { \
+ value = pthread_getspecific (tls_ ## name ## _key); \
+ if (!value) \
+ value = tls_ ## name ## _alloc (key); \
+ } \
+ return value; \
+ }
+
+# define PIXMAN_GET_THREAD_LOCAL(name) \
+ tls_ ## name ## _get (tls_ ## name ## _key)
+
#else
-# warning "unknown compiler"
-# define THREAD_LOCAL __thread
+
+# error "Unknown thread local support for this system"
+
#endif
diff --git a/pixman/pixman.c b/pixman/pixman.c
index c71617e..68483a0 100644
--- a/pixman/pixman.c
+++ b/pixman/pixman.c
@@ -499,6 +499,15 @@ image_covers (pixman_image_t *image,
return TRUE;
}
+#define N_CACHED_FAST_PATHS 8
+
+typedef struct
+{
+ pixman_fast_path_t cache [N_CACHED_FAST_PATHS];
+} cache_t;
+
+PIXMAN_DEFINE_THREAD_LOCAL (cache_t, fast_path_cache);
+
static void
do_composite (pixman_implementation_t *imp,
pixman_op_t op,
@@ -514,8 +523,6 @@ do_composite (pixman_implementation_t *imp,
int width,
int height)
{
-#define N_CACHED_FAST_PATHS 8
- static THREAD_LOCAL pixman_fast_path_t tls_cache[N_CACHED_FAST_PATHS];
pixman_format_code_t src_format, mask_format, dest_format;
uint32_t src_flags, mask_flags, dest_flags;
pixman_region32_t region;
@@ -527,8 +534,8 @@ do_composite (pixman_implementation_t *imp,
uint32_t *dest_bits;
int dest_dx, dest_dy;
pixman_bool_t need_workaround;
- pixman_fast_path_t *cache;
const pixman_fast_path_t *info;
+ cache_t *cache;
int i;
src_format = src->common.extended_format_code;
@@ -597,11 +604,11 @@ do_composite (pixman_implementation_t *imp,
return;
/* Check cache for fast paths */
- cache = tls_cache;
+ cache = PIXMAN_GET_THREAD_LOCAL (fast_path_cache);
for (i = 0; i < N_CACHED_FAST_PATHS; ++i)
{
- info = &(cache[i]);
+ info = &(cache->cache[i]);
/* Note that we check for equality here, not whether
* the cached fast path matches. This is to prevent
@@ -677,16 +684,16 @@ found:
pixman_composite_func_t func = info->func;
while (i--)
- cache[i + 1] = cache[i];
-
- cache[0].op = op;
- cache[0].src_format = src_format;
- cache[0].src_flags = src_flags;
- cache[0].mask_format = mask_format;
- cache[0].mask_flags = mask_flags;
- cache[0].dest_format = dest_format;
- cache[0].dest_flags = dest_flags;
- cache[0].func = func;
+ cache->cache[i + 1] = cache->cache[i];
+
+ cache->cache[0].op = op;
+ cache->cache[0].src_format = src_format;
+ cache->cache[0].src_flags = src_flags;
+ cache->cache[0].mask_format = mask_format;
+ cache->cache[0].mask_flags = mask_flags;
+ cache->cache[0].dest_format = dest_format;
+ cache->cache[0].dest_flags = dest_flags;
+ cache->cache[0].func = func;
}
out:
commit 6b9c54820015f69e667ed54441e83042c9a84cc1
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date: Tue Mar 16 11:01:08 2010 -0400
Add checks for various types of thread local storage.
OS X does not support __thread, so we have to check for it before
using it. It does however support pthread_get/setspecific(), so if we
don't have __thread, check if those are available.
diff --git a/configure.ac b/configure.ac
index fed97b1..0bf5658 100644
--- a/configure.ac
+++ b/configure.ac
@@ -523,6 +523,86 @@ if test x$have_posix_memalign = xyes; then
AC_DEFINE(HAVE_POSIX_MEMALIGN, 1, [Whether we have posix_memalign()])
fi
+dnl =====================================
+dnl Thread local storage
+
+support_for__thread=no
+
+AC_MSG_CHECKING(for __thread)
+AC_COMPILE_IFELSE([
+__thread int x ;
+int main () { return 0; }
+], support_for__thread=yes)
+
+if test $support_for__thread = yes; then
+ AC_DEFINE([TOOLCHAIN_SUPPORTS__THREAD],[],[Whether the tool chain supports __thread])
+fi
+
+AC_MSG_RESULT($support_for__thread)
+
+dnl posix tls
+
+if test $support_for__thread = no; then
+
+support_for_pthread_setspecific=no
+
+AC_MSG_CHECKING(for pthread_setspecific)
+
+save_LDFLAGS=$LDFLAGS
+
+LDFLAGS="-pthread"
+
+AC_LINK_IFELSE([
+#include <pthread.h>
+
+#include <stdlib.h>
+#include <pthread.h>
+
+static pthread_once_t once_control = PTHREAD_ONCE_INIT;
+static pthread_key_t key;
+
+static void
+make_key (void)
+{
+ pthread_key_create (&key, NULL);
+}
+
+int
+main ()
+{
+ void *value = NULL;
+
+ if (pthread_once (&once_control, make_key) != 0)
+ {
+ value = NULL;
+ }
+ else
+ {
+ value = pthread_getspecific (key);
+ if (!value)
+ {
+ value = malloc (100);
+ pthread_setspecific (key, value);
+ }
+ }
+}
+], support_for_pthread_setspecific=yes);
+
+LDFLAGS=$save_LDFLAGS
+
+if test $support_for_pthread_setspecific = yes; then
+ PTHREAD_LDFLAGS="-pthread"
+ AC_DEFINE([HAVE_PTHREAD_SETSPECIFIC], [], [Whether pthread_setspecific() is supported])
+fi
+
+AC_MSG_RESULT($support_for_pthread_setspecific);
+
+fi
+
+AC_SUBST(TOOLCHAIN_SUPPORTS__THREAD)
+AC_SUBST(HAVE_PTHREAD_SETSPECIFIC)
+AC_SUBST(PTHREAD_LDFLAGS)
+
AC_OUTPUT([pixman-1.pc
pixman-1-uninstalled.pc
Makefile
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 8ac6827..5a0e7a9 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -1,5 +1,5 @@
lib_LTLIBRARIES = libpixman-1.la
-libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -no-undefined
+libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO) -no-undefined @PTHREAD_LDFLAGS@
libpixman_1_la_LIBADD = @DEP_LIBS@ -lm
libpixman_1_la_SOURCES = \
pixman.h \
More information about the xorg-commit
mailing list