pixman: Branch 'master' - 8 commits
Siarhei Siamashka
siamashka at kemper.freedesktop.org
Tue Feb 15 04:42:01 PST 2011
pixman/pixman-arm-common.h | 59 ++++++++++++-
pixman/pixman-arm-neon-asm.S | 28 ++++++
pixman/pixman-arm-neon.c | 11 ++
pixman/pixman-fast-path.c | 13 +--
pixman/pixman-fast-path.h | 185 +++++++++++++++++++++++++++++++++++++------
pixman/pixman-sse2.c | 124 ++++++++++++++++++++++++++++
test/scaling-test.c | 136 +++++++++++++++++++++++++++++--
7 files changed, 511 insertions(+), 45 deletions(-)
New commits:
commit 8e4100260bbdb827abc45a2a5e352a53246fe614
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Fri Feb 4 00:47:36 2011 +0200
SSE2 optimization for nearest scaled over_8888_n_8888
This operation shows up a little bit in some of the HTML5-based
games from http://www.kesiev.com/akihabara/
=== Cairo trace of the game intro animation for 'Legend of Sadness' ===
before:
[ 0] image firefox-legend-of-sadness 46.286 46.298 0.01% 5/6
after:
[ 0] image firefox-legend-of-sadness 45.088 45.102 0.04% 6/6
=== Microbenchmark (scaling ~2000x~2000 -> ~2000x~2000) ===
before:
translucent: op=3, src=8888, mask=s dst=8888, speed=131.30 MPix/s
transparent: op=3, src=8888, mask=s dst=8888, speed=132.38 MPix/s
opaque: op=3, src=8888, mask=s dst=8888, speed=167.90 MPix/s
after:
translucent: op=3, src=8888, mask=s dst=8888, speed=301.93 MPix/s
transparent: op=3, src=8888, mask=s dst=8888, speed=770.70 MPix/s
opaque: op=3, src=8888, mask=s dst=8888, speed=301.80 MPix/s
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 6c494bc..2e135e2 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5898,6 +5898,119 @@ FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
scaled_nearest_scanline_sse2_8888_8888_OVER,
uint32_t, uint32_t, PAD)
+static force_inline void
+scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
+ uint32_t * dst,
+ const uint32_t * src,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx,
+ pixman_bool_t zero_src)
+{
+ __m128i xmm_mask;
+ __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+ __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+ __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+ if (zero_src || (*mask >> 24) == 0)
+ return;
+
+ xmm_mask = create_mask_16_128 (*mask >> 24);
+
+ while (w && (unsigned long)dst & 15)
+ {
+ uint32_t s = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+
+ if (s)
+ {
+ uint32_t d = *dst;
+
+ __m64 ms = unpack_32_1x64 (s);
+ __m64 alpha = expand_alpha_1x64 (ms);
+ __m64 dest = _mm_movepi64_pi64 (xmm_mask);
+ __m64 alpha_dst = unpack_32_1x64 (d);
+
+ *dst = pack_1x64_32 (
+ in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
+ }
+ dst++;
+ w--;
+ }
+
+ while (w >= 4)
+ {
+ uint32_t tmp1, tmp2, tmp3, tmp4;
+
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp3 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp4 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+
+ xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
+
+ if (!is_zero (xmm_src))
+ {
+ xmm_dst = load_128_aligned ((__m128i*)dst);
+
+ unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+ unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+ expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi);
+
+ in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+ &xmm_alpha_lo, &xmm_alpha_hi,
+ &xmm_mask, &xmm_mask,
+ &xmm_dst_lo, &xmm_dst_hi);
+
+ save_128_aligned (
+ (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+ }
+
+ dst += 4;
+ w -= 4;
+ }
+
+ while (w)
+ {
+ uint32_t s = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+
+ if (s)
+ {
+ uint32_t d = *dst;
+
+ __m64 ms = unpack_32_1x64 (s);
+ __m64 alpha = expand_alpha_1x64 (ms);
+ __m64 mask = _mm_movepi64_pi64 (xmm_mask);
+ __m64 dest = unpack_32_1x64 (d);
+
+ *dst = pack_1x64_32 (
+ in_over_1x64 (&ms, &alpha, &mask, &dest));
+ }
+
+ dst++;
+ w--;
+ }
+
+ _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
+ scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+ uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+
static const pixman_fast_path_t sse2_fast_paths[] =
{
/* PIXMAN_OP_OVER */
@@ -5994,6 +6107,11 @@ static const pixman_fast_path_t sse2_fast_paths[] =
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
+
{ PIXMAN_OP_NONE },
};
commit 39b86b032d1b81958d4dfc880ba7f129aecb1de0
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Wed Nov 3 15:22:28 2010 +0200
ARM: NEON optimization for nearest scaled over_0565_8_0565
In some cases this may be used for HTML5 video when hardware acceleration
is not available.
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 927a8cd..47daf45 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -2377,3 +2377,17 @@ generate_composite_function_nearest_scanline \
4, /* dst_r_basereg */ \
8, /* src_basereg */ \
24 /* mask_basereg */
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_over_0565_8_0565_process_pixblock_head, \
+ pixman_composite_over_0565_8_0565_process_pixblock_tail, \
+ pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 10, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 15 /* mask_basereg */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 7f6aea1..3e0c0d1 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -124,6 +124,8 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
OVER, uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+ OVER, uint16_t, uint16_t)
void
pixman_composite_src_n_8_asm_neon (int32_t w,
@@ -338,6 +340,9 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
+
{ PIXMAN_OP_NONE },
};
commit 9a90c1c90f1d128de68b3ed855a2ea1c3bed20c3
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Wed Nov 3 15:16:28 2010 +0200
ARM: NEON optimization for nearest scaled over_8888_8_0565
In some cases this may be used for HTML5 video when hardware acceleration
is not available.
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 4dddde1..927a8cd 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -2363,3 +2363,17 @@ generate_composite_function_nearest_scanline \
pixman_composite_src_0565_8888_process_pixblock_head, \
pixman_composite_src_0565_8888_process_pixblock_tail, \
pixman_composite_src_0565_8888_process_pixblock_tail_head
+
+generate_composite_function_nearest_scanline \
+ pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_over_8888_8_0565_process_pixblock_head, \
+ pixman_composite_over_8888_8_0565_process_pixblock_tail, \
+ pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 4, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 24 /* mask_basereg */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 7d6c837..7f6aea1 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -122,6 +122,9 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
uint16_t, uint32_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
+ OVER, uint32_t, uint16_t)
+
void
pixman_composite_src_n_8_asm_neon (int32_t w,
int32_t h,
@@ -332,6 +335,9 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
+ PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
+
{ PIXMAN_OP_NONE },
};
commit cd1062ded44978fa97aa3d3295af016c80c6e2eb
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Wed Nov 3 15:15:15 2010 +0200
ARM: new macro template for using scaled fast paths with a8 mask
diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
index 6043d4e..9b1322b 100644
--- a/pixman/pixman-arm-common.h
+++ b/pixman/pixman-arm-common.h
@@ -317,4 +317,48 @@ FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op, \
SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
+#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op, \
+ src_type, dst_type) \
+void \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \
+ int32_t w, \
+ dst_type * dst, \
+ const src_type * src, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ const uint8_t * mask); \
+ \
+static force_inline void \
+scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t * mask, \
+ dst_type * pd, \
+ const src_type * ps, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
+{ \
+ if ((flags & SKIP_ZERO_SRC) && zero_src) \
+ return; \
+ pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \
+ vx, unit_x, \
+ mask); \
+} \
+ \
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op,\
+ src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op,\
+ src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op, \
+ scaled_nearest_scanline_##cputype##_##name##_##op,\
+ src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
+
+/* Provide entries for the fast path table */
+#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
#endif
commit b099957887ef69b795d542f8f2980b5a94fb823f
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Wed Feb 2 18:14:56 2011 +0200
Better support for NONE repeat in nearest scaling main loop template
The scaling function now gets an extra boolean argument, which is set
to TRUE when we are fetching padding pixels for NONE repeat. This
makes it possible to decide whether to interpret alpha as 0xFF or 0x00
for such pixels when working with formats which don't have an alpha
channel (for example x8r8g8b8 and r5g6b5).
diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
index 525a2c4..6043d4e 100644
--- a/pixman/pixman-arm-common.h
+++ b/pixman/pixman-arm-common.h
@@ -294,7 +294,8 @@ scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \
int32_t w, \
pixman_fixed_t vx, \
pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx) \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t zero_src) \
{ \
pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps, \
vx, unit_x);\
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 0cbe375..92f0308 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1415,7 +1415,8 @@ scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
int32_t w,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
- pixman_fixed_t max_vx)
+ pixman_fixed_t max_vx,
+ pixman_bool_t fully_transparent_src)
{
uint16_t tmp1, tmp2, tmp3, tmp4;
while ((w -= 4) >= 0)
diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h
index a71f6f0..d081222 100644
--- a/pixman/pixman-fast-path.h
+++ b/pixman/pixman-fast-path.h
@@ -143,13 +143,17 @@ scanline_func_name (dst_type_t *dst, \
int32_t w, \
pixman_fixed_t vx, \
pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx) \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t fully_transparent_src) \
{ \
uint32_t d; \
src_type_t s1, s2; \
uint8_t a1, a2; \
int x1, x2; \
\
+ if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
+ return; \
+ \
if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
abort(); \
\
@@ -348,18 +352,18 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
src = src_first_line + src_stride * y; \
if (left_pad > 0) \
{ \
- scanline_func (mask, dst, src, left_pad, 0, 0, 0); \
+ scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE); \
} \
if (width > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
- dst + left_pad, src, width, vx, unit_x, 0); \
+ dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
} \
if (right_pad > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
dst + left_pad + width, src + src_image->bits.width - 1, \
- right_pad, 0, 0, 0); \
+ right_pad, 0, 0, 0, FALSE); \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
@@ -367,29 +371,29 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
static const src_type_t zero[1] = { 0 }; \
if (y < 0 || y >= src_image->bits.height) \
{ \
- scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0); \
+ scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE); \
continue; \
} \
src = src_first_line + src_stride * y; \
if (left_pad > 0) \
{ \
- scanline_func (mask, dst, zero, left_pad, 0, 0, 0); \
+ scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE); \
} \
if (width > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
- dst + left_pad, src, width, vx, unit_x, 0); \
+ dst + left_pad, src, width, vx, unit_x, 0, FALSE); \
} \
if (right_pad > 0) \
{ \
scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
- dst + left_pad + width, zero, right_pad, 0, 0, 0); \
+ dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE); \
} \
} \
else \
{ \
src = src_first_line + src_stride * y; \
- scanline_func (mask, dst, src, width, vx, unit_x, max_vx); \
+ scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE); \
} \
} \
}
@@ -410,9 +414,10 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
int32_t w, \
pixman_fixed_t vx, \
pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx) \
+ pixman_fixed_t max_vx, \
+ pixman_bool_t fully_transparent_src) \
{ \
- scanline_func (dst, src, w, vx, unit_x, max_vx); \
+ scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
} \
FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 91adc05..6c494bc 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5800,7 +5800,8 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
int32_t w,
pixman_fixed_t vx,
pixman_fixed_t unit_x,
- pixman_fixed_t max_vx)
+ pixman_fixed_t max_vx,
+ pixman_bool_t fully_transparent_src)
{
uint32_t s, d;
const uint32_t* pm = NULL;
@@ -5809,6 +5810,9 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t* pd,
__m128i xmm_src_lo, xmm_src_hi;
__m128i xmm_alpha_lo, xmm_alpha_hi;
+ if (fully_transparent_src)
+ return;
+
/* Align dst on a 16-byte boundary */
while (w && ((unsigned long)pd & 15))
{
commit 14f82083a12be07f340fdea491759b3bb77b4e66
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Fri Oct 22 17:54:41 2010 +0300
Support for a8 and solid mask in nearest scaling main loop template
In addition to the most common case of not having any mask at all, two
variants of scaling with a mask show up in cairo traces:
1. non-scaled a8 mask with SAMPLES_COVER_CLIP flag
2. solid mask
This patch extends the nearest scaling main loop template to also
support these cases.
diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h
index 591ab48..a71f6f0 100644
--- a/pixman/pixman-fast-path.h
+++ b/pixman/pixman-fast-path.h
@@ -245,8 +245,8 @@ scanline_func_name (dst_type_t *dst, \
} \
}
-#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, dst_type_t, \
- repeat_mode) \
+#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
static void \
fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
pixman_op_t op, \
@@ -263,6 +263,7 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
int32_t height) \
{ \
dst_type_t *dst_line; \
+ mask_type_t *mask_line; \
src_type_t *src_first_line; \
int y; \
pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \
@@ -274,9 +275,19 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
\
src_type_t *src; \
dst_type_t *dst; \
- int src_stride, dst_stride; \
+ mask_type_t solid_mask; \
+ const mask_type_t *mask = &solid_mask; \
+ int src_stride, mask_stride, dst_stride; \
\
PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1); \
+ if (have_mask) \
+ { \
+ if (mask_is_solid) \
+ solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format); \
+ else \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
+ mask_stride, mask_line, 1); \
+ } \
/* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
* transformed from destination space to source space */ \
PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
@@ -321,6 +332,11 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
{ \
dst = dst_line; \
dst_line += dst_stride; \
+ if (have_mask && !mask_is_solid) \
+ { \
+ mask = mask_line; \
+ mask_line += mask_stride; \
+ } \
\
y = vy >> 16; \
vy += unit_y; \
@@ -332,16 +348,18 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
src = src_first_line + src_stride * y; \
if (left_pad > 0) \
{ \
- scanline_func (dst, src, left_pad, 0, 0, 0); \
+ scanline_func (mask, dst, src, left_pad, 0, 0, 0); \
} \
if (width > 0) \
{ \
- scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
+ dst + left_pad, src, width, vx, unit_x, 0); \
} \
if (right_pad > 0) \
{ \
- scanline_func (dst + left_pad + width, src + src_image->bits.width - 1, \
- right_pad, 0, 0, 0); \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
+ dst + left_pad + width, src + src_image->bits.width - 1, \
+ right_pad, 0, 0, 0); \
} \
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
@@ -349,43 +367,67 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
static const src_type_t zero[1] = { 0 }; \
if (y < 0 || y >= src_image->bits.height) \
{ \
- scanline_func (dst, zero, left_pad + width + right_pad, 0, 0, 0); \
+ scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0); \
continue; \
} \
src = src_first_line + src_stride * y; \
if (left_pad > 0) \
{ \
- scanline_func (dst, zero, left_pad, 0, 0, 0); \
+ scanline_func (mask, dst, zero, left_pad, 0, 0, 0); \
} \
if (width > 0) \
{ \
- scanline_func (dst + left_pad, src, width, vx, unit_x, 0); \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
+ dst + left_pad, src, width, vx, unit_x, 0); \
} \
if (right_pad > 0) \
{ \
- scanline_func (dst + left_pad + width, zero, right_pad, 0, 0, 0); \
+ scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
+ dst + left_pad + width, zero, right_pad, 0, 0, 0); \
} \
} \
else \
{ \
src = src_first_line + src_stride * y; \
- scanline_func (dst, src, width, vx, unit_x, max_vx); \
+ scanline_func (mask, dst, src, width, vx, unit_x, max_vx); \
} \
} \
}
/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
+#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, have_mask, mask_is_solid) \
+ FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
+ dst_type_t, repeat_mode, have_mask, mask_is_solid)
+
+#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
repeat_mode) \
- FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, dst_type_t, \
+ static force_inline void \
+ scanline_func##scale_func_name##_wrapper ( \
+ const uint8_t *mask, \
+ dst_type_t *dst, \
+ const src_type_t *src, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx) \
+ { \
+ scanline_func (dst, src, w, vx, unit_x, max_vx); \
+ } \
+ FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
+ src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
+
+#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
repeat_mode) \
+ FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
+ dst_type_t, repeat_mode)
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
src_type_t, dst_type_t, OP, repeat_mode) \
FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
OP, repeat_mode) \
- FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name ## _ ## OP, \
+ FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
src_type_t, dst_type_t, repeat_mode)
@@ -439,6 +481,90 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
}
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NORMAL_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_PAD_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ (SCALED_NEAREST_FLAGS | \
+ FAST_PATH_NONE_REPEAT | \
+ FAST_PATH_X_UNIT_POSITIVE), \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
+ }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \
+ { PIXMAN_OP_ ## op, \
+ PIXMAN_ ## s, \
+ SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP, \
+ PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
+ PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
+ fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
+ }
+
/* Prefer the use of 'cover' variant, because it is faster */
#define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \
SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \
@@ -446,4 +572,14 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \
SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \
+ SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+
#endif
commit e83cee5aac26522f31a7e81ea3f972ae2248f6b0
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Fri Oct 22 16:29:01 2010 +0300
test: Extend scaling-test to support a8/solid mask and ADD operation
The image width has also been increased because SIMD optimizations typically
do more unrolling in the inner loops, and this needs to be tested.
diff --git a/test/scaling-test.c b/test/scaling-test.c
index e91df32..dbb9d39 100644
--- a/test/scaling-test.c
+++ b/test/scaling-test.c
@@ -12,10 +12,10 @@
#include <stdio.h>
#include "utils.h"
-#define MAX_SRC_WIDTH 16
-#define MAX_SRC_HEIGHT 16
-#define MAX_DST_WIDTH 16
-#define MAX_DST_HEIGHT 16
+#define MAX_SRC_WIDTH 48
+#define MAX_SRC_HEIGHT 8
+#define MAX_DST_WIDTH 48
+#define MAX_DST_HEIGHT 8
#define MAX_STRIDE 4
/*
@@ -27,24 +27,32 @@ test_composite (int testnum,
{
int i;
pixman_image_t * src_img;
+ pixman_image_t * mask_img;
pixman_image_t * dst_img;
pixman_transform_t transform;
pixman_region16_t clip;
int src_width, src_height;
+ int mask_width, mask_height;
int dst_width, dst_height;
- int src_stride, dst_stride;
+ int src_stride, mask_stride, dst_stride;
int src_x, src_y;
+ int mask_x, mask_y;
int dst_x, dst_y;
int src_bpp;
+ int mask_bpp = 1;
int dst_bpp;
int w, h;
pixman_fixed_t scale_x = 65536, scale_y = 65536;
pixman_fixed_t translate_x = 0, translate_y = 0;
+ pixman_fixed_t mask_scale_x = 65536, mask_scale_y = 65536;
+ pixman_fixed_t mask_translate_x = 0, mask_translate_y = 0;
pixman_op_t op;
pixman_repeat_t repeat = PIXMAN_REPEAT_NONE;
+ pixman_repeat_t mask_repeat = PIXMAN_REPEAT_NONE;
pixman_format_code_t src_fmt, dst_fmt;
uint32_t * srcbuf;
uint32_t * dstbuf;
+ uint32_t * maskbuf;
uint32_t crc32;
FLOAT_REGS_CORRUPTION_DETECTOR_START ();
@@ -52,34 +60,68 @@ test_composite (int testnum,
src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
- op = (lcg_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER;
+ switch (lcg_rand_n (3))
+ {
+ case 0:
+ op = PIXMAN_OP_SRC;
+ break;
+ case 1:
+ op = PIXMAN_OP_OVER;
+ break;
+ default:
+ op = PIXMAN_OP_ADD;
+ break;
+ }
src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+
+ if (lcg_rand_n (2))
+ {
+ mask_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
+ mask_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+ }
+ else
+ {
+ mask_width = mask_height = 1;
+ }
+
dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
+ mask_stride = mask_width * mask_bpp + lcg_rand_n (MAX_STRIDE) * mask_bpp;
dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
if (src_stride & 3)
src_stride += 2;
+ if (mask_stride & 1)
+ mask_stride += 1;
+ if (mask_stride & 2)
+ mask_stride += 2;
+
if (dst_stride & 3)
dst_stride += 2;
src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
+ mask_x = -(mask_width / 4) + lcg_rand_n (mask_width * 3 / 2);
+ mask_y = -(mask_height / 4) + lcg_rand_n (mask_height * 3 / 2);
dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
w = lcg_rand_n (dst_width * 3 / 2 - dst_x);
h = lcg_rand_n (dst_height * 3 / 2 - dst_y);
srcbuf = (uint32_t *)malloc (src_stride * src_height);
+ maskbuf = (uint32_t *)malloc (mask_stride * mask_height);
dstbuf = (uint32_t *)malloc (dst_stride * dst_height);
for (i = 0; i < src_stride * src_height; i++)
*((uint8_t *)srcbuf + i) = lcg_rand_n (256);
+ for (i = 0; i < mask_stride * mask_height; i++)
+ *((uint8_t *)maskbuf + i) = lcg_rand_n (256);
+
for (i = 0; i < dst_stride * dst_height; i++)
*((uint8_t *)dstbuf + i) = lcg_rand_n (256);
@@ -92,13 +134,16 @@ test_composite (int testnum,
src_img = pixman_image_create_bits (
src_fmt, src_width, src_height, srcbuf, src_stride);
+ mask_img = pixman_image_create_bits (
+ PIXMAN_a8, mask_width, mask_height, maskbuf, mask_stride);
+
dst_img = pixman_image_create_bits (
dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
image_endian_swap (src_img, src_bpp * 8);
image_endian_swap (dst_img, dst_bpp * 8);
- if (lcg_rand_n (8) > 0)
+ if (lcg_rand_n (4) > 0)
{
scale_x = -32768 * 3 + lcg_rand_N (65536 * 5);
scale_y = -32768 * 3 + lcg_rand_N (65536 * 5);
@@ -109,6 +154,40 @@ test_composite (int testnum,
pixman_image_set_transform (src_img, &transform);
}
+ if (lcg_rand_n (2) > 0)
+ {
+ mask_scale_x = -32768 * 3 + lcg_rand_N (65536 * 5);
+ mask_scale_y = -32768 * 3 + lcg_rand_N (65536 * 5);
+ mask_translate_x = lcg_rand_N (65536);
+ mask_translate_y = lcg_rand_N (65536);
+ pixman_transform_init_scale (&transform, mask_scale_x, mask_scale_y);
+ pixman_transform_translate (&transform, NULL, mask_translate_x, mask_translate_y);
+ pixman_image_set_transform (mask_img, &transform);
+ }
+
+ switch (lcg_rand_n (4))
+ {
+ case 0:
+ mask_repeat = PIXMAN_REPEAT_NONE;
+ break;
+
+ case 1:
+ mask_repeat = PIXMAN_REPEAT_NORMAL;
+ break;
+
+ case 2:
+ mask_repeat = PIXMAN_REPEAT_PAD;
+ break;
+
+ case 3:
+ mask_repeat = PIXMAN_REPEAT_REFLECT;
+ break;
+
+ default:
+ break;
+ }
+ pixman_image_set_repeat (mask_img, mask_repeat);
+
switch (lcg_rand_n (4))
{
case 0:
@@ -137,6 +216,11 @@ test_composite (int testnum,
else
pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
+ if (lcg_rand_n (2))
+ pixman_image_set_filter (mask_img, PIXMAN_FILTER_NEAREST, NULL, 0);
+ else
+ pixman_image_set_filter (mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
if (verbose)
{
printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
@@ -183,6 +267,34 @@ test_composite (int testnum,
{
pixman_box16_t clip_boxes[2];
int n = lcg_rand_n (2) + 1;
+
+ for (i = 0; i < n; i++)
+ {
+ clip_boxes[i].x1 = lcg_rand_n (mask_width);
+ clip_boxes[i].y1 = lcg_rand_n (mask_height);
+ clip_boxes[i].x2 =
+ clip_boxes[i].x1 + lcg_rand_n (mask_width - clip_boxes[i].x1);
+ clip_boxes[i].y2 =
+ clip_boxes[i].y1 + lcg_rand_n (mask_height - clip_boxes[i].y1);
+
+ if (verbose)
+ {
+ printf ("mask clip box: [%d,%d-%d,%d]\n",
+ clip_boxes[i].x1, clip_boxes[i].y1,
+ clip_boxes[i].x2, clip_boxes[i].y2);
+ }
+ }
+
+ pixman_region_init_rects (&clip, clip_boxes, n);
+ pixman_image_set_clip_region (mask_img, &clip);
+ pixman_image_set_source_clipping (mask_img, 1);
+ pixman_region_fini (&clip);
+ }
+
+ if (lcg_rand_n (8) == 0)
+ {
+ pixman_box16_t clip_boxes[2];
+ int n = lcg_rand_n (2) + 1;
for (i = 0; i < n; i++)
{
clip_boxes[i].x1 = lcg_rand_n (dst_width);
@@ -204,8 +316,12 @@ test_composite (int testnum,
pixman_region_fini (&clip);
}
- pixman_image_composite (op, src_img, NULL, dst_img,
+ if (lcg_rand_n (2) == 0)
+ pixman_image_composite (op, src_img, NULL, dst_img,
src_x, src_y, 0, 0, dst_x, dst_y, w, h);
+ else
+ pixman_image_composite (op, src_img, mask_img, dst_img,
+ src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h);
if (dst_fmt == PIXMAN_x8r8g8b8)
{
@@ -230,10 +346,12 @@ test_composite (int testnum,
}
pixman_image_unref (src_img);
+ pixman_image_unref (mask_img);
pixman_image_unref (dst_img);
crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height);
free (srcbuf);
+ free (maskbuf);
free (dstbuf);
FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
@@ -245,6 +363,6 @@ main (int argc, const char *argv[])
{
pixman_disable_out_of_bounds_workaround ();
- return fuzzer_test_main("scaling", 8000000, 0x7F1AB59F,
+ return fuzzer_test_main("scaling", 8000000, 0x80DF1CB2,
test_composite, argc, argv);
}
commit 97447f440fec9889bba6cc21c6d9366183c47e7e
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Mon Jan 17 02:29:43 2011 +0200
Use const modifiers for source buffers in nearest scaling fast paths
diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
index 372e9f9..525a2c4 100644
--- a/pixman/pixman-arm-common.h
+++ b/pixman/pixman-arm-common.h
@@ -282,15 +282,15 @@ cputype##_composite_##name (pixman_implementation_t *imp, \
src_type, dst_type) \
void \
pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype ( \
- int32_t w, \
- dst_type * dst, \
- src_type * src, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x);\
+ int32_t w, \
+ dst_type * dst, \
+ const src_type * src, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x); \
\
static force_inline void \
scaled_nearest_scanline_##cputype##_##name##_##op (dst_type * pd, \
- src_type * ps, \
+ const src_type * ps, \
int32_t w, \
pixman_fixed_t vx, \
pixman_fixed_t unit_x, \
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 771ec9d..0cbe375 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1410,12 +1410,12 @@ FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
static force_inline void
-scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
- uint16_t * src,
- int32_t w,
- pixman_fixed_t vx,
- pixman_fixed_t unit_x,
- pixman_fixed_t max_vx)
+scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
+ const uint16_t * src,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx)
{
uint16_t tmp1, tmp2, tmp3, tmp4;
while ((w -= 4) >= 0)
diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h
index 069a282..591ab48 100644
--- a/pixman/pixman-fast-path.h
+++ b/pixman/pixman-fast-path.h
@@ -138,12 +138,12 @@ pad_repeat_get_scanline_bounds (int32_t source_image_width,
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
src_type_t, dst_type_t, OP, repeat_mode) \
static force_inline void \
-scanline_func_name (dst_type_t *dst, \
- src_type_t *src, \
- int32_t w, \
- pixman_fixed_t vx, \
- pixman_fixed_t unit_x, \
- pixman_fixed_t max_vx) \
+scanline_func_name (dst_type_t *dst, \
+ const src_type_t *src, \
+ int32_t w, \
+ pixman_fixed_t vx, \
+ pixman_fixed_t unit_x, \
+ pixman_fixed_t max_vx) \
{ \
uint32_t d; \
src_type_t s1, s2; \
@@ -346,7 +346,7 @@ fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp,
} \
else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
{ \
- static src_type_t zero[1] = { 0 }; \
+ static const src_type_t zero[1] = { 0 }; \
if (y < 0 || y >= src_image->bits.height) \
{ \
scanline_func (dst, zero, left_pad + width + right_pad, 0, 0, 0); \
More information about the xorg-commit mailing list