pixman: Branch 'master' - 8 commits

Siarhei Siamashka siamashka at kemper.freedesktop.org
Tue Feb 15 04:42:01 PST 2011


 pixman/pixman-arm-common.h   |   59 ++++++++++++-
 pixman/pixman-arm-neon-asm.S |   28 ++++++
 pixman/pixman-arm-neon.c     |   11 ++
 pixman/pixman-fast-path.c    |   13 +--
 pixman/pixman-fast-path.h    |  185 +++++++++++++++++++++++++++++++++++++------
 pixman/pixman-sse2.c         |  124 ++++++++++++++++++++++++++++
 test/scaling-test.c          |  136 +++++++++++++++++++++++++++++--
 7 files changed, 511 insertions(+), 45 deletions(-)

New commits:
commit 8e4100260bbdb827abc45a2a5e352a53246fe614
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Fri Feb 4 00:47:36 2011 +0200

    SSE2 optimization for nearest scaled over_8888_n_8888
    
    This operation shows up a little bit in some of the html5 based
    games from http://www.kesiev.com/akihabara/
    
    === Cairo trace of the game intro animation for 'Legend of Sadness' ===
    
    before:
    [  0]    image    firefox-legend-of-sadness   46.286   46.298   0.01%    5/6
    
    after:
    [  0]    image    firefox-legend-of-sadness   45.088   45.102   0.04%    6/6
    
    === Microbenchmark (scaling ~2000x~2000 -> ~2000x~2000) ===
    
    before:
        translucent: op=3, src=8888, mask=s dst=8888, speed=131.30 MPix/s
        transparent: op=3, src=8888, mask=s dst=8888, speed=132.38 MPix/s
        opaque:      op=3, src=8888, mask=s dst=8888, speed=167.90 MPix/s
    after:
        translucent: op=3, src=8888, mask=s dst=8888, speed=301.93 MPix/s
        transparent: op=3, src=8888, mask=s dst=8888, speed=770.70 MPix/s
        opaque:      op=3, src=8888, mask=s dst=8888, speed=301.80 MPix/s

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 6c494bc..2e135e2 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5898,6 +5898,119 @@ FAST_NEAREST_MAINLOOP (sse2_8888_8888_pad_OVER,
 		       scaled_nearest_scanline_sse2_8888_8888_OVER,
 		       uint32_t, uint32_t, PAD)
 
+static force_inline void
+scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
+					       uint32_t *       dst,
+					       const uint32_t * src,
+					       int32_t          w,
+					       pixman_fixed_t   vx,
+					       pixman_fixed_t   unit_x,
+					       pixman_fixed_t   max_vx,
+					       pixman_bool_t    zero_src)
+{
+    __m128i xmm_mask;
+    __m128i xmm_src, xmm_src_lo, xmm_src_hi;
+    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+    __m128i xmm_alpha_lo, xmm_alpha_hi;
+
+    if (zero_src || (*mask >> 24) == 0)
+	return;
+
+    xmm_mask = create_mask_16_128 (*mask >> 24);
+
+    while (w && (unsigned long)dst & 15)
+    {
+	uint32_t s = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+
+	if (s)
+	{
+	    uint32_t d = *dst;
+
+	    __m64 ms = unpack_32_1x64 (s);
+	    __m64 alpha     = expand_alpha_1x64 (ms);
+	    __m64 dest      = _mm_movepi64_pi64 (xmm_mask);
+	    __m64 alpha_dst = unpack_32_1x64 (d);
+
+	    *dst = pack_1x64_32 (
+		in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
+	}
+	dst++;
+	w--;
+    }
+
+    while (w >= 4)
+    {
+	uint32_t tmp1, tmp2, tmp3, tmp4;
+
+	tmp1 = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+	tmp2 = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+	tmp3 = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+	tmp4 = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+
+	xmm_src = _mm_set_epi32 (tmp4, tmp3, tmp2, tmp1);
+
+	if (!is_zero (xmm_src))
+	{
+	    xmm_dst = load_128_aligned ((__m128i*)dst);
+
+	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+	    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+			        &xmm_alpha_lo, &xmm_alpha_hi);
+
+	    in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+			   &xmm_alpha_lo, &xmm_alpha_hi,
+			   &xmm_mask, &xmm_mask,
+			   &xmm_dst_lo, &xmm_dst_hi);
+
+	    save_128_aligned (
+		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	}
+
+	dst += 4;
+	w -= 4;
+    }
+
+    while (w)
+    {
+	uint32_t s = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+
+	if (s)
+	{
+	    uint32_t d = *dst;
+
+	    __m64 ms = unpack_32_1x64 (s);
+	    __m64 alpha = expand_alpha_1x64 (ms);
+	    __m64 mask  = _mm_movepi64_pi64 (xmm_mask);
+	    __m64 dest  = unpack_32_1x64 (d);
+
+	    *dst = pack_1x64_32 (
+		in_over_1x64 (&ms, &alpha, &mask, &dest));
+	}
+
+	dst++;
+	w--;
+    }
+
+    _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_cover_OVER,
+			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+			      uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_pad_OVER,
+			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+			      uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
+			      scaled_nearest_scanline_sse2_8888_n_8888_OVER,
+			      uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+
 static const pixman_fast_path_t sse2_fast_paths[] =
 {
     /* PIXMAN_OP_OVER */
@@ -5994,6 +6107,11 @@ static const pixman_fast_path_t sse2_fast_paths[] =
     SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_8888),
     SIMPLE_NEAREST_FAST_PATH_PAD (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_8888),
 
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, sse2_8888_n_8888),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, sse2_8888_n_8888),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, sse2_8888_n_8888),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, sse2_8888_n_8888),
+
     { PIXMAN_OP_NONE },
 };
 
commit 39b86b032d1b81958d4dfc880ba7f129aecb1de0
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Wed Nov 3 15:22:28 2010 +0200

    ARM: NEON optimization for nearest scaled over_0565_8_0565
    
    In some cases this may be used for HTML5 video when hardware
    acceleration is not available.

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 927a8cd..47daf45 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -2377,3 +2377,17 @@ generate_composite_function_nearest_scanline \
     4,  /* dst_r_basereg */ \
     8,  /* src_basereg   */ \
     24  /* mask_basereg  */
+
+generate_composite_function_nearest_scanline \
+    pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_neon, 16, 8, 16, \
+    FLAG_DST_READWRITE, \
+    8, /* number of pixels, processed in a single block */ \
+    default_init_need_all_regs, \
+    default_cleanup_need_all_regs, \
+    pixman_composite_over_0565_8_0565_process_pixblock_head, \
+    pixman_composite_over_0565_8_0565_process_pixblock_tail, \
+    pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    10,  /* dst_r_basereg */ \
+    8,  /* src_basereg   */ \
+    15  /* mask_basereg  */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 7f6aea1..3e0c0d1 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -124,6 +124,8 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
 
 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
                                            OVER, uint32_t, uint16_t)
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 0565_8_0565,
+                                           OVER, uint16_t, uint16_t)
 
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,
@@ -338,6 +340,9 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
     PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
 
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, neon_0565_8_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, neon_0565_8_0565),
+
     { PIXMAN_OP_NONE },
 };
 
commit 9a90c1c90f1d128de68b3ed855a2ea1c3bed20c3
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Wed Nov 3 15:16:28 2010 +0200

    ARM: NEON optimization for nearest scaled over_8888_8_0565
    
    In some cases this may be used for HTML5 video when hardware
    acceleration is not available.

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 4dddde1..927a8cd 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -2363,3 +2363,17 @@ generate_composite_function_nearest_scanline \
     pixman_composite_src_0565_8888_process_pixblock_head, \
     pixman_composite_src_0565_8888_process_pixblock_tail, \
     pixman_composite_src_0565_8888_process_pixblock_tail_head
+
+generate_composite_function_nearest_scanline \
+    pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_neon, 32, 8, 16, \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    default_init_need_all_regs, \
+    default_cleanup_need_all_regs, \
+    pixman_composite_over_8888_8_0565_process_pixblock_head, \
+    pixman_composite_over_8888_8_0565_process_pixblock_tail, \
+    pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    8,  /* src_basereg   */ \
+    24  /* mask_basereg  */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 7d6c837..7f6aea1 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -122,6 +122,9 @@ PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 8888_0565, SRC,
 PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_DST (neon, 0565_8888, SRC,
                                         uint16_t, uint32_t)
 
+PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, neon, 8888_8_0565,
+                                           OVER, uint32_t, uint16_t)
+
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,
                                    int32_t   h,
@@ -332,6 +335,9 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, r5g6b5, a8r8g8b8, neon_0565_8888),
     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, b5g6r5, a8b8g8r8, neon_0565_8888),
 
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, neon_8888_8_0565),
+    PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, neon_8888_8_0565),
+
     { PIXMAN_OP_NONE },
 };
 
commit cd1062ded44978fa97aa3d3295af016c80c6e2eb
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Wed Nov 3 15:15:15 2010 +0200

    ARM: new macro template for using scaled fast paths with a8 mask

diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
index 6043d4e..9b1322b 100644
--- a/pixman/pixman-arm-common.h
+++ b/pixman/pixman-arm-common.h
@@ -317,4 +317,48 @@ FAST_NEAREST_MAINLOOP (cputype##_##name##_pad_##op,                           \
     SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func),                              \
     SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func)
 
+#define PIXMAN_ARM_BIND_SCALED_NEAREST_SRC_A8_DST(flags, cputype, name, op,   \
+                                                  src_type, dst_type)         \
+void                                                                          \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (                \
+                                                   int32_t          w,        \
+                                                   dst_type *       dst,      \
+                                                   const src_type * src,      \
+                                                   pixman_fixed_t   vx,       \
+                                                   pixman_fixed_t   unit_x,   \
+                                                   const uint8_t *  mask);    \
+                                                                              \
+static force_inline void                                                      \
+scaled_nearest_scanline_##cputype##_##name##_##op (const uint8_t *  mask,     \
+                                                   dst_type *       pd,       \
+                                                   const src_type * ps,       \
+                                                   int32_t          w,        \
+                                                   pixman_fixed_t   vx,       \
+                                                   pixman_fixed_t   unit_x,   \
+                                                   pixman_fixed_t   max_vx,   \
+                                                   pixman_bool_t    zero_src) \
+{                                                                             \
+    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
+	return;                                                               \
+    pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps,  \
+                                                                  vx, unit_x, \
+                                                                  mask);      \
+}                                                                             \
+                                                                              \
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_cover_##op,                  \
+                              scaled_nearest_scanline_##cputype##_##name##_##op,\
+                              src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_none_##op,                   \
+                              scaled_nearest_scanline_##cputype##_##name##_##op,\
+                              src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
+FAST_NEAREST_MAINLOOP_COMMON (cputype##_##name##_pad_##op,                    \
+                              scaled_nearest_scanline_##cputype##_##name##_##op,\
+                              src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
+
+/* Provide entries for the fast path table */
+#define PIXMAN_ARM_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)              \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),                     \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),                      \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
 #endif
commit b099957887ef69b795d542f8f2980b5a94fb823f
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Wed Feb 2 18:14:56 2011 +0200

    Better support for NONE repeat in nearest scaling main loop template
    
    The scaling function now gets an extra boolean argument, which is set
    to TRUE when we are fetching padding pixels for NONE repeat. This
    makes it possible to decide whether to interpret alpha as 0xFF or 0x00
    for such pixels when working with formats which don't have an alpha
    channel (for example x8r8g8b8 and r5g6b5).

diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
index 525a2c4..6043d4e 100644
--- a/pixman/pixman-arm-common.h
+++ b/pixman/pixman-arm-common.h
@@ -294,7 +294,8 @@ scaled_nearest_scanline_##cputype##_##name##_##op (dst_type *       pd,       \
                                                    int32_t          w,        \
                                                    pixman_fixed_t   vx,       \
                                                    pixman_fixed_t   unit_x,   \
-                                                   pixman_fixed_t   max_vx)   \
+                                                   pixman_fixed_t   max_vx,   \
+                                                   pixman_bool_t    zero_src) \
 {                                                                             \
     pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (w, pd, ps,  \
                                                                   vx, unit_x);\
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 0cbe375..92f0308 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1415,7 +1415,8 @@ scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
 				     int32_t          w,
 				     pixman_fixed_t   vx,
 				     pixman_fixed_t   unit_x,
-				     pixman_fixed_t   max_vx)
+				     pixman_fixed_t   max_vx,
+				     pixman_bool_t    fully_transparent_src)
 {
     uint16_t tmp1, tmp2, tmp3, tmp4;
     while ((w -= 4) >= 0)
diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h
index a71f6f0..d081222 100644
--- a/pixman/pixman-fast-path.h
+++ b/pixman/pixman-fast-path.h
@@ -143,13 +143,17 @@ scanline_func_name (dst_type_t       *dst,							\
 		    int32_t           w,							\
 		    pixman_fixed_t    vx,							\
 		    pixman_fixed_t    unit_x,							\
-		    pixman_fixed_t    max_vx)							\
+		    pixman_fixed_t    max_vx,							\
+		    pixman_bool_t     fully_transparent_src)					\
 {												\
 	uint32_t   d;										\
 	src_type_t s1, s2;									\
 	uint8_t    a1, a2;									\
 	int        x1, x2;									\
 												\
+	if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src)			\
+	    return;										\
+												\
 	if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER)		\
 	    abort();										\
 												\
@@ -348,18 +352,18 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 	    src = src_first_line + src_stride * y;						\
 	    if (left_pad > 0)									\
 	    {											\
-		scanline_func (mask, dst, src, left_pad, 0, 0, 0);				\
+		scanline_func (mask, dst, src, left_pad, 0, 0, 0, FALSE);			\
 	    }											\
 	    if (width > 0)									\
 	    {											\
 		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
-			       dst + left_pad, src, width, vx, unit_x, 0);			\
+			       dst + left_pad, src, width, vx, unit_x, 0, FALSE);		\
 	    }											\
 	    if (right_pad > 0)									\
 	    {											\
 		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
 			       dst + left_pad + width, src + src_image->bits.width - 1,		\
-			       right_pad, 0, 0, 0);						\
+			       right_pad, 0, 0, 0, FALSE);					\
 	    }											\
 	}											\
 	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
@@ -367,29 +371,29 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 	    static const src_type_t zero[1] = { 0 };						\
 	    if (y < 0 || y >= src_image->bits.height)						\
 	    {											\
-		scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0);		\
+		scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0, TRUE);	\
 		continue;									\
 	    }											\
 	    src = src_first_line + src_stride * y;						\
 	    if (left_pad > 0)									\
 	    {											\
-		scanline_func (mask, dst, zero, left_pad, 0, 0, 0);				\
+		scanline_func (mask, dst, zero, left_pad, 0, 0, 0, TRUE);			\
 	    }											\
 	    if (width > 0)									\
 	    {											\
 		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
-			       dst + left_pad, src, width, vx, unit_x, 0);			\
+			       dst + left_pad, src, width, vx, unit_x, 0, FALSE);		\
 	    }											\
 	    if (right_pad > 0)									\
 	    {											\
 		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
-			       dst + left_pad + width, zero, right_pad, 0, 0, 0);		\
+			       dst + left_pad + width, zero, right_pad, 0, 0, 0, TRUE);		\
 	    }											\
 	}											\
 	else											\
 	{											\
 	    src = src_first_line + src_stride * y;						\
-	    scanline_func (mask, dst, src, width, vx, unit_x, max_vx);				\
+	    scanline_func (mask, dst, src, width, vx, unit_x, max_vx, FALSE);			\
 	}											\
     }												\
 }
@@ -410,9 +414,10 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 		    int32_t          w,								\
 		    pixman_fixed_t   vx,							\
 		    pixman_fixed_t   unit_x,							\
-		    pixman_fixed_t   max_vx)							\
+		    pixman_fixed_t   max_vx,							\
+		    pixman_bool_t    fully_transparent_src)					\
     {												\
-	scanline_func (dst, src, w, vx, unit_x, max_vx);					\
+	scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src);			\
     }												\
     FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper,	\
 			       src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 91adc05..6c494bc 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5800,7 +5800,8 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
                                              int32_t         w,
                                              pixman_fixed_t  vx,
                                              pixman_fixed_t  unit_x,
-                                             pixman_fixed_t  max_vx)
+                                             pixman_fixed_t  max_vx,
+                                             pixman_bool_t   fully_transparent_src)
 {
     uint32_t s, d;
     const uint32_t* pm = NULL;
@@ -5809,6 +5810,9 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
     __m128i xmm_src_lo, xmm_src_hi;
     __m128i xmm_alpha_lo, xmm_alpha_hi;
 
+    if (fully_transparent_src)
+	return;
+
     /* Align dst on a 16-byte boundary */
     while (w && ((unsigned long)pd & 15))
     {
commit 14f82083a12be07f340fdea491759b3bb77b4e66
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Fri Oct 22 17:54:41 2010 +0300

    Support for a8 and solid mask in nearest scaling main loop template
    
    In addition to the most common case of not having any mask at all, two
    variants of scaling with a mask show up in cairo traces:
    1. non-scaled a8 mask with SAMPLES_COVER_CLIP flag
    2. solid mask
    
    This patch extends the nearest scaling main loop template to also
    support these cases.

diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h
index 591ab48..a71f6f0 100644
--- a/pixman/pixman-fast-path.h
+++ b/pixman/pixman-fast-path.h
@@ -245,8 +245,8 @@ scanline_func_name (dst_type_t       *dst,							\
 	}											\
 }
 
-#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, dst_type_t,	\
-				  repeat_mode)							\
+#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
+				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
 static void											\
 fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,		\
 						   pixman_op_t              op,			\
@@ -263,6 +263,7 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 						   int32_t                  height)		\
 {												\
     dst_type_t *dst_line;									\
+    mask_type_t *mask_line;									\
     src_type_t *src_first_line;									\
     int       y;										\
     pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */		\
@@ -274,9 +275,19 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 												\
     src_type_t *src;										\
     dst_type_t *dst;										\
-    int       src_stride, dst_stride;								\
+    mask_type_t solid_mask;									\
+    const mask_type_t *mask = &solid_mask;							\
+    int src_stride, mask_stride, dst_stride;							\
 												\
     PIXMAN_IMAGE_GET_LINE (dst_image, dst_x, dst_y, dst_type_t, dst_stride, dst_line, 1);	\
+    if (have_mask)										\
+    {												\
+	if (mask_is_solid)									\
+	    solid_mask = _pixman_image_get_solid (imp, mask_image, dst_image->bits.format);	\
+	else											\
+	    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t,			\
+				   mask_stride, mask_line, 1);					\
+    }												\
     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be			\
      * transformed from destination space to source space */					\
     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1);		\
@@ -321,6 +332,11 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
     {												\
 	dst = dst_line;										\
 	dst_line += dst_stride;									\
+	if (have_mask && !mask_is_solid)							\
+	{											\
+	    mask = mask_line;									\
+	    mask_line += mask_stride;								\
+	}											\
 												\
 	y = vy >> 16;										\
 	vy += unit_y;										\
@@ -332,16 +348,18 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 	    src = src_first_line + src_stride * y;						\
 	    if (left_pad > 0)									\
 	    {											\
-		scanline_func (dst, src, left_pad, 0, 0, 0);					\
+		scanline_func (mask, dst, src, left_pad, 0, 0, 0);				\
 	    }											\
 	    if (width > 0)									\
 	    {											\
-		scanline_func (dst + left_pad, src, width, vx, unit_x, 0);			\
+		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
+			       dst + left_pad, src, width, vx, unit_x, 0);			\
 	    }											\
 	    if (right_pad > 0)									\
 	    {											\
-		scanline_func (dst + left_pad + width, src + src_image->bits.width - 1,		\
-			        right_pad, 0, 0, 0);						\
+		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
+			       dst + left_pad + width, src + src_image->bits.width - 1,		\
+			       right_pad, 0, 0, 0);						\
 	    }											\
 	}											\
 	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
@@ -349,43 +367,67 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 	    static const src_type_t zero[1] = { 0 };						\
 	    if (y < 0 || y >= src_image->bits.height)						\
 	    {											\
-		scanline_func (dst, zero, left_pad + width + right_pad, 0, 0, 0);		\
+		scanline_func (mask, dst, zero, left_pad + width + right_pad, 0, 0, 0);		\
 		continue;									\
 	    }											\
 	    src = src_first_line + src_stride * y;						\
 	    if (left_pad > 0)									\
 	    {											\
-		scanline_func (dst, zero, left_pad, 0, 0, 0);					\
+		scanline_func (mask, dst, zero, left_pad, 0, 0, 0);				\
 	    }											\
 	    if (width > 0)									\
 	    {											\
-		scanline_func (dst + left_pad, src, width, vx, unit_x, 0);			\
+		scanline_func (mask + (mask_is_solid ? 0 : left_pad),				\
+			       dst + left_pad, src, width, vx, unit_x, 0);			\
 	    }											\
 	    if (right_pad > 0)									\
 	    {											\
-		scanline_func (dst + left_pad + width, zero, right_pad, 0, 0, 0);		\
+		scanline_func (mask + (mask_is_solid ? 0 : left_pad + width),			\
+			       dst + left_pad + width, zero, right_pad, 0, 0, 0);		\
 	    }											\
 	}											\
 	else											\
 	{											\
 	    src = src_first_line + src_stride * y;						\
-	    scanline_func (dst, src, width, vx, unit_x, max_vx);				\
+	    scanline_func (mask, dst, src, width, vx, unit_x, max_vx);				\
 	}											\
     }												\
 }
 
 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
-#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,		\
+#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t,	\
+				  dst_type_t, repeat_mode, have_mask, mask_is_solid)		\
+	FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,	\
+				  dst_type_t, repeat_mode, have_mask, mask_is_solid)
+
+#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t,	\
 			      repeat_mode)							\
-	FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, dst_type_t,	\
+    static force_inline void									\
+    scanline_func##scale_func_name##_wrapper (							\
+		    const uint8_t    *mask,							\
+		    dst_type_t       *dst,							\
+		    const src_type_t *src,							\
+		    int32_t          w,								\
+		    pixman_fixed_t   vx,							\
+		    pixman_fixed_t   unit_x,							\
+		    pixman_fixed_t   max_vx)							\
+    {												\
+	scanline_func (dst, src, w, vx, unit_x, max_vx);					\
+    }												\
+    FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper,	\
+			       src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)
+
+#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t,		\
 			      repeat_mode)							\
+	FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t,		\
+			      dst_type_t, repeat_mode)
 
 #define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT,				\
 		     src_type_t, dst_type_t, OP, repeat_mode)				\
     FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
 			  SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t,		\
 			  OP, repeat_mode)						\
-    FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name ## _ ## OP,				\
+    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP,			\
 			  scaled_nearest_scanline_ ## scale_func_name ## _ ## OP,	\
 			  src_type_t, dst_type_t, repeat_mode)
 
@@ -439,6 +481,90 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
     }
 
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
+    {   PIXMAN_OP_ ## op,						\
+	PIXMAN_ ## s,							\
+	(SCALED_NEAREST_FLAGS		|				\
+	 FAST_PATH_NORMAL_REPEAT	|				\
+	 FAST_PATH_X_UNIT_POSITIVE),					\
+	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
+	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
+	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
+    }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func)		\
+    {   PIXMAN_OP_ ## op,						\
+	PIXMAN_ ## s,							\
+	(SCALED_NEAREST_FLAGS		|				\
+	 FAST_PATH_PAD_REPEAT		|				\
+	 FAST_PATH_X_UNIT_POSITIVE),					\
+	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
+	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
+	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
+    }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func)		\
+    {   PIXMAN_OP_ ## op,						\
+	PIXMAN_ ## s,							\
+	(SCALED_NEAREST_FLAGS		|				\
+	 FAST_PATH_NONE_REPEAT		|				\
+	 FAST_PATH_X_UNIT_POSITIVE),					\
+	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
+	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
+	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
+    }
+
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func)		\
+    {   PIXMAN_OP_ ## op,						\
+	PIXMAN_ ## s,							\
+	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
+	PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA),		\
+	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
+	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
+    }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func)		\
+    {   PIXMAN_OP_ ## op,						\
+	PIXMAN_ ## s,							\
+	(SCALED_NEAREST_FLAGS		|				\
+	 FAST_PATH_NORMAL_REPEAT	|				\
+	 FAST_PATH_X_UNIT_POSITIVE),					\
+	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
+	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
+	fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op,	\
+    }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func)		\
+    {   PIXMAN_OP_ ## op,						\
+	PIXMAN_ ## s,							\
+	(SCALED_NEAREST_FLAGS		|				\
+	 FAST_PATH_PAD_REPEAT		|				\
+	 FAST_PATH_X_UNIT_POSITIVE),					\
+	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
+	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
+	fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op,	\
+    }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func)		\
+    {   PIXMAN_OP_ ## op,						\
+	PIXMAN_ ## s,							\
+	(SCALED_NEAREST_FLAGS		|				\
+	 FAST_PATH_NONE_REPEAT		|				\
+	 FAST_PATH_X_UNIT_POSITIVE),					\
+	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
+	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
+	fast_composite_scaled_nearest_ ## func ## _none ## _ ## op,	\
+    }
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func)		\
+    {   PIXMAN_OP_ ## op,						\
+	PIXMAN_ ## s,							\
+	SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP,		\
+	PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA),	\
+	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				\
+	fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op,	\
+    }
+
 /* Prefer the use of 'cover' variant, because it is faster */
 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func)				\
     SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func),			\
@@ -446,4 +572,14 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
     SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func),				\
     SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func)
 
+#define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func)			\
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func),		\
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func),		\
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func)
+
+#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func)		\
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func),		\
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func),		\
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func)
+
 #endif
commit e83cee5aac26522f31a7e81ea3f972ae2248f6b0
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Fri Oct 22 16:29:01 2010 +0300

    test: Extend scaling-test to support a8/solid mask and ADD operation
    
    Image width has also been increased, because SIMD optimizations typically
    do more unrolling in the inner loops and this needs to be tested.

diff --git a/test/scaling-test.c b/test/scaling-test.c
index e91df32..dbb9d39 100644
--- a/test/scaling-test.c
+++ b/test/scaling-test.c
@@ -12,10 +12,10 @@
 #include <stdio.h>
 #include "utils.h"
 
-#define MAX_SRC_WIDTH  16
-#define MAX_SRC_HEIGHT 16
-#define MAX_DST_WIDTH  16
-#define MAX_DST_HEIGHT 16
+#define MAX_SRC_WIDTH  48
+#define MAX_SRC_HEIGHT 8
+#define MAX_DST_WIDTH  48
+#define MAX_DST_HEIGHT 8
 #define MAX_STRIDE     4
 
 /*
@@ -27,24 +27,32 @@ test_composite (int      testnum,
 {
     int                i;
     pixman_image_t *   src_img;
+    pixman_image_t *   mask_img;
     pixman_image_t *   dst_img;
     pixman_transform_t transform;
     pixman_region16_t  clip;
     int                src_width, src_height;
+    int                mask_width, mask_height;
     int                dst_width, dst_height;
-    int                src_stride, dst_stride;
+    int                src_stride, mask_stride, dst_stride;
     int                src_x, src_y;
+    int                mask_x, mask_y;
     int                dst_x, dst_y;
     int                src_bpp;
+    int                mask_bpp = 1;
     int                dst_bpp;
     int                w, h;
     pixman_fixed_t     scale_x = 65536, scale_y = 65536;
     pixman_fixed_t     translate_x = 0, translate_y = 0;
+    pixman_fixed_t     mask_scale_x = 65536, mask_scale_y = 65536;
+    pixman_fixed_t     mask_translate_x = 0, mask_translate_y = 0;
     pixman_op_t        op;
     pixman_repeat_t    repeat = PIXMAN_REPEAT_NONE;
+    pixman_repeat_t    mask_repeat = PIXMAN_REPEAT_NONE;
     pixman_format_code_t src_fmt, dst_fmt;
     uint32_t *         srcbuf;
     uint32_t *         dstbuf;
+    uint32_t *         maskbuf;
     uint32_t           crc32;
     FLOAT_REGS_CORRUPTION_DETECTOR_START ();
 
@@ -52,34 +60,68 @@ test_composite (int      testnum,
 
     src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
     dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
-    op = (lcg_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER;
+    switch (lcg_rand_n (3))
+    {
+    case 0:
+	op = PIXMAN_OP_SRC;
+	break;
+    case 1:
+	op = PIXMAN_OP_OVER;
+	break;
+    default:
+	op = PIXMAN_OP_ADD;
+	break;
+    }
 
     src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
     src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+
+    if (lcg_rand_n (2))
+    {
+	mask_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
+	mask_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+    }
+    else
+    {
+	mask_width = mask_height = 1;
+    }
+
     dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
     dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
     src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
+    mask_stride = mask_width * mask_bpp + lcg_rand_n (MAX_STRIDE) * mask_bpp;
     dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
 
     if (src_stride & 3)
 	src_stride += 2;
 
+    if (mask_stride & 1)
+	mask_stride += 1;
+    if (mask_stride & 2)
+	mask_stride += 2;
+
     if (dst_stride & 3)
 	dst_stride += 2;
 
     src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
     src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
+    mask_x = -(mask_width / 4) + lcg_rand_n (mask_width * 3 / 2);
+    mask_y = -(mask_height / 4) + lcg_rand_n (mask_height * 3 / 2);
     dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
     dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
     w = lcg_rand_n (dst_width * 3 / 2 - dst_x);
     h = lcg_rand_n (dst_height * 3 / 2 - dst_y);
 
     srcbuf = (uint32_t *)malloc (src_stride * src_height);
+    maskbuf = (uint32_t *)malloc (mask_stride * mask_height);
     dstbuf = (uint32_t *)malloc (dst_stride * dst_height);
 
     for (i = 0; i < src_stride * src_height; i++)
 	*((uint8_t *)srcbuf + i) = lcg_rand_n (256);
 
+    for (i = 0; i < mask_stride * mask_height; i++)
+	*((uint8_t *)maskbuf + i) = lcg_rand_n (256);
+
     for (i = 0; i < dst_stride * dst_height; i++)
 	*((uint8_t *)dstbuf + i) = lcg_rand_n (256);
 
@@ -92,13 +134,16 @@ test_composite (int      testnum,
     src_img = pixman_image_create_bits (
         src_fmt, src_width, src_height, srcbuf, src_stride);
 
+    mask_img = pixman_image_create_bits (
+        PIXMAN_a8, mask_width, mask_height, maskbuf, mask_stride);
+
     dst_img = pixman_image_create_bits (
         dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
 
     image_endian_swap (src_img, src_bpp * 8);
     image_endian_swap (dst_img, dst_bpp * 8);
 
-    if (lcg_rand_n (8) > 0)
+    if (lcg_rand_n (4) > 0)
     {
 	scale_x = -32768 * 3 + lcg_rand_N (65536 * 5);
 	scale_y = -32768 * 3 + lcg_rand_N (65536 * 5);
@@ -109,6 +154,40 @@ test_composite (int      testnum,
 	pixman_image_set_transform (src_img, &transform);
     }
 
+    if (lcg_rand_n (2) > 0)
+    {
+	mask_scale_x = -32768 * 3 + lcg_rand_N (65536 * 5);
+	mask_scale_y = -32768 * 3 + lcg_rand_N (65536 * 5);
+	mask_translate_x = lcg_rand_N (65536);
+	mask_translate_y = lcg_rand_N (65536);
+	pixman_transform_init_scale (&transform, mask_scale_x, mask_scale_y);
+	pixman_transform_translate (&transform, NULL, mask_translate_x, mask_translate_y);
+	pixman_image_set_transform (mask_img, &transform);
+    }
+
+    switch (lcg_rand_n (4))
+    {
+    case 0:
+	mask_repeat = PIXMAN_REPEAT_NONE;
+	break;
+
+    case 1:
+	mask_repeat = PIXMAN_REPEAT_NORMAL;
+	break;
+
+    case 2:
+	mask_repeat = PIXMAN_REPEAT_PAD;
+	break;
+
+    case 3:
+	mask_repeat = PIXMAN_REPEAT_REFLECT;
+	break;
+
+    default:
+        break;
+    }
+    pixman_image_set_repeat (mask_img, mask_repeat);
+
     switch (lcg_rand_n (4))
     {
     case 0:
@@ -137,6 +216,11 @@ test_composite (int      testnum,
     else
 	pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
 
+    if (lcg_rand_n (2))
+	pixman_image_set_filter (mask_img, PIXMAN_FILTER_NEAREST, NULL, 0);
+    else
+	pixman_image_set_filter (mask_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
     if (verbose)
     {
 	printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
@@ -183,6 +267,34 @@ test_composite (int      testnum,
     {
 	pixman_box16_t clip_boxes[2];
 	int            n = lcg_rand_n (2) + 1;
+
+	for (i = 0; i < n; i++)
+	{
+	    clip_boxes[i].x1 = lcg_rand_n (mask_width);
+	    clip_boxes[i].y1 = lcg_rand_n (mask_height);
+	    clip_boxes[i].x2 =
+		clip_boxes[i].x1 + lcg_rand_n (mask_width - clip_boxes[i].x1);
+	    clip_boxes[i].y2 =
+		clip_boxes[i].y1 + lcg_rand_n (mask_height - clip_boxes[i].y1);
+
+	    if (verbose)
+	    {
+		printf ("mask clip box: [%d,%d-%d,%d]\n",
+		        clip_boxes[i].x1, clip_boxes[i].y1,
+		        clip_boxes[i].x2, clip_boxes[i].y2);
+	    }
+	}
+
+	pixman_region_init_rects (&clip, clip_boxes, n);
+	pixman_image_set_clip_region (mask_img, &clip);
+	pixman_image_set_source_clipping (mask_img, 1);
+	pixman_region_fini (&clip);
+    }
+
+    if (lcg_rand_n (8) == 0)
+    {
+	pixman_box16_t clip_boxes[2];
+	int            n = lcg_rand_n (2) + 1;
 	for (i = 0; i < n; i++)
 	{
 	    clip_boxes[i].x1 = lcg_rand_n (dst_width);
@@ -204,8 +316,12 @@ test_composite (int      testnum,
 	pixman_region_fini (&clip);
     }
 
-    pixman_image_composite (op, src_img, NULL, dst_img,
+    if (lcg_rand_n (2) == 0)
+	pixman_image_composite (op, src_img, NULL, dst_img,
                             src_x, src_y, 0, 0, dst_x, dst_y, w, h);
+    else
+	pixman_image_composite (op, src_img, mask_img, dst_img,
+                            src_x, src_y, mask_x, mask_y, dst_x, dst_y, w, h);
 
     if (dst_fmt == PIXMAN_x8r8g8b8)
     {
@@ -230,10 +346,12 @@ test_composite (int      testnum,
     }
 
     pixman_image_unref (src_img);
+    pixman_image_unref (mask_img);
     pixman_image_unref (dst_img);
 
     crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height);
     free (srcbuf);
+    free (maskbuf);
     free (dstbuf);
 
     FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
@@ -245,6 +363,6 @@ main (int argc, const char *argv[])
 {
     pixman_disable_out_of_bounds_workaround ();
 
-    return fuzzer_test_main("scaling", 8000000, 0x7F1AB59F,
+    return fuzzer_test_main("scaling", 8000000, 0x80DF1CB2,
 			    test_composite, argc, argv);
 }
commit 97447f440fec9889bba6cc21c6d9366183c47e7e
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Mon Jan 17 02:29:43 2011 +0200

    Use const modifiers for source buffers in nearest scaling fast paths

diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
index 372e9f9..525a2c4 100644
--- a/pixman/pixman-arm-common.h
+++ b/pixman/pixman-arm-common.h
@@ -282,15 +282,15 @@ cputype##_composite_##name (pixman_implementation_t *imp,               \
                                                src_type, dst_type)            \
 void                                                                          \
 pixman_scaled_nearest_scanline_##name##_##op##_asm_##cputype (                \
-                                                       int32_t        w,      \
-                                                       dst_type *     dst,    \
-                                                       src_type *     src,    \
-                                                       pixman_fixed_t vx,     \
-                                                       pixman_fixed_t unit_x);\
+                                                   int32_t          w,        \
+                                                   dst_type *       dst,      \
+                                                   const src_type * src,      \
+                                                   pixman_fixed_t   vx,       \
+                                                   pixman_fixed_t   unit_x);  \
                                                                               \
 static force_inline void                                                      \
 scaled_nearest_scanline_##cputype##_##name##_##op (dst_type *       pd,       \
-                                                   src_type *       ps,       \
+                                                   const src_type * ps,       \
                                                    int32_t          w,        \
                                                    pixman_fixed_t   vx,       \
                                                    pixman_fixed_t   unit_x,   \
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 771ec9d..0cbe375 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1410,12 +1410,12 @@ FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
 
 /* Use more unrolling for src_0565_0565 because it is typically CPU bound */
 static force_inline void
-scaled_nearest_scanline_565_565_SRC (uint16_t *      dst,
-				     uint16_t *      src,
-				     int32_t         w,
-				     pixman_fixed_t  vx,
-				     pixman_fixed_t  unit_x,
-				     pixman_fixed_t  max_vx)
+scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
+				     const uint16_t * src,
+				     int32_t          w,
+				     pixman_fixed_t   vx,
+				     pixman_fixed_t   unit_x,
+				     pixman_fixed_t   max_vx)
 {
     uint16_t tmp1, tmp2, tmp3, tmp4;
     while ((w -= 4) >= 0)
diff --git a/pixman/pixman-fast-path.h b/pixman/pixman-fast-path.h
index 069a282..591ab48 100644
--- a/pixman/pixman-fast-path.h
+++ b/pixman/pixman-fast-path.h
@@ -138,12 +138,12 @@ pad_repeat_get_scanline_bounds (int32_t         source_image_width,
 #define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT,			\
 			      src_type_t, dst_type_t, OP, repeat_mode)				\
 static force_inline void									\
-scanline_func_name (dst_type_t     *dst,							\
-		    src_type_t     *src,							\
-		    int32_t         w,								\
-		    pixman_fixed_t  vx,								\
-		    pixman_fixed_t  unit_x,							\
-		    pixman_fixed_t  max_vx)							\
+scanline_func_name (dst_type_t       *dst,							\
+		    const src_type_t *src,							\
+		    int32_t           w,							\
+		    pixman_fixed_t    vx,							\
+		    pixman_fixed_t    unit_x,							\
+		    pixman_fixed_t    max_vx)							\
 {												\
 	uint32_t   d;										\
 	src_type_t s1, s2;									\
@@ -346,7 +346,7 @@ fast_composite_scaled_nearest  ## scale_func_name (pixman_implementation_t *imp,
 	}											\
 	else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE)				\
 	{											\
-	    static src_type_t zero[1] = { 0 };							\
+	    static const src_type_t zero[1] = { 0 };						\
 	    if (y < 0 || y >= src_image->bits.height)						\
 	    {											\
 		scanline_func (dst, zero, left_pad + width + right_pad, 0, 0, 0);		\


More information about the xorg-commit mailing list