pixman: Branch 'master' - 3 commits

Søren Sandmann Pedersen sandmann at kemper.freedesktop.org
Fri May 29 19:25:03 PDT 2009


 pixman/pixman-mmx.c         |    9 ++--
 pixman/pixman-sse2.c        |   85 +++++++++++++++++++++++-------------------
 test/scaling-test-bisect.rb |    2 -
 test/scaling-test.c         |   88 ++++++++++++++++++++++++++++++++------------
 4 files changed, 117 insertions(+), 67 deletions(-)

New commits:
commit 21034db1daf90ac2b17f6929e72b3a0b953e81c4
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Wed May 27 22:46:23 2009 +0300

    Scaling test updated to provide better coverage for problematic cases
    
    Now scaling test should reliably detect problems in new scaling code.
    Maximum image size reduced to improve performance (more tests can be
    run per second) and also simplify detected errors analysis.

diff --git a/test/scaling-test-bisect.rb b/test/scaling-test-bisect.rb
index 2e4aff3..ab34262 100644
--- a/test/scaling-test-bisect.rb
+++ b/test/scaling-test-bisect.rb
@@ -7,7 +7,7 @@ if not ARGV[0] or not ARGV[1] then
     exit(0)
 end
 
-$MAX = 10000
+$MAX = 3000000
 $MIN = 1
 $AVG = 0
 
diff --git a/test/scaling-test.c b/test/scaling-test.c
index 8810612..c85908d 100644
--- a/test/scaling-test.c
+++ b/test/scaling-test.c
@@ -1,3 +1,25 @@
+/*
+ * Test program, which can detect problems with nearest neighbout scaling
+ * implementation. Also SRC and OVER opetations tested for 16bpp and 32bpp
+ * images.
+ *
+ * Just run it without any command line arguments, and it will report either
+ *   "scaling test passed" - everything is ok
+ *   "scaling test failed!" - there is some problem
+ *
+ * In the case of failure, finding the problem involves the following steps:
+ * 1. Get the reference 'scaling-test' binary. It makes sense to disable all
+ *    the cpu specific optimizations in pixman and also configure it with
+ *    '--disable-shared' option. Those who are paranoid can also tweak the
+ *    sources to disable all fastpath functions. The resulting binary
+ *    can be renamed to something like 'scaling-test.ref'.
+ * 2. Compile the buggy binary (also with the '--disable-shared' option).
+ * 3. Run 'ruby scaling-test-bisect.rb ./scaling-test.ref ./scaling-test'
+ * 4. Look at the information about failed case (destination buffer content
+ *    will be shown) and try to figure out what is wrong. It is possible
+ *    to use debugging print to stderr in pixman to get more information,
+ *    this does not interfere with the testing script.
+ */
 #include <assert.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -117,11 +139,11 @@ static uint32_t Crc32_ComputeBuf( uint32_t inCrc32, const void *buf,
 }
 
 
-#define MAX_SRC_WIDTH  100
-#define MAX_SRC_HEIGHT 100
-#define MAX_DST_WIDTH  100
-#define MAX_DST_HEIGHT 100
-#define MAX_STRIDE     16
+#define MAX_SRC_WIDTH  10
+#define MAX_SRC_HEIGHT 10
+#define MAX_DST_WIDTH  10
+#define MAX_DST_HEIGHT 10
+#define MAX_STRIDE     4
 
 /*
  * Composite operation with pseudorandom images
@@ -164,10 +186,12 @@ uint32_t test_composite(uint32_t initcrc, int testnum, int verbose)
     if (src_stride & 3) src_stride += 2;
     if (dst_stride & 3) dst_stride += 2;
 
-    src_x = -src_width + lcg_rand_n(src_width * 3);
-    src_y = -src_height + lcg_rand_n(src_height * 3);
-    dst_x = -dst_width + lcg_rand_n(dst_width * 2);
-    dst_y = -dst_height + lcg_rand_n(dst_height * 3);
+    src_x = -(src_width / 4) + lcg_rand_n(src_width * 3 / 2);
+    src_y = -(src_height / 4) + lcg_rand_n(src_height * 3 / 2);
+    dst_x = -(dst_width / 4) + lcg_rand_n(dst_width * 3 / 2);
+    dst_y = -(dst_height / 4) + lcg_rand_n(dst_height * 3 / 2);
+    w = lcg_rand_n(dst_width * 3 / 2 - dst_x);
+    h = lcg_rand_n(dst_height * 3 / 2 - dst_y);
 
     srcbuf = (uint32_t *)malloc(src_stride * src_height);
     dstbuf = (uint32_t *)malloc(dst_stride * dst_height);
@@ -188,7 +212,7 @@ uint32_t test_composite(uint32_t initcrc, int testnum, int verbose)
     dst_img = pixman_image_create_bits(
         dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
 
-    if (lcg_rand_n(2) == 0) {
+    if (lcg_rand_n(8) > 0) {
         scale_x = 32768 + lcg_rand_n(65536);
         scale_y = 32768 + lcg_rand_n(65536);
         pixman_transform_init_scale(&transform, scale_x, scale_y);
@@ -203,9 +227,6 @@ uint32_t test_composite(uint32_t initcrc, int testnum, int verbose)
     }
     pixman_image_set_repeat(src_img, repeat);
 
-    w = lcg_rand_n(MAX_DST_WIDTH * 2);
-    h = lcg_rand_n(MAX_DST_HEIGHT * 2);
-
     if (verbose) {
         printf("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
         printf("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n",
@@ -217,7 +238,7 @@ uint32_t test_composite(uint32_t initcrc, int testnum, int verbose)
         printf("w=%d, h=%d\n", w, h);
     }
 
-    if (lcg_rand_n(2) == 0) {
+    if (lcg_rand_n(8) == 0) {
         pixman_box16_t clip_boxes[2];
         int n = lcg_rand_n(2) + 1;
         for (i = 0; i < n; i++) {
@@ -237,9 +258,34 @@ uint32_t test_composite(uint32_t initcrc, int testnum, int verbose)
         pixman_region_fini(&clip);
     }
 
+    if (lcg_rand_n(8) == 0) {
+        pixman_box16_t clip_boxes[2];
+        int n = lcg_rand_n(2) + 1;
+        for (i = 0; i < n; i++) {
+            clip_boxes[i].x1 = lcg_rand_n(dst_width);
+            clip_boxes[i].y1 = lcg_rand_n(dst_height);
+            clip_boxes[i].x2 = clip_boxes[i].x1 + lcg_rand_n(dst_width - clip_boxes[i].x1);
+            clip_boxes[i].y2 = clip_boxes[i].y1 + lcg_rand_n(dst_height - clip_boxes[i].y1);
+            if (verbose) {
+                printf("destination clip box: [%d,%d-%d,%d]\n",
+                    clip_boxes[i].x1, clip_boxes[i].y1,
+                    clip_boxes[i].x2, clip_boxes[i].y2);
+            }
+        }
+        pixman_region_init_rects(&clip, clip_boxes, n);
+        pixman_image_set_clip_region(dst_img, &clip);
+        pixman_region_fini(&clip);
+    }
+
     pixman_image_composite (op, src_img, NULL, dst_img,
                             src_x, src_y, 0, 0, dst_x, dst_y, w, h);
 
+    if (dst_fmt == PIXMAN_x8r8g8b8) {
+        /* ignore unused part */
+        for (i = 0; i < dst_stride * dst_height / 4; i++)
+            dstbuf[i] &= 0xFFFFFF;
+    }
+
     if (verbose) {
         int j;
         for (i = 0; i < dst_height; i++) {
@@ -253,12 +299,6 @@ uint32_t test_composite(uint32_t initcrc, int testnum, int verbose)
     pixman_image_unref (src_img);
     pixman_image_unref (dst_img);
 
-    if (dst_fmt == PIXMAN_x8r8g8b8) {
-        /* ignore unused part */
-        for (i = 0; i < dst_stride * dst_height / 4; i++)
-            dstbuf[i] &= 0xFFFFFF;
-    }
-
     crc32 = Crc32_ComputeBuf(initcrc, dstbuf, dst_stride * dst_height);
     free(srcbuf);
     free(dstbuf);
@@ -273,7 +313,7 @@ int main(int argc, char *argv[])
     if (argc >= 2)
         n = atoi(argv[1]);
 
-    if (n == 0) n = 10000;
+    if (n == 0) n = 3000000;
 
     if (n < 0) {
         crc = test_composite(0, -n, 1);
@@ -285,15 +325,17 @@ int main(int argc, char *argv[])
             crc = test_composite(crc, i, 0);
         }
         printf("crc32=%08X\n", crc);
-        if (n == 10000) {
+#ifdef LITTLE_ENDIAN
+        if (n == 3000000) {
             /* predefined value for running with all the fastpath functions disabled  */
             /* it needs to be updated every time changes are introduced to this program! */
-            if (crc == 0xF2EC6250) {
+            if (crc == 0xC950E5BB) {
                 printf("scaling test passed\n");
             } else {
                 printf("scaling test failed!\n");
             }
         }
+#endif
     }
     return 0;
 }
commit 53ce8838254d436b6a4d527aacdece7dba7ceacd
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 29 22:21:37 2009 -0400

    In pixman-sse2.c test for non-zero source, not just non-zero source alpha.

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index a574787..40e2228 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -152,12 +152,23 @@ pack565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
     return _mm_packus_epi16 (pack565_2x128_128 (*xmm0, *xmm1), pack565_2x128_128 (*xmm2, *xmm3));
 }
 
-static force_inline uint32_t
-packAlpha (__m128i x)
+static force_inline int
+isOpaque (__m128i x)
+{
+    __m128i ffs = _mm_cmpeq_epi8 (x, x);
+    return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
+}
+
+static force_inline int
+isZero (__m128i x)
 {
-    return _mm_cvtsi128_si32 (_mm_packus_epi16 (_mm_packus_epi16 (_mm_srli_epi32 (x, 24),
-                                                                  _mm_setzero_si128 ()),
-                                                _mm_setzero_si128 ()));
+    return _mm_movemask_epi8 (_mm_cmpeq_epi8 (x, _mm_setzero_si128())) == 0xffff;
+}
+
+static force_inline int
+isTransparent (__m128i x)
+{
+    return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, _mm_setzero_si128())) & 0x8888) == 0x8888;
 }
 
 static force_inline __m128i
@@ -472,7 +483,7 @@ coreCombineOverUPixelsse2 (uint32_t src, uint32_t dst)
     {
         return src;
     }
-    else if (a)
+    else if (src)
     {
         ms = unpack_32_1x64 (src);
         return pack_1x64_32 (over_1x64 (ms, expandAlpha_1x64 (ms), unpack_32_1x64 (dst)));
@@ -513,7 +524,7 @@ combine4 (const __m128i *ps, const __m128i *pm)
     {
 	xmmMskLo = load128Unaligned (pm);
 
-        if (!packAlpha (xmmMskLo))
+	if (isTransparent (xmmMskLo))
 	    return _mm_setzero_si128 ();
     }
     
@@ -537,7 +548,6 @@ combine4 (const __m128i *ps, const __m128i *pm)
 static force_inline void
 coreCombineOverUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
 {
-    uint32_t pa;
     uint32_t s, d;
 
     __m128i xmmDstLo, xmmDstHi;
@@ -578,14 +588,11 @@ coreCombineOverUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int
         /* I'm loading unaligned because I'm not sure about the address alignment. */
         xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm);
 
-        /* Check the alpha channel */
-        pa = packAlpha (xmmSrcHi);
-
-        if (pa == 0xffffffff)
+        if (isOpaque (xmmSrcHi))
         {
             save128Aligned ((__m128i*)pd, xmmSrcHi);
         }
-        else if (pa)
+        else if (!isZero (xmmSrcHi))
         {
             xmmDstHi = load128Aligned ((__m128i*) pd);
 
@@ -2512,7 +2519,7 @@ fbCompositeSolid_nx8888sse2 (pixman_implementation_t *imp,
 
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
 
-    if (src >> 24 == 0)
+    if (src == 0)
 	return;
 
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
@@ -2599,7 +2606,7 @@ fbCompositeSolid_nx0565sse2 (pixman_implementation_t *imp,
 
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
 
-    if (src >> 24 == 0)
+    if (src == 0)
         return;
 
     fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
@@ -2680,7 +2687,7 @@ fbCompositeSolidMask_nx8888x8888Csse2 (pixman_implementation_t *imp,
 				      int32_t	width,
 				      int32_t	height)
 {
-    uint32_t	src, srca;
+    uint32_t	src;
     uint32_t	*dstLine, d;
     uint32_t	*maskLine, m;
     uint32_t    packCmp;
@@ -2694,8 +2701,7 @@ fbCompositeSolidMask_nx8888x8888Csse2 (pixman_implementation_t *imp,
 
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
 
-    srca = src >> 24;
-    if (srca == 0)
+    if (src == 0)
 	return;
 
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
@@ -3220,7 +3226,7 @@ fbCompositeSolidMask_nx8x8888sse2 (pixman_implementation_t *imp,
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
 
     srca = src >> 24;
-    if (srca == 0)
+    if (src == 0)
 	return;
 
     fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
@@ -3495,7 +3501,7 @@ fbCompositeSolidMaskSrc_nx8x8888sse2 (pixman_implementation_t *imp,
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
 
     srca = src >> 24;
-    if (srca == 0)
+    if (src == 0)
     {
         pixmanFillsse2 (pDst->bits.bits, pDst->bits.rowstride,
                         PIXMAN_FORMAT_BPP (pDst->bits.format),
@@ -3633,7 +3639,7 @@ fbCompositeSolidMask_nx8x0565sse2 (pixman_implementation_t *imp,
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
 
     srca = src >> 24;
-    if (srca == 0)
+    if (src == 0)
 	return;
 
     fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
@@ -3770,9 +3776,9 @@ fbCompositeSrc_8888RevNPx0565sse2 (pixman_implementation_t *imp,
 {
     uint16_t	*dstLine, *dst, d;
     uint32_t	*srcLine, *src, s;
-    int	dstStride, srcStride;
+    int		dstStride, srcStride;
     uint16_t	w;
-    uint32_t    packCmp;
+    uint32_t    opaque, zero;
 
     __m64 ms;
     __m128i xmmSrc, xmmSrcLo, xmmSrcHi;
@@ -3827,34 +3833,35 @@ fbCompositeSrc_8888RevNPx0565sse2 (pixman_implementation_t *imp,
             xmmSrc = load128Unaligned((__m128i*)src);
             xmmDst = load128Aligned  ((__m128i*)dst);
 
-            packCmp = packAlpha (xmmSrc);
+            opaque = isOpaque (xmmSrc);
+	    zero = isZero (xmmSrc);
 
-            unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
+	    unpack565_128_4x128 (xmmDst, &xmmDst0, &xmmDst1, &xmmDst2, &xmmDst3);
             unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi);
 
             /* preload next round*/
             xmmSrc = load128Unaligned((__m128i*)(src+4));
-            /* preload next round*/
-
-            if (packCmp == 0xffffffff)
+	    
+            if (opaque)
             {
                 invertColors_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst0, &xmmDst1);
             }
-            else if (packCmp)
+            else if (!zero)
             {
                 overRevNonPre_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst0, &xmmDst1);
             }
 
             /* Second round */
-            packCmp = packAlpha (xmmSrc);
+	    opaque = isOpaque (xmmSrc);
+	    zero = isZero (xmmSrc);
 
             unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi);
 
-            if (packCmp == 0xffffffff)
+            if (opaque)
             {
                 invertColors_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst2, &xmmDst3);
             }
-            else if (packCmp)
+            else if (zero)
             {
                 overRevNonPre_2x128 (xmmSrcLo, xmmSrcHi, &xmmDst2, &xmmDst3);
             }
@@ -3906,7 +3913,7 @@ fbCompositeSrc_8888RevNPx8888sse2 (pixman_implementation_t *imp,
     uint32_t	*srcLine, *src, s;
     int	dstStride, srcStride;
     uint16_t	w;
-    uint32_t    packCmp;
+    uint32_t    opaque, zero;
 
     __m128i xmmSrcLo, xmmSrcHi;
     __m128i xmmDstLo, xmmDstHi;
@@ -3957,17 +3964,18 @@ fbCompositeSrc_8888RevNPx8888sse2 (pixman_implementation_t *imp,
 
             xmmSrcHi = load128Unaligned((__m128i*)src);
 
-            packCmp = packAlpha (xmmSrcHi);
+            opaque = isOpaque (xmmSrcHi);
+	    zero = isZero (xmmSrcHi);
 
             unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi);
 
-            if (packCmp == 0xffffffff)
+            if (opaque)
             {
                 invertColors_2x128( xmmSrcLo, xmmSrcHi, &xmmDstLo, &xmmDstHi);
 
                 save128Aligned ((__m128i*)dst, pack_2x128_128 (xmmDstLo, xmmDstHi));
             }
-            else if (packCmp)
+            else if (!zero)
             {
                 xmmDstHi = load128Aligned  ((__m128i*)dst);
 
@@ -4016,7 +4024,7 @@ fbCompositeSolidMask_nx8888x0565Csse2 (pixman_implementation_t *imp,
 				      int32_t     width,
 				      int32_t     height)
 {
-    uint32_t	src, srca;
+    uint32_t	src;
     uint16_t	*dstLine, *dst, d;
     uint32_t	*maskLine, *mask, m;
     int	dstStride, maskStride;
@@ -4031,8 +4039,7 @@ fbCompositeSolidMask_nx8888x0565Csse2 (pixman_implementation_t *imp,
 
     fbComposeGetSolid(pSrc, src, pDst->bits.format);
 
-    srca = src >> 24;
-    if (srca == 0)
+    if (src == 0)
         return;
 
     fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
commit da9f3266fd00a5634fd2fb8a9cffbf24d668aaab
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 29 21:20:20 2009 -0400

    In the mmx implementation, check for source == 0 rather than alpha == 0.
    
    Otherwise we compute the incorrect value when the source has zero in
    the alpha channel, but non-zero in the color channels.

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 957aed2..db87b19 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -446,7 +446,7 @@ mmxCombineOverU (pixman_implementation_t *imp, pixman_op_t op,
 	uint32_t a = ssrc >> 24;
 	if (a == 0xff) {
 	    *dest = ssrc;
-	} else if (a) {
+	} else if (ssrc) {
 	    __m64 s, sa;
 	    s = load8888(ssrc);
 	    sa = expand_alpha(s);
@@ -1447,7 +1447,7 @@ fbCompositeSrc_8888x8888mmx (pixman_implementation_t *imp,
 	    a = s >> 24;
 	    if (a == 0xff)
 		*dst = s;
-	    else if (a) {
+	    else if (s) {
 		__m64 ms, sa;
 		ms = load8888(s);
 		sa = expand_alpha(ms);
@@ -2180,7 +2180,7 @@ fbCompositeSrc_8888RevNPx0565mmx (pixman_implementation_t *imp,
 
 		*(__m64 *)dst = vdest;
 	    }
-	    else if (a0 | a1 | a2 | a3)
+	    else if (s0 | s1 | s2 | s3)
 	    {
 		__m64 vdest = *(__m64 *)dst;
 
@@ -2289,7 +2289,7 @@ fbCompositeSrc_8888RevNPx8888mmx (pixman_implementation_t *imp,
 
 		*(__m64 *)dst = pack8888 (d0, d1);
 	    }
-	    else if (a0 | a1)
+	    else if (s0 | s1)
 	    {
 		__m64 vdest = *(__m64 *)dst;
 
@@ -3064,6 +3064,7 @@ static const FastPathInfo mmx_fast_paths[] =
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeSolid_nx0565mmx,	   0 },
     { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeCopyAreammx,	   0 },
     { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeCopyAreammx,	   0 },
+
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSrc_8888x8888mmx,	   0 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,	PIXMAN_x8r8g8b8, fbCompositeSrc_8888x8888mmx,	   0 },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,	PIXMAN_r5g6b5,	 fbCompositeSrc_8888x0565mmx,	   0 },


More information about the xorg-commit mailing list