pixman: Branch 'master' - 47 commits

Søren Sandmann Pedersen sandmann at kemper.freedesktop.org
Wed May 13 03:52:48 PDT 2009


 pixman/Makefile.am                |    1 
 pixman/combine.inc                |  241 +++++++++---------
 pixman/combine.pl                 |    4 
 pixman/pixman-compose-accessors.c |    4 
 pixman/pixman-compose.c           |  496 +++++---------------------------------
 pixman/pixman-compute-region.c    |    2 
 pixman/pixman-image.c             |  226 +++++++++++++++++
 pixman/pixman-mmx.c               |  131 +++++-----
 pixman/pixman-private.h           |   46 ++-
 pixman/pixman-sse2.c              |  405 ++++++++++++++++++++++---------
 pixman/pixman-vmx.c               |   57 ----
 pixman/refactor                   |  288 ++++++++++++++++++++++
 test/composite-test.c             |    2 
 13 files changed, 1120 insertions(+), 783 deletions(-)

New commits:
commit e17fc72e958e1ddee0b24e8a90ae9609e1e44391
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Wed May 13 06:17:55 2009 -0400

    Remove unused CombineMask64 type

diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 072b0d2..0ef2ef2 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -160,7 +160,6 @@ typedef FASTCALL void (*storeProc32)(pixman_image_t *, uint32_t *bits,
                                      const uint32_t *values, int x, int width,
                                      const pixman_indexed_t *);
 
-typedef FASTCALL void (*CombineMask64) (uint64_t *src, const uint64_t *mask, int width);
 typedef FASTCALL void (*CombineFunc64) (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width);
 typedef FASTCALL void (*fetchProc64)(bits_image_t *pict, int x, int y, int width,
                                      uint64_t *buffer);
@@ -192,7 +191,6 @@ typedef struct _FbComposeFunctions32 {
 typedef struct _FbComposeFunctions64 {
     CombineFunc64 *combineU;
     CombineFunc64 *combineC;
-    CombineMask64 combineMaskU;
 } FbComposeFunctions64;
 
 extern FbComposeFunctions32 pixman_composeFunctions;
commit a1bc6bf15995fae8be2de61f859fcc73d80f7b64
Merge: ffce146... e74a284...
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Wed May 13 05:58:36 2009 -0400

    Merge branch 'master' into refactor

commit ffce1461502c9cc4dbf613b64eddba4c4de6a4d4
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sat May 2 19:55:10 2009 -0400

    Remove unused combineMaskU functions

diff --git a/pixman/combine.inc b/pixman/combine.inc
index c1624ac..ff18732 100644
--- a/pixman/combine.inc
+++ b/pixman/combine.inc
@@ -18,21 +18,6 @@
 
 
 /*
- * Combine src and mask
- */
-FASTCALL static void
-pixman_fbCombineMaskU (comp4_t *src, const comp4_t *mask, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t a = *(mask + i) >> A_SHIFT;
-        comp4_t s = *(src + i);
-        FbByteMul(s, a);
-        *(src + i) = s;
-    }
-}
-
-/*
  * All of the composing functions
  */
 
@@ -1291,6 +1276,5 @@ static CombineFunc pixman_fbCombineFuncC[] = {
 
 FbComposeFunctions pixman_composeFunctions = {
     pixman_fbCombineFuncU,
-    pixman_fbCombineFuncC,
-    pixman_fbCombineMaskU
+    pixman_fbCombineFuncC, 
 };
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 4bc47dc..2030ddd 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -274,6 +274,8 @@ _pixman_image_get_fetcher (pixman_image_t *image,
     }
 }
 
+
+
 #define WRITE_ACCESS(f) ((image->common.write_func)? f##_accessors : f)
 
 static void
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index f8a2af8..1c9b538 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -422,28 +422,6 @@ pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
 
 /* --------------- MMX code patch for fbcompose.c --------------------- */
 
-static FASTCALL void
-mmxCombineMaskU (uint32_t *src, const uint32_t *mask, int width)
-{
-    const uint32_t *end = mask + width;
-    while (mask < end) {
-        uint32_t mmask = *mask;
-	uint32_t maska = mmask >> 24;
-	if (maska == 0) {
-	    *src = 0;
-	} else if (maska != 0xff) {
-	    __m64 a = load8888(mmask);
-	    __m64 s = load8888(*src);
-	    a = expand_alpha(a);
-	    s = pix_multiply(s, a);
-	    *src = store8888(s);
-	}
-	++src;
-	++mask;
-    }
-    _mm_empty();
-}
-
 static force_inline uint32_t
 combine (const uint32_t *src, const uint32_t *mask)
 {
@@ -955,8 +933,6 @@ fbComposeSetupMMX(void)
         pixman_composeFunctions.combineC[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseC;
         pixman_composeFunctions.combineC[PIXMAN_OP_XOR] = mmxCombineXorC;
         pixman_composeFunctions.combineC[PIXMAN_OP_ADD] = mmxCombineAddC;
-
-        pixman_composeFunctions.combineMaskU = mmxCombineMaskU;
     }
 
     initialized = TRUE;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index db57467..072b0d2 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -152,7 +152,6 @@ typedef struct point point_t;
  */
 
 #define FASTCALL
-typedef FASTCALL void (*CombineMask32) (uint32_t *src, const uint32_t *mask, int width);
 typedef FASTCALL void (*CombineFunc32) (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width);
 typedef FASTCALL void (*fetchProc32)(bits_image_t *pict, int x, int y, int width,
                                      uint32_t *buffer);
@@ -188,7 +187,6 @@ typedef struct _FbComposeData {
 typedef struct _FbComposeFunctions32 {
     CombineFunc32 *combineU;
     CombineFunc32 *combineC;
-    CombineMask32 combineMaskU;
 } FbComposeFunctions32;
 
 typedef struct _FbComposeFunctions64 {
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index d815bfc..995c714 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2310,13 +2310,6 @@ createMask_2x32_128 (uint32_t mask0, uint32_t mask1)
 /* SSE2 code patch for fbcompose.c */
 
 static FASTCALL void
-sse2CombineMaskU (uint32_t *dst, const uint32_t *src, int width)
-{
-    coreCombineReverseInUsse2 (dst, src, NULL, width);
-    _mm_empty();
-}
-
-static FASTCALL void
 sse2CombineOverU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineOverUsse2 (dst, src, mask, width);
@@ -2533,8 +2526,6 @@ fbComposeSetupSSE2(void)
         pixman_composeFunctions.combineC[PIXMAN_OP_XOR] = sse2CombineXorC;
         pixman_composeFunctions.combineC[PIXMAN_OP_ADD] = sse2CombineAddC;
 
-        pixman_composeFunctions.combineMaskU = sse2CombineMaskU;
-
 	_mm_empty();
     }
 
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 129cab7..6478b90 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -209,37 +209,6 @@ over (vector unsigned int src, vector unsigned int srca,
         vec_st ((vector unsigned int) tmp1, 0, dest );
 
 static FASTCALL void
-vmxCombineMaskU (uint32_t *src, const uint32_t *msk, int width)
-{
-    int i;
-    vector unsigned int  vsrc, vmsk;
-    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         src_mask, msk_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKS(src, msk)
-
-    /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORS(src, msk)
-
-        vsrc = pix_multiply (vsrc, splat_alpha (vmsk));
-
-        STORE_VECTOR(src)
-
-        msk+=4;
-        src+=4;
-    }
-
-    for (i = width%4; --i >= 0;) {
-        uint32_t a = msk[i] >> 24;
-        uint32_t s = src[i];
-        FbByteMul (s, a);
-        src[i] = s;
-    }
-}
-
-static FASTCALL void
 vmxCombineOverU (uint32_t *dest, const uint32_t *src, int width)
 {
     int i;
@@ -1060,7 +1029,5 @@ void fbComposeSetupVMX (void)
         pixman_composeFunctions.combineC[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseC;
         pixman_composeFunctions.combineC[PIXMAN_OP_XOR] = vmxCombineXorC;
         pixman_composeFunctions.combineC[PIXMAN_OP_ADD] = vmxCombineAddC;
-
-        pixman_composeFunctions.combineMaskU = vmxCombineMaskU;
     }
 }
commit 38e5929400ea8d8bdf0830006f761a5498f558a5
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 14:18:32 2009 -0400

    Optimize source reading in combine4

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index b4732ee..d815bfc 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -506,22 +506,29 @@ combine1 (const uint32_t *ps, const uint32_t *pm)
 static force_inline __m128i
 combine4 (const __m128i *ps, const __m128i *pm)
 {
-    __m128i s = load128Unaligned (ps);
-
+    __m128i xmmSrcLo, xmmSrcHi;
+    __m128i xmmMskLo, xmmMskHi;
+    __m128i s;
+    
     if (pm)
     {
-	__m128i xmmSrcLo, xmmSrcHi;
-	__m128i xmmMskLo, xmmMskHi;
-
 	xmmMskLo = load128Unaligned (pm);
+
+        if (!packAlpha (xmmMskLo))
+	    return _mm_setzero_si128 ();
+    }
+    
+    s = load128Unaligned (ps);
 	
+    if (pm)
+    {
 	unpack_128_2x128 (s, &xmmSrcLo, &xmmSrcHi);
 	unpack_128_2x128 (xmmMskLo, &xmmMskLo, &xmmMskHi);
-
+	
 	expandAlpha_2x128 (xmmMskLo, xmmMskHi, &xmmMskLo, &xmmMskHi);
-
+	
 	pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmMskLo, &xmmMskHi, &xmmSrcLo, &xmmSrcHi);
-
+	
 	s = pack_2x128_128 (xmmSrcLo, xmmSrcHi);
     }
 
commit 3d6720a22777523c428914c2f84439d240778484
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:04:36 2009 -0400

    Enable mmxCombineSaturateU

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 9864d4e..f8a2af8 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -942,9 +942,7 @@ fbComposeSetupMMX(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = mmxCombineXorU; 
 	pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = mmxCombineAddU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_SATURATE] = mmxCombineSaturateU;
-#endif
 
         pixman_composeFunctions.combineC[PIXMAN_OP_SRC] = mmxCombineSrcC;
         pixman_composeFunctions.combineC[PIXMAN_OP_OVER] = mmxCombineOverC;
commit 742d444f96bf160d2b7707cc894dd9b516f3179c
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:04:29 2009 -0400

    Enable mmxCombineAddU

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index bb6b2fd..9864d4e 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -941,8 +941,8 @@ fbComposeSetupMMX(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = mmxCombineAtopU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = mmxCombineXorU; 
-#if 0
 	pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = mmxCombineAddU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_SATURATE] = mmxCombineSaturateU;
 #endif
 
commit fd31818cfba0a750672bf50fbe550fa29ec77d99
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:04:23 2009 -0400

    Enable mmxCombineXorU

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 900f2e4..bb6b2fd 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -940,9 +940,9 @@ fbComposeSetupMMX(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = mmxCombineAtopU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseU;
+        pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = mmxCombineXorU; 
 #if 0
-        pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = mmxCombineXorU;
-        pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = mmxCombineAddU;
+	pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = mmxCombineAddU;
         pixman_composeFunctions.combineU[PIXMAN_OP_SATURATE] = mmxCombineSaturateU;
 #endif
 
commit b7fe2f3378c6fb0828e863cb1a2df9191fb3e25e
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:04:16 2009 -0400

    Enable mmxCombineAtopReverseU

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 31fdcaf..900f2e4 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -939,8 +939,8 @@ fbComposeSetupMMX(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = mmxCombineOutU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = mmxCombineAtopU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = mmxCombineXorU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = mmxCombineAddU;
         pixman_composeFunctions.combineU[PIXMAN_OP_SATURATE] = mmxCombineSaturateU;
commit 55a703f88c60acef5f1053d2d409c6e7048a714c
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:04:10 2009 -0400

    Enable mmxCombineAtopU

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 4494fcb..31fdcaf 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -938,8 +938,8 @@ fbComposeSetupMMX(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = mmxCombineOutU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = mmxCombineAtopU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = mmxCombineXorU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = mmxCombineAddU;
commit f747b4184865c5e8b1c36c7116b6a47560f26e8d
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:04:02 2009 -0400

    Enable mmxCombineOutReverseU

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 7390af3..4494fcb 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -937,8 +937,8 @@ fbComposeSetupMMX(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = mmxCombineInU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = mmxCombineOutU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = mmxCombineAtopU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = mmxCombineXorU;
commit 3c6fd2699dc2741b6ad121eb441a32b52b169a82
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:03:50 2009 -0400

    Enable mmxCombineOutU

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 5208517..7390af3 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -936,8 +936,8 @@ fbComposeSetupMMX(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = mmxCombineOverReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = mmxCombineInU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = mmxCombineOutU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = mmxCombineAtopU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = mmxCombineAtopReverseU;
commit 9d13da03b7d4525aa8ffbb9b787dee8964323810
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:03:39 2009 -0400

    Enable mmxCombineInReverseU

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index d826ca0..5208517 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -935,8 +935,8 @@ fbComposeSetupMMX(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = mmxCombineOverU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = mmxCombineOverReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = mmxCombineInU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = mmxCombineOutU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = mmxCombineAtopU;
commit 2262f0084722d8548071730f8fcbe318560e9fbf
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:03:32 2009 -0400

    Enable mmxCombineInU

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 0f49bdf..d826ca0 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -934,8 +934,8 @@ fbComposeSetupMMX(void)
     {
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = mmxCombineOverU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = mmxCombineOverReverseU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = mmxCombineInU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = mmxCombineOutU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = mmxCombineOutReverseU;
commit 5e5c78a6cc962f154b749d954c35ac663f8ac483
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:03:24 2009 -0400

    Enable mmxCombineOverReverseU

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 5aadc1b..0f49bdf 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -933,8 +933,8 @@ fbComposeSetupMMX(void)
     if (pixman_have_mmx())
     {
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = mmxCombineOverU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = mmxCombineOverReverseU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = mmxCombineInU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = mmxCombineOutU;
commit 81342af3bda044c059a13a37a9ede542212dc5a2
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:03:16 2009 -0400

    Enable mmxCombineOverU

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 98726d7..5aadc1b 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -932,8 +932,8 @@ fbComposeSetupMMX(void)
     /* check if we have MMX support and initialize accordingly */
     if (pixman_have_mmx())
     {
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = mmxCombineOverU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = mmxCombineOverReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = mmxCombineInU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = mmxCombineInReverseU;
commit 3d684556dbdb087fa6d0631f06ccde38bb02dea5
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 03:02:58 2009 -0400

    Implement the mmx combiners with masks (disabled)

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 8262cb1..98726d7 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -444,14 +444,32 @@ mmxCombineMaskU (uint32_t *src, const uint32_t *mask, int width)
     _mm_empty();
 }
 
+static force_inline uint32_t
+combine (const uint32_t *src, const uint32_t *mask)
+{
+    uint32_t ssrc = *src;
+
+    if (mask)
+    {
+	__m64 m = load8888 (*mask);
+	__m64 s = load8888 (ssrc);
+
+	m = expand_alpha (m);
+	s = pix_multiply (s, m);
+
+	ssrc = store8888 (s);
+    }
+
+    return ssrc;
+}
 
 static FASTCALL void
-mmxCombineOverU (uint32_t *dest, const uint32_t *src, int width)
+mmxCombineOverU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = dest + width;
 
     while (dest < end) {
-	uint32_t ssrc = *src;
+	uint32_t ssrc = combine (src, mask);
 	uint32_t a = ssrc >> 24;
 	if (a == 0xff) {
 	    *dest = ssrc;
@@ -463,70 +481,79 @@ mmxCombineOverU (uint32_t *dest, const uint32_t *src, int width)
 	}
 	++dest;
 	++src;
+	if (mask)
+	    ++mask;
     }
     _mm_empty();
 }
 
 static FASTCALL void
-mmxCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+mmxCombineOverReverseU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = dest + width;
 
     while (dest < end) {
 	__m64 d, da;
+	uint32_t s = combine (src, mask);
 	d = load8888(*dest);
 	da = expand_alpha(d);
-	*dest = store8888(over (d, da, load8888(*src)));
+	*dest = store8888(over (d, da, load8888(s)));
         ++dest;
         ++src;
+	if (mask)
+	    mask++;
     }
     _mm_empty();
 }
 
 static FASTCALL void
-mmxCombineInU (uint32_t *dest, const uint32_t *src, int width)
+mmxCombineInU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = dest + width;
 
     while (dest < end) {
         __m64 x, a;
-        x = load8888(*src);
+        x = load8888 (combine (src, mask));
         a = load8888(*dest);
         a = expand_alpha(a);
         x = pix_multiply(x, a);
         *dest = store8888(x);
         ++dest;
         ++src;
+	if (mask)
+	    mask++;
     }
     _mm_empty();
 }
 
 static FASTCALL void
-mmxCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
+mmxCombineInReverseU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = dest + width;
 
     while (dest < end) {
         __m64 x, a;
         x = load8888(*dest);
-        a = load8888(*src);
+        a = load8888(combine (src, mask));
         a = expand_alpha(a);
         x = pix_multiply(x, a);
         *dest = store8888(x);
         ++dest;
         ++src;
+	if (mask)
+	    mask++;
     }
     _mm_empty();
 }
 
 static FASTCALL void
-mmxCombineOutU (uint32_t *dest, const uint32_t *src, int width)
+mmxCombineOutU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = dest + width;
 
     while (dest < end) {
         __m64 x, a;
-        x = load8888(*src);
+        x = load8888(combine (src, mask));
         a = load8888(*dest);
         a = expand_alpha(a);
         a = negate(a);
@@ -534,37 +561,41 @@ mmxCombineOutU (uint32_t *dest, const uint32_t *src, int width)
         *dest = store8888(x);
         ++dest;
         ++src;
+	if (mask)
+	    mask++;
     }
     _mm_empty();
 }
 
 static FASTCALL void
-mmxCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+mmxCombineOutReverseU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = dest + width;
 
     while (dest < end) {
         __m64 x, a;
         x = load8888(*dest);
-        a = load8888(*src);
+        a = load8888(combine (src, mask));
         a = expand_alpha(a);
         a = negate(a);
         x = pix_multiply(x, a);
         *dest = store8888(x);
         ++dest;
         ++src;
+	if (mask)
+	    mask++;
     }
     _mm_empty();
 }
 
 static FASTCALL void
-mmxCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
+mmxCombineAtopU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = dest + width;
 
     while (dest < end) {
         __m64 s, da, d, sia;
-        s = load8888(*src);
+        s = load8888(combine (src, mask));
         d = load8888(*dest);
         sia = expand_alpha(s);
         sia = negate(sia);
@@ -573,12 +604,14 @@ mmxCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
         *dest = store8888(s);
         ++dest;
         ++src;
+	if (mask)
+	    mask++;
     }
     _mm_empty();
 }
 
 static FASTCALL void
-mmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+mmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end;
 
@@ -586,7 +619,7 @@ mmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
 
     while (dest < end) {
         __m64 s, dia, d, sa;
-        s = load8888(*src);
+        s = load8888(combine(src, mask));
         d = load8888(*dest);
         sa = expand_alpha(s);
         dia = expand_alpha(d);
@@ -595,18 +628,20 @@ mmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
         *dest = store8888(s);
         ++dest;
         ++src;
+	if (mask)
+	    mask++;
     }
     _mm_empty();
 }
 
 static FASTCALL void
-mmxCombineXorU (uint32_t *dest, const uint32_t *src, int width)
+mmxCombineXorU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = dest + width;
 
     while (dest < end) {
         __m64 s, dia, d, sia;
-        s = load8888(*src);
+        s = load8888(combine(src, mask));
         d = load8888(*dest);
         sia = expand_alpha(s);
         dia = expand_alpha(d);
@@ -616,32 +651,36 @@ mmxCombineXorU (uint32_t *dest, const uint32_t *src, int width)
         *dest = store8888(s);
         ++dest;
         ++src;
+	if (mask)
+	    mask++;
     }
     _mm_empty();
 }
 
 static FASTCALL void
-mmxCombineAddU (uint32_t *dest, const uint32_t *src, int width)
+mmxCombineAddU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = dest + width;
     while (dest < end) {
         __m64 s, d;
-        s = load8888(*src);
+        s = load8888(combine(src,mask));
         d = load8888(*dest);
         s = pix_add(s, d);
         *dest = store8888(s);
         ++dest;
         ++src;
+	if (mask)
+	    mask++;
     }
     _mm_empty();
 }
 
 static FASTCALL void
-mmxCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
+mmxCombineSaturateU (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = dest + width;
     while (dest < end) {
-        uint32_t s = *src;
+        uint32_t s = combine(src,mask);
         uint32_t d = *dest;
         __m64 ms = load8888(s);
         __m64 md = load8888(d);
@@ -657,6 +696,8 @@ mmxCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
         *dest = store8888(md);
         ++src;
         ++dest;
+	if (mask)
+	    mask++;
     }
     _mm_empty();
 }
commit cdb6aa49bec3595a00eb203c258111c713de9bbc
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 01:01:09 2009 -0400

    Enable sse2CombineSaturateU

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 001df0b..b4732ee 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2511,10 +2511,8 @@ fbComposeSetupSSE2(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = sse2CombineAtopReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = sse2CombineXorU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = sse2CombineAddU;
-#if 0
 
         pixman_composeFunctions.combineU[PIXMAN_OP_SATURATE] = sse2CombineSaturateU;
-#endif
 
         pixman_composeFunctions.combineC[PIXMAN_OP_SRC] = sse2CombineSrcC;
         pixman_composeFunctions.combineC[PIXMAN_OP_OVER] = sse2CombineOverC;
commit 29528b9523e779ff59029f11f79f1e22cbeaf4cd
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 01:00:52 2009 -0400

    Enable sse2CombineAddU

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 8d45e24..001df0b 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2510,8 +2510,8 @@ fbComposeSetupSSE2(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = sse2CombineAtopU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = sse2CombineAtopReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = sse2CombineXorU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = sse2CombineAddU;
+#if 0
 
         pixman_composeFunctions.combineU[PIXMAN_OP_SATURATE] = sse2CombineSaturateU;
 #endif
commit 374ad0c363baf93e724409f575e1bbd7cfd8914a
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 01:00:47 2009 -0400

    Enable sse2CombineAtopXorU

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index e225db3..8d45e24 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2509,8 +2509,8 @@ fbComposeSetupSSE2(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = sse2CombineOutReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = sse2CombineAtopU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = sse2CombineAtopReverseU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = sse2CombineXorU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = sse2CombineAddU;
 
         pixman_composeFunctions.combineU[PIXMAN_OP_SATURATE] = sse2CombineSaturateU;
commit c1bdbff80ac724cab8213d41f91c525e10ca9ff1
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 01:00:39 2009 -0400

    Enable sse2CombineAtopReverseU

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 5a21bef..e225db3 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2508,8 +2508,8 @@ fbComposeSetupSSE2(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = sse2CombineOutU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = sse2CombineOutReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = sse2CombineAtopU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = sse2CombineAtopReverseU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = sse2CombineXorU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = sse2CombineAddU;
 
commit 74d79f271c45807bf23b395e7050130f7da1139c
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 01:00:33 2009 -0400

    Enable sse2CombineAtopU

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index bd7b8e6..5a21bef 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2507,8 +2507,8 @@ fbComposeSetupSSE2(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = sse2CombineInReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = sse2CombineOutU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = sse2CombineOutReverseU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = sse2CombineAtopU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = sse2CombineAtopReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = sse2CombineXorU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = sse2CombineAddU;
commit c3d92fe51869c4e7b4ed83fb3bed5d0e7e651782
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 01:00:26 2009 -0400

    Enable sse2CombineOutReverseU

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 6087b84..bd7b8e6 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2506,8 +2506,8 @@ fbComposeSetupSSE2(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = sse2CombineInU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = sse2CombineInReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = sse2CombineOutU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = sse2CombineOutReverseU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = sse2CombineAtopU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = sse2CombineAtopReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = sse2CombineXorU;
commit 53809bde5265378c400792bdb0b2639a0cde6c08
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 01:00:20 2009 -0400

    Enable sse2CombineOutU

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index a46e9bb..6087b84 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2505,8 +2505,8 @@ fbComposeSetupSSE2(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = sse2CombineOverReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = sse2CombineInU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = sse2CombineInReverseU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = sse2CombineOutU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = sse2CombineOutReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = sse2CombineAtopU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = sse2CombineAtopReverseU;
commit 9293a51323e7e2b4aedb75c3fa55475aa4a269e7
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 01:00:12 2009 -0400

    Enable sse2CombineInReverseU

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index e9eec51..a46e9bb 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2504,8 +2504,8 @@ fbComposeSetupSSE2(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = sse2CombineOverU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = sse2CombineOverReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = sse2CombineInU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = sse2CombineInReverseU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = sse2CombineOutU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = sse2CombineOutReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = sse2CombineAtopU;
commit d45c0db9603a84989d59e5e1519b424ab756f221
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 01:00:07 2009 -0400

    Enable sse2CombineInU

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index fcbb3b7..e9eec51 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2503,8 +2503,8 @@ fbComposeSetupSSE2(void)
         /* SSE code patch for fbcompose.c */
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = sse2CombineOverU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = sse2CombineOverReverseU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = sse2CombineInU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = sse2CombineInReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = sse2CombineOutU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = sse2CombineOutReverseU;
commit 92c1199bf7e9379beca52fa880a0a436ffdda7e2
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 00:59:59 2009 -0400

    Enable sse2CombineOverReverseU

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 377d86f..fcbb3b7 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2502,8 +2502,8 @@ fbComposeSetupSSE2(void)
 
         /* SSE code patch for fbcompose.c */
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = sse2CombineOverU;
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = sse2CombineOverReverseU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = sse2CombineInU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = sse2CombineInReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = sse2CombineOutU;
commit d1879bc048be083198a35bb037273171bc07a211
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 00:59:51 2009 -0400

    Enable sse2CombineOverU

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index d24b625..377d86f 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2501,8 +2501,8 @@ fbComposeSetupSSE2(void)
         xMaskAlpha = createMask_2x32_64 (0x00ff0000, 0x00000000);
 
         /* SSE code patch for fbcompose.c */
-#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = sse2CombineOverU;
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = sse2CombineOverReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = sse2CombineInU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = sse2CombineInReverseU;
commit 22fda2d1aba7368a7545d1659b737e695a6c5b26
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 00:59:29 2009 -0400

    Implement the sse2 combiners with masks (disabled)

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 13509c9..d24b625 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -311,7 +311,7 @@ load128Aligned (__m128i* src)
 
 /* load 4 pixels from a unaligned address */
 static force_inline __m128i
-load128Unaligned (__m128i* src)
+load128Unaligned (const __m128i* src)
 {
     return _mm_loadu_si128 (src);
 }
@@ -482,8 +482,54 @@ coreCombineOverUPixelsse2 (uint32_t src, uint32_t dst)
     return dst;
 }
 
+static force_inline uint32_t
+combine1 (const uint32_t *ps, const uint32_t *pm)
+{
+    uint32_t s = *ps;
+
+    if (pm)
+    {
+	__m64 ms, mm;
+
+	mm = unpack_32_1x64 (*pm);
+	mm = expandAlpha_1x64 (mm);
+	
+	ms = unpack_32_1x64 (s);
+	ms = pixMultiply_1x64 (ms, mm);
+
+	s = pack_1x64_32 (ms);
+    }
+
+    return s;
+}
+
+static force_inline __m128i
+combine4 (const __m128i *ps, const __m128i *pm)
+{
+    __m128i s = load128Unaligned (ps);
+
+    if (pm)
+    {
+	__m128i xmmSrcLo, xmmSrcHi;
+	__m128i xmmMskLo, xmmMskHi;
+
+	xmmMskLo = load128Unaligned (pm);
+	
+	unpack_128_2x128 (s, &xmmSrcLo, &xmmSrcHi);
+	unpack_128_2x128 (xmmMskLo, &xmmMskLo, &xmmMskHi);
+
+	expandAlpha_2x128 (xmmMskLo, xmmMskHi, &xmmMskLo, &xmmMskHi);
+
+	pixMultiply_2x128 (&xmmSrcLo, &xmmSrcHi, &xmmMskLo, &xmmMskHi, &xmmSrcLo, &xmmSrcHi);
+
+	s = pack_2x128_128 (xmmSrcLo, xmmSrcHi);
+    }
+
+    return s;
+}
+
 static force_inline void
-coreCombineOverUsse2 (uint32_t* pd, const uint32_t* ps, int w)
+coreCombineOverUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
 {
     uint32_t pa;
     uint32_t s, d;
@@ -495,30 +541,36 @@ coreCombineOverUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     /* Align dst on a 16-byte boundary */
     while (w &&
            ((unsigned long)pd & 15))
     {
         d = *pd;
-        s = *ps++;
+        s = combine1 (ps, pm);
 
         *pd++ = coreCombineOverUPixelsse2 (s, d);
+	ps++;
+	if (pm)
+	    pm++;
         w--;
     }
 
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w >= 4)
     {
         /* fill cache line with next memory */
         cachePrefetchNext ((__m128i*)ps);
         cachePrefetchNext ((__m128i*)pd);
+	cachePrefetchNext ((__m128i*)pm);
 
         /* I'm loading unaligned because I'm not sure about the address alignment. */
-        xmmSrcHi = load128Unaligned ((__m128i*) ps);
+        xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm);
 
         /* Check the alpha channel */
         pa = packAlpha (xmmSrcHi);
@@ -545,20 +597,25 @@ coreCombineOverUsse2 (uint32_t* pd, const uint32_t* ps, int w)
         w -= 4;
         ps += 4;
         pd += 4;
+	if (pm)
+	    pm += 4;
     }
 
     while (w)
     {
         d = *pd;
-        s = *ps++;
+        s = combine1 (ps, pm);
 
         *pd++ = coreCombineOverUPixelsse2 (s, d);
+	ps++;
+	if (pm)
+	    pm++;
         w--;
     }
 }
 
 static force_inline void
-coreCombineOverReverseUsse2 (uint32_t* pd, const uint32_t* ps, int w)
+coreCombineOverReverseUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
 {
     uint32_t s, d;
 
@@ -569,30 +626,36 @@ coreCombineOverReverseUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     /* Align dst on a 16-byte boundary */
     while (w &&
            ((unsigned long)pd & 15))
     {
         d = *pd;
-        s = *ps++;
+        s = combine1 (ps, pm);
 
         *pd++ = coreCombineOverUPixelsse2 (d, s);
         w--;
+	ps++;
+	if (pm)
+	    pm++;
     }
 
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w >= 4)
     {
         /* fill cache line with next memory */
         cachePrefetchNext ((__m128i*)ps);
         cachePrefetchNext ((__m128i*)pd);
+	cachePrefetchNext ((__m128i*)pm);
 
         /* I'm loading unaligned because I'm not sure about the address alignment. */
-        xmmSrcHi = load128Unaligned ((__m128i*) ps);
+        xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm);
         xmmDstHi = load128Aligned ((__m128i*) pd);
 
         unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi);
@@ -608,15 +671,20 @@ coreCombineOverReverseUsse2 (uint32_t* pd, const uint32_t* ps, int w)
         w -= 4;
         ps += 4;
         pd += 4;
+	if (pm)
+	    pm += 4;
     }
 
     while (w)
     {
         d = *pd;
-        s = *ps++;
+        s = combine1 (ps, pm);
 
         *pd++ = coreCombineOverUPixelsse2 (d, s);
+	ps++;
         w--;
+	if (pm)
+	    pm++;
     }
 }
 
@@ -638,7 +706,7 @@ coreCombineInUPixelsse2 (uint32_t src, uint32_t dst)
 }
 
 static force_inline void
-coreCombineInUsse2 (uint32_t* pd, const uint32_t* ps, int w)
+coreCombineInUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
 {
     uint32_t s, d;
 
@@ -648,28 +716,34 @@ coreCombineInUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w && ((unsigned long) pd & 15))
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
 
         *pd++ = coreCombineInUPixelsse2 (d, s);
         w--;
+	ps++;
+	if (pm)
+	    pm++;
     }
 
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w >= 4)
     {
         /* fill cache line with next memory */
         cachePrefetchNext ((__m128i*)ps);
         cachePrefetchNext ((__m128i*)pd);
+	cachePrefetchNext ((__m128i*)pm);
 
         xmmDstHi = load128Aligned ((__m128i*) pd);
-        xmmSrcHi = load128Unaligned ((__m128i*) ps);
+        xmmSrcHi = combine4 ((__m128i*) ps, (__m128i*) pm);
 
         unpack_128_2x128 (xmmDstHi, &xmmDstLo, &xmmDstHi);
         expandAlpha_2x128 (xmmDstLo, xmmDstHi, &xmmDstLo, &xmmDstHi);
@@ -682,20 +756,25 @@ coreCombineInUsse2 (uint32_t* pd, const uint32_t* ps, int w)
         ps += 4;
         pd += 4;
         w -= 4;
+	if (pm)
+	    pm += 4;
     }
 
     while (w)
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
 
         *pd++ = coreCombineInUPixelsse2 (d, s);
         w--;
+	ps++;
+	if (pm)
+	    pm++;
     }
 }
 
 static force_inline void
-coreCombineReverseInUsse2 (uint32_t* pd, const uint32_t* ps, int w)
+coreCombineReverseInUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm, int w)
 {
     uint32_t s, d;
 
@@ -705,28 +784,34 @@ coreCombineReverseInUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w && ((unsigned long) pd & 15))
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
 
         *pd++ = coreCombineInUPixelsse2 (s, d);
+	ps++;
         w--;
+	if (pm)
+	    pm++;
     }
 
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w >= 4)
     {
         /* fill cache line with next memory */
         cachePrefetchNext ((__m128i*)ps);
         cachePrefetchNext ((__m128i*)pd);
+	cachePrefetchNext ((__m128i*)pm);
 
         xmmDstHi = load128Aligned ((__m128i*) pd);
-        xmmSrcHi = load128Unaligned ((__m128i*) ps);
+        xmmSrcHi = combine4 ((__m128i*) ps, (__m128i*)pm);
 
         unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi);
         expandAlpha_2x128 (xmmSrcLo, xmmSrcHi, &xmmSrcLo, &xmmSrcHi);
@@ -739,37 +824,47 @@ coreCombineReverseInUsse2 (uint32_t* pd, const uint32_t* ps, int w)
         ps += 4;
         pd += 4;
         w -= 4;
+	if (pm)
+	    pm += 4;
     }
 
     while (w)
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
 
         *pd++ = coreCombineInUPixelsse2 (s, d);
         w--;
+	ps++;
+	if (pm)
+	    pm++;
     }
 }
 
 static force_inline void
-coreCombineReverseOutUsse2 (uint32_t* pd, const uint32_t* ps, int w)
+coreCombineReverseOutUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
 {
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w && ((unsigned long) pd & 15))
     {
-        uint32_t s = *ps++;
+        uint32_t s = combine1 (ps, pm);
         uint32_t d = *pd;
 
         *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (d), negate_1x64 (expandAlpha_1x64 (unpack_32_1x64 (s)))));
+	if (pm)
+	    pm++;
+	ps++;
         w--;
     }
 
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w >= 4)
     {
@@ -779,8 +874,9 @@ coreCombineReverseOutUsse2 (uint32_t* pd, const uint32_t* ps, int w)
         /* fill cache line with next memory */
         cachePrefetchNext ((__m128i*)ps);
         cachePrefetchNext ((__m128i*)pd);
+	cachePrefetchNext ((__m128i*)pm);
 
-        xmmSrcHi = load128Unaligned ((__m128i*) ps);
+        xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm);
         xmmDstHi = load128Aligned ((__m128i*) pd);
 
         unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi);
@@ -795,38 +891,48 @@ coreCombineReverseOutUsse2 (uint32_t* pd, const uint32_t* ps, int w)
 
         ps += 4;
         pd += 4;
+	if (pm)
+	    pm += 4;
         w -= 4;
     }
 
     while (w)
     {
-        uint32_t s = *ps++;
+        uint32_t s = combine1 (ps, pm);
         uint32_t d = *pd;
 
         *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (d), negate_1x64 (expandAlpha_1x64 (unpack_32_1x64 (s)))));
+	ps++;
+	if (pm)
+	    pm++;
         w--;
     }
 }
 
 static force_inline void
-coreCombineOutUsse2 (uint32_t* pd, const uint32_t* ps, int w)
+coreCombineOutUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
 {
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w && ((unsigned long) pd & 15))
     {
-        uint32_t s = *ps++;
+        uint32_t s = combine1 (ps, pm);
         uint32_t d = *pd;
 
         *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (s), negate_1x64 (expandAlpha_1x64 (unpack_32_1x64 (d)))));
         w--;
+	ps++;
+	if (pm)
+	    pm++;
     }
 
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w >= 4)
     {
@@ -836,8 +942,9 @@ coreCombineOutUsse2 (uint32_t* pd, const uint32_t* ps, int w)
         /* fill cache line with next memory */
         cachePrefetchNext ((__m128i*)ps);
         cachePrefetchNext ((__m128i*)pd);
+	cachePrefetchNext ((__m128i*)pm);
 
-        xmmSrcHi = load128Unaligned ((__m128i*) ps);
+        xmmSrcHi = combine4 ((__m128i*) ps, (__m128i*)pm);
         xmmDstHi = load128Aligned ((__m128i*) pd);
 
         unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi);
@@ -853,15 +960,20 @@ coreCombineOutUsse2 (uint32_t* pd, const uint32_t* ps, int w)
         ps += 4;
         pd += 4;
         w -= 4;
+	if (pm)
+	    pm += 4;
     }
 
     while (w)
     {
-        uint32_t s = *ps++;
+        uint32_t s = combine1 (ps, pm);
         uint32_t d = *pd;
 
         *pd++ = pack_1x64_32 (pixMultiply_1x64 (unpack_32_1x64 (s), negate_1x64 (expandAlpha_1x64 (unpack_32_1x64 (d)))));
         w--;
+	ps++;
+	if (pm)
+	    pm++;
     }
 }
 
@@ -878,7 +990,7 @@ coreCombineAtopUPixelsse2 (uint32_t src, uint32_t dst)
 }
 
 static force_inline void
-coreCombineAtopUsse2 (uint32_t* pd, const uint32_t* ps, int w)
+coreCombineAtopUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
 {
     uint32_t s, d;
 
@@ -890,27 +1002,33 @@ coreCombineAtopUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w && ((unsigned long) pd & 15))
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
 
         *pd++ = coreCombineAtopUPixelsse2 (s, d);
         w--;
+	ps++;
+	if (pm)
+	    pm++;
     }
 
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w >= 4)
     {
         /* fill cache line with next memory */
         cachePrefetchNext ((__m128i*)ps);
         cachePrefetchNext ((__m128i*)pd);
+	cachePrefetchNext ((__m128i*)pm);
 
-        xmmSrcHi = load128Unaligned ((__m128i*) ps);
+        xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm);
         xmmDstHi = load128Aligned ((__m128i*) pd);
 
         unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi);
@@ -930,15 +1048,20 @@ coreCombineAtopUsse2 (uint32_t* pd, const uint32_t* ps, int w)
         ps += 4;
         pd += 4;
         w -= 4;
+	if (pm)
+	    pm += 4;
     }
 
     while (w)
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
 
         *pd++ = coreCombineAtopUPixelsse2 (s, d);
         w--;
+	ps++;
+	if (pm)
+	    pm++;
     }
 }
 
@@ -955,7 +1078,7 @@ coreCombineReverseAtopUPixelsse2 (uint32_t src, uint32_t dst)
 }
 
 static force_inline void
-coreCombineReverseAtopUsse2 (uint32_t* pd, const uint32_t* ps, int w)
+coreCombineReverseAtopUsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
 {
     uint32_t s, d;
 
@@ -967,27 +1090,33 @@ coreCombineReverseAtopUsse2 (uint32_t* pd, const uint32_t* ps, int w)
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w && ((unsigned long) pd & 15))
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
 
         *pd++ = coreCombineReverseAtopUPixelsse2 (s, d);
+	ps++;
         w--;
+	if (pm)
+	    pm++;
     }
 
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w >= 4)
     {
         /* fill cache line with next memory */
         cachePrefetchNext ((__m128i*)ps);
         cachePrefetchNext ((__m128i*)pd);
+	cachePrefetchNext ((__m128i*)pm);
 
-        xmmSrcHi = load128Unaligned ((__m128i*) ps);
+        xmmSrcHi = combine4 ((__m128i*)ps, (__m128i*)pm);
         xmmDstHi = load128Aligned ((__m128i*) pd);
 
         unpack_128_2x128 (xmmSrcHi, &xmmSrcLo, &xmmSrcHi);
@@ -1007,15 +1136,20 @@ coreCombineReverseAtopUsse2 (uint32_t* pd, const uint32_t* ps, int w)
         ps += 4;
         pd += 4;
         w -= 4;
+	if (pm)
+	    pm += 4;
     }
 
     while (w)
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
 
         *pd++ = coreCombineReverseAtopUPixelsse2 (s, d);
+	ps++;
         w--;
+	if (pm)
+	    pm++;
     }
 }
 
@@ -1032,13 +1166,14 @@ coreCombineXorUPixelsse2 (uint32_t src, uint32_t dst)
 }
 
 static force_inline void
-coreCombineXorUsse2 (uint32_t* dst, const uint32_t* src, int width)
+coreCombineXorUsse2 (uint32_t* dst, const uint32_t* src, const uint32_t *mask, int width)
 {
     int w = width;
     uint32_t s, d;
     uint32_t* pd = dst;
     const uint32_t* ps = src;
-
+    const uint32_t* pm = mask;
+    
     __m128i xmmSrc, xmmSrcLo, xmmSrcHi;
     __m128i xmmDst, xmmDstLo, xmmDstHi;
     __m128i xmmAlphaSrcLo, xmmAlphaSrcHi;
@@ -1047,27 +1182,33 @@ coreCombineXorUsse2 (uint32_t* dst, const uint32_t* src, int width)
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w && ((unsigned long) pd & 15))
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
 
         *pd++ = coreCombineXorUPixelsse2 (s, d);
         w--;
+	ps++;
+	if (pm)
+	    pm++;
     }
 
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w >= 4)
     {
         /* fill cache line with next memory */
         cachePrefetchNext ((__m128i*)ps);
         cachePrefetchNext ((__m128i*)pd);
+	cachePrefetchNext ((__m128i*)pm);
 
-        xmmSrc = load128Unaligned ((__m128i*) ps);
+        xmmSrc = combine4 ((__m128i*) ps, (__m128i*) pm);
         xmmDst = load128Aligned ((__m128i*) pd);
 
         unpack_128_2x128 (xmmSrc, &xmmSrcLo, &xmmSrcHi);
@@ -1088,34 +1229,44 @@ coreCombineXorUsse2 (uint32_t* dst, const uint32_t* src, int width)
         ps += 4;
         pd += 4;
         w -= 4;
+	if (pm)
+	    pm += 4;
     }
 
     while (w)
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
 
         *pd++ = coreCombineXorUPixelsse2 (s, d);
         w--;
+	ps++;
+	if (pm)
+	    pm++;
     }
 }
 
 static force_inline void
-coreCombineAddUsse2 (uint32_t* dst, const uint32_t* src, int width)
+coreCombineAddUsse2 (uint32_t* dst, const uint32_t* src, const uint32_t* mask, int width)
 {
     int w = width;
     uint32_t s,d;
     uint32_t* pd = dst;
     const uint32_t* ps = src;
+    const uint32_t* pm = mask;
 
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
+	ps++;
+	if (pm)
+	    pm++;
         *pd++ = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
         w--;
     }
@@ -1123,26 +1274,36 @@ coreCombineAddUsse2 (uint32_t* dst, const uint32_t* src, int width)
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w >= 4)
     {
+	__m128i s;
+	
         /* fill cache line with next memory */
         cachePrefetchNext ((__m128i*)ps);
         cachePrefetchNext ((__m128i*)pd);
+	cachePrefetchNext ((__m128i*)pm);
 
+	s = combine4((__m128i*)ps,(__m128i*)pm);
+	
         save128Aligned( (__m128i*)pd,
-                        _mm_adds_epu8( load128Unaligned((__m128i*)ps),
-                                       load128Aligned  ((__m128i*)pd)) );
+                        _mm_adds_epu8( s, load128Aligned  ((__m128i*)pd)) );
         pd += 4;
         ps += 4;
+	if (pm)
+	    pm += 4;
         w -= 4;
     }
 
     while (w--)
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
+	ps++;
         *pd++ = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
+	if (pm)
+	    pm++;
     }
 }
 
@@ -1163,7 +1324,7 @@ coreCombineSaturateUPixelsse2 (uint32_t src, uint32_t dst)
 }
 
 static force_inline void
-coreCombineSaturateUsse2 (uint32_t *pd, const uint32_t *ps, int w)
+coreCombineSaturateUsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
 {
     uint32_t s,d;
 
@@ -1173,27 +1334,33 @@ coreCombineSaturateUsse2 (uint32_t *pd, const uint32_t *ps, int w)
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
         *pd++ = coreCombineSaturateUPixelsse2 (s, d);
         w--;
+	ps++;
+	if (pm)
+	    pm++;
     }
 
     /* call prefetch hint to optimize cache load*/
     cachePrefetch ((__m128i*)ps);
     cachePrefetch ((__m128i*)pd);
+    cachePrefetch ((__m128i*)pm);
 
     while (w >= 4)
     {
         /* fill cache line with next memory */
         cachePrefetchNext ((__m128i*)ps);
         cachePrefetchNext ((__m128i*)pd);
+	cachePrefetchNext ((__m128i*)pm);
 
         xmmDst = load128Aligned  ((__m128i*)pd);
-        xmmSrc = load128Unaligned((__m128i*)ps);
+        xmmSrc = combine4 ((__m128i*)ps, (__m128i*)pm);
 
         packCmp = _mm_movemask_epi8 (_mm_cmpgt_epi32 (_mm_srli_epi32 (xmmSrc, 24),
                                                       _mm_srli_epi32 (_mm_xor_si128 (xmmDst, Maskff000000), 24)));
@@ -1201,21 +1368,29 @@ coreCombineSaturateUsse2 (uint32_t *pd, const uint32_t *ps, int w)
         /* if some alpha src is grater than respective ~alpha dst */
         if (packCmp)
         {
-            s = *ps++;
+            s = combine1 (ps++, pm);
             d = *pd;
             *pd++ = coreCombineSaturateUPixelsse2 (s, d);
+	    if (pm)
+		pm++;
 
-            s = *ps++;
+            s = combine1 (ps++, pm);
             d = *pd;
             *pd++ = coreCombineSaturateUPixelsse2 (s, d);
+	    if (pm)
+		pm++;
 
-            s = *ps++;
+            s = combine1 (ps++, pm);
             d = *pd;
             *pd++ = coreCombineSaturateUPixelsse2 (s, d);
+	    if (pm)
+		pm++;
 
-            s = *ps++;
+            s = combine1 (ps++, pm);
             d = *pd;
             *pd++ = coreCombineSaturateUPixelsse2 (s, d);
+	    if (pm)
+		pm++;
         }
         else
         {
@@ -1223,6 +1398,8 @@ coreCombineSaturateUsse2 (uint32_t *pd, const uint32_t *ps, int w)
 
             pd += 4;
             ps += 4;
+	    if (pm)
+		pm += 4;
         }
 
         w -= 4;
@@ -1230,9 +1407,12 @@ coreCombineSaturateUsse2 (uint32_t *pd, const uint32_t *ps, int w)
 
     while (w--)
     {
-        s = *ps++;
+        s = combine1 (ps, pm);
         d = *pd;
         *pd++ = coreCombineSaturateUPixelsse2 (s, d);
+	ps++;
+	if (pm)
+	    pm++;
     }
 }
 
@@ -2125,84 +2305,84 @@ createMask_2x32_128 (uint32_t mask0, uint32_t mask1)
 static FASTCALL void
 sse2CombineMaskU (uint32_t *dst, const uint32_t *src, int width)
 {
-    coreCombineReverseInUsse2 (dst, src, width);
+    coreCombineReverseInUsse2 (dst, src, NULL, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineOverU (uint32_t *dst, const uint32_t *src, int width)
+sse2CombineOverU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
-    coreCombineOverUsse2 (dst, src, width);
+    coreCombineOverUsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineOverReverseU (uint32_t *dst, const uint32_t *src, int width)
+sse2CombineOverReverseU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
-    coreCombineOverReverseUsse2 (dst, src, width);
+    coreCombineOverReverseUsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineInU (uint32_t *dst, const uint32_t *src, int width)
+sse2CombineInU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
-    coreCombineInUsse2 (dst, src, width);
+    coreCombineInUsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineInReverseU (uint32_t *dst, const uint32_t *src, int width)
+sse2CombineInReverseU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
-    coreCombineReverseInUsse2 (dst, src, width);
+    coreCombineReverseInUsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineOutU (uint32_t *dst, const uint32_t *src, int width)
+sse2CombineOutU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
-    coreCombineOutUsse2 (dst, src, width);
+    coreCombineOutUsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineOutReverseU (uint32_t *dst, const uint32_t *src, int width)
+sse2CombineOutReverseU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
-    coreCombineReverseOutUsse2 (dst, src, width);
+    coreCombineReverseOutUsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineAtopU (uint32_t *dst, const uint32_t *src, int width)
+sse2CombineAtopU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
-    coreCombineAtopUsse2 (dst, src, width);
+    coreCombineAtopUsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineAtopReverseU (uint32_t *dst, const uint32_t *src, int width)
+sse2CombineAtopReverseU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
-    coreCombineReverseAtopUsse2 (dst, src, width);
+    coreCombineReverseAtopUsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineXorU (uint32_t *dst, const uint32_t *src, int width)
+sse2CombineXorU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
-    coreCombineXorUsse2 (dst, src, width);
+    coreCombineXorUsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineAddU (uint32_t *dst, const uint32_t *src, int width)
+sse2CombineAddU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
-    coreCombineAddUsse2 (dst, src, width);
+    coreCombineAddUsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineSaturateU (uint32_t *dst, const uint32_t *src, int width)
+sse2CombineSaturateU (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
-    coreCombineSaturateUsse2 (dst, src, width);
+    coreCombineSaturateUsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
@@ -2327,7 +2507,6 @@ fbComposeSetupSSE2(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = sse2CombineInU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = sse2CombineInReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = sse2CombineOutU;
-
         pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = sse2CombineOutReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = sse2CombineAtopU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = sse2CombineAtopReverseU;
@@ -2921,7 +3100,7 @@ fbCompositeSrc_8888x8888sse2 (pixman_op_t op,
 
     while (height--)
     {
-        coreCombineOverUsse2 (dst, src, width);
+        coreCombineOverUsse2 (dst, src, NULL, width);
 
         dst += dstStride;
         src += srcStride;
@@ -4351,7 +4530,7 @@ fbCompositeSrcAdd_8000x8000sse2 (pixman_op_t op,
             w--;
         }
 
-        coreCombineAddUsse2 ((uint32_t*)dst, (uint32_t*)src, w >> 2);
+        coreCombineAddUsse2 ((uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
 
         /* Small tail */
         dst += w & 0xfffc;
@@ -4401,7 +4580,7 @@ fbCompositeSrcAdd_8888x8888sse2 (pixman_op_t 	op,
         src = srcLine;
         srcLine += srcStride;
 
-        coreCombineAddUsse2 (dst, src, width);
+        coreCombineAddUsse2 (dst, src, NULL, width);
     }
 
     _mm_empty();
commit 1ddd91bfee87c13ce18d82d9ab9b2fb2de7cca22
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Fri May 1 00:58:38 2009 -0400

    Use memcpy() in fbCombineSrcU when there is no mask

diff --git a/pixman/combine.inc b/pixman/combine.inc
index 8c0955a..c1624ac 100644
--- a/pixman/combine.inc
+++ b/pixman/combine.inc
@@ -62,11 +62,16 @@ fbCombineSrcU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width
 {
     int i;
 
-    for (i = 0; i < width; ++i)
+    if (!mask)
+	memcpy (dest, src, width * sizeof (comp4_t));
+    else
     {
-	comp4_t s = combineMask (src, mask, i);
-
-	*(dest + i) = s;
+	for (i = 0; i < width; ++i)
+	{
+	    comp4_t s = combineMask (src, mask, i);
+	    
+	    *(dest + i) = s;
+	}
     }
 }
 
commit 24012542295f80455c8df01262099b98d2b2de37
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 17:59:09 2009 -0400

    Have the generic code go through the component alpha path always

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 9533b5f..be922a7 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -47,6 +47,7 @@ pixman_composite_rect_general_internal (const FbComposeData *data,
     uint32_t *bits;
     int32_t stride;
     source_pict_class_t srcClass, maskClass;
+    pixman_bool_t component_alpha;
 
     srcClass = _pixman_image_classify (data->src,
 				       data->xSrc, data->ySrc,
@@ -101,16 +102,32 @@ pixman_composite_rect_general_internal (const FbComposeData *data,
 	stride = 0;
     }
 
-    if (fetchSrc		   &&
+    component_alpha =
+	fetchSrc		   &&
 	fetchMask		   &&
 	data->mask		   &&
 	data->mask->common.type == BITS &&
 	data->mask->common.component_alpha &&
-	PIXMAN_FORMAT_RGB (data->mask->bits.format))
+	PIXMAN_FORMAT_RGB (data->mask->bits.format);
+
     {
-	CombineFunc32 compose =
-	    wide ? (CombineFunc32)pixman_composeFunctions64.combineC[data->op] :
-		   pixman_composeFunctions.combineC[data->op];
+	CombineFunc32 compose;
+
+	if (wide)
+	{
+	    if (component_alpha)
+		compose = (CombineFunc32)pixman_composeFunctions64.combineC[data->op];
+	    else
+		compose = (CombineFunc32)pixman_composeFunctions64.combineU[data->op];
+	}
+	else
+	{
+	    if (component_alpha)
+		compose = pixman_composeFunctions.combineC[data->op];
+	    else
+		compose = pixman_composeFunctions.combineU[data->op];
+	}
+
 	if (!compose)
 	    return;
 
@@ -175,106 +192,6 @@ pixman_composite_rect_general_internal (const FbComposeData *data,
 	    }
 	}
     }
-    else
-    {
-	void *src_mask_buffer = 0;
-	const int useMask = (fetchMask != NULL);
-	CombineFunc32 compose =
-	    wide ? (CombineFunc32)pixman_composeFunctions64.combineU[data->op] :
-		   pixman_composeFunctions.combineU[data->op];
-	if (!compose)
-	    return;
-
-	for (i = 0; i < data->height; ++i) {
-	    /* fill first half of scanline with source */
-	    if (fetchSrc)
-	    {
-		if (fetchMask)
-		{
-		    /* fetch mask before source so that fetching of
-		       source can be optimized */
-		    fetchMask (data->mask, data->xMask, data->yMask + i,
-			       data->width, mask_buffer, 0, 0);
-
-		    if (maskClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
-			fetchMask = NULL;
-		}
-
-		if (srcClass == SOURCE_IMAGE_CLASS_HORIZONTAL)
-		{
-		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
-			      data->width, src_buffer, 0, 0);
-
-		    if (useMask)
-		    {
-			if (wide)
-			    pixman_composeFunctions64.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, NULL, data->width);
-			else
-			    pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, NULL, data->width);
-
-			src_mask_buffer = mask_buffer;
-		    }
-		    else
-			src_mask_buffer = src_buffer;
-
-		    fetchSrc = NULL;
-		}
-		else
-		{
-		    fetchSrc (data->src, data->xSrc, data->ySrc + i,
-			      data->width, src_buffer,
-			      useMask ? mask_buffer : NULL, 0xff000000);
-
-		    if (useMask) {
-			if (wide)
-			    pixman_composeFunctions64.combineMaskU (src_buffer,
-								    mask_buffer,
-								    data->width);
-			else
-			    pixman_composeFunctions.combineMaskU (src_buffer,
-								  mask_buffer,
-								  data->width);
-		    }
-
-		    src_mask_buffer = src_buffer;
-		}
-	    }
-	    else if (fetchMask)
-	    {
-		fetchMask (data->mask, data->xMask, data->yMask + i,
-			   data->width, mask_buffer, 0, 0);
-
-		if (wide)
-		    pixman_composeFunctions64.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, NULL, data->width);
-		else
-		    pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, NULL, data->width);
-
-		src_mask_buffer = mask_buffer;
-	    }
-
-	    if (store)
-	    {
-		/* fill dest into second half of scanline */
-		if (fetchDest)
-		    fetchDest (data->dest, data->xDest, data->yDest + i,
-			       data->width, dest_buffer, 0, 0);
-
-		/* blend */
-		compose (dest_buffer, src_mask_buffer, NULL, data->width);
-
-		/* write back */
-		store (data->dest, data->xDest, data->yDest + i, data->width,
-		       dest_buffer);
-	    }
-	    else
-	    {
-		/* blend */
-		compose (bits + (data->yDest + i) * stride +
-			 data->xDest,
-			 src_mask_buffer, NULL, data->width);
-	    }
-	}
-    }
 }
 
 #define SCANLINE_BUFFER_LENGTH 8192
commit 8b2e08d494c6da1512f44d0754b0f52a184cc6f3
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 17:53:48 2009 -0400

    Get rid of separate combineU and combineC types

diff --git a/pixman/combine.inc b/pixman/combine.inc
index 5a72ec4..8c0955a 100644
--- a/pixman/combine.inc
+++ b/pixman/combine.inc
@@ -635,8 +635,6 @@ fbCombineMaskAlphaC (const comp4_t *src, comp4_t *mask)
     *(mask) = a;
 }
 
-
-
 FASTCALL static void
 fbCombineClearC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
@@ -1192,7 +1190,7 @@ fbCombineConjointXorC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, i
     fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
 }
 
-static CombineFuncU pixman_fbCombineFuncU[] = {
+static CombineFunc pixman_fbCombineFuncU[] = {
     fbCombineClear,
     fbCombineSrcU,
     NULL, /* CombineDst */
@@ -1239,7 +1237,7 @@ static CombineFuncU pixman_fbCombineFuncU[] = {
     fbCombineConjointXorU,
 };
 
-static CombineFuncC pixman_fbCombineFuncC[] = {
+static CombineFunc pixman_fbCombineFuncC[] = {
     fbCombineClearC,
     fbCombineSrcC,
     NULL, /* Dest */
diff --git a/pixman/combine.pl b/pixman/combine.pl
index 6d27423..d50c480 100644
--- a/pixman/combine.pl
+++ b/pixman/combine.pl
@@ -60,9 +60,7 @@ while (<STDIN>) {
     s/#define RB_MASK_PLUS_ONE\b/$& $rb_mask_plus_one/;
 
     # Add 32/64 suffix to combining function types.
-    s/\bCombineFuncC\b/CombineFuncC$pixel_size/;
-    s/\bCombineFuncU\b/CombineFuncU$pixel_size/;
-    s/\bCombineMaskU\b/CombineMaskU$pixel_size/;
+    s/\bCombineFunc\b/CombineFunc$pixel_size/;
     s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/;
 
     # Convert comp*_t values into the appropriate real types.
diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 5f99c3e..9533b5f 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -108,13 +108,17 @@ pixman_composite_rect_general_internal (const FbComposeData *data,
 	data->mask->common.component_alpha &&
 	PIXMAN_FORMAT_RGB (data->mask->bits.format))
     {
-	CombineFuncC32 compose =
-	    wide ? (CombineFuncC32)pixman_composeFunctions64.combineC[data->op] :
+	CombineFunc32 compose =
+	    wide ? (CombineFunc32)pixman_composeFunctions64.combineC[data->op] :
 		   pixman_composeFunctions.combineC[data->op];
 	if (!compose)
 	    return;
 
-	for (i = 0; i < data->height; ++i) {
+	if (!fetchMask)
+	    mask_buffer = NULL;
+	
+	for (i = 0; i < data->height; ++i)
+	{
 	    /* fill first half of scanline with source */
 	    if (fetchSrc)
 	    {
@@ -175,8 +179,8 @@ pixman_composite_rect_general_internal (const FbComposeData *data,
     {
 	void *src_mask_buffer = 0;
 	const int useMask = (fetchMask != NULL);
-	CombineFuncU32 compose =
-	    wide ? (CombineFuncU32)pixman_composeFunctions64.combineU[data->op] :
+	CombineFunc32 compose =
+	    wide ? (CombineFunc32)pixman_composeFunctions64.combineU[data->op] :
 		   pixman_composeFunctions.combineU[data->op];
 	if (!compose)
 	    return;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index d9b5579..db57467 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -152,9 +152,8 @@ typedef struct point point_t;
  */
 
 #define FASTCALL
-typedef FASTCALL void (*CombineMaskU32) (uint32_t *src, const uint32_t *mask, int width);
-typedef FASTCALL void (*CombineFuncU32) (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width);
-typedef FASTCALL void (*CombineFuncC32) (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width);
+typedef FASTCALL void (*CombineMask32) (uint32_t *src, const uint32_t *mask, int width);
+typedef FASTCALL void (*CombineFunc32) (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width);
 typedef FASTCALL void (*fetchProc32)(bits_image_t *pict, int x, int y, int width,
                                      uint32_t *buffer);
 typedef FASTCALL uint32_t (*fetchPixelProc32)(bits_image_t *pict, int offset, int line);
@@ -162,9 +161,8 @@ typedef FASTCALL void (*storeProc32)(pixman_image_t *, uint32_t *bits,
                                      const uint32_t *values, int x, int width,
                                      const pixman_indexed_t *);
 
-typedef FASTCALL void (*CombineMaskU64) (uint64_t *src, const uint64_t *mask, int width);
-typedef FASTCALL void (*CombineFuncU64) (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width);
-typedef FASTCALL void (*CombineFuncC64) (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width);
+typedef FASTCALL void (*CombineMask64) (uint64_t *src, const uint64_t *mask, int width);
+typedef FASTCALL void (*CombineFunc64) (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width);
 typedef FASTCALL void (*fetchProc64)(bits_image_t *pict, int x, int y, int width,
                                      uint64_t *buffer);
 typedef FASTCALL uint64_t (*fetchPixelProc64)(bits_image_t *pict, int offset, int line);
@@ -188,15 +186,15 @@ typedef struct _FbComposeData {
 } FbComposeData;
 
 typedef struct _FbComposeFunctions32 {
-    CombineFuncU32 *combineU;
-    CombineFuncC32 *combineC;
-    CombineMaskU32 combineMaskU;
+    CombineFunc32 *combineU;
+    CombineFunc32 *combineC;
+    CombineMask32 combineMaskU;
 } FbComposeFunctions32;
 
 typedef struct _FbComposeFunctions64 {
-    CombineFuncU64 *combineU;
-    CombineFuncC64 *combineC;
-    CombineMaskU64 combineMaskU;
+    CombineFunc64 *combineU;
+    CombineFunc64 *combineC;
+    CombineMask64 combineMaskU;
 } FbComposeFunctions64;
 
 extern FbComposeFunctions32 pixman_composeFunctions;
diff --git a/test/composite-test.c b/test/composite-test.c
index eaa8820..393e15d 100644
--- a/test/composite-test.c
+++ b/test/composite-test.c
@@ -54,7 +54,7 @@ main (int argc, char **argv)
 	src[i] = 0x7f7f0000; /* red */
 
     for (i = 0; i < WIDTH * HEIGHT; ++i)
-	dest[i] = 0x7f0000ff; /* blue */
+	dest[i] = 0x7f00007f; /* blue */
     
     src_img = pixman_image_create_bits (PIXMAN_a8r8g8b8,
 					WIDTH, HEIGHT,
commit 6d6e33d33818b56982f15da1943da499db220bc1
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 17:14:04 2009 -0400

    Change type of combineU to take a mask

diff --git a/pixman/combine.inc b/pixman/combine.inc
index 0b02f27..5a72ec4 100644
--- a/pixman/combine.inc
+++ b/pixman/combine.inc
@@ -36,25 +36,47 @@ pixman_fbCombineMaskU (comp4_t *src, const comp4_t *mask, int width)
  * All of the composing functions
  */
 
+static force_inline comp4_t
+combineMask (const comp4_t *src, const comp4_t *mask, int i)
+{
+    comp4_t s = *(src + i);
+
+    if (mask)
+    {
+	comp4_t m = *(mask + i) >> A_SHIFT;
+	
+	FbByteMul (s, m);
+    }
+    
+    return s;
+}
+
 FASTCALL static void
-fbCombineClear (comp4_t *dest, const comp4_t *src, int width)
+fbCombineClear (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     memset(dest, 0, width*sizeof(comp4_t));
 }
 
 FASTCALL static void
-fbCombineSrcU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineSrcU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    memcpy(dest, src, width*sizeof(comp4_t));
+    int i;
+
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combineMask (src, mask, i);
+
+	*(dest + i) = s;
+    }
 }
 
 /* if the Src is opaque, call fbCombineSrcU */
 FASTCALL static void
-fbCombineOverU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineOverU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp4_t d = *(dest + i);
         comp4_t ia = Alpha(~s);
 
@@ -65,11 +87,11 @@ fbCombineOverU (comp4_t *dest, const comp4_t *src, int width)
 
 /* if the Dst is opaque, this is a noop */
 FASTCALL static void
-fbCombineOverReverseU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineOverReverseU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp4_t d = *(dest + i);
         comp4_t ia = Alpha(~*(dest + i));
         FbByteMulAdd(s, ia, d);
@@ -79,11 +101,11 @@ fbCombineOverReverseU (comp4_t *dest, const comp4_t *src, int width)
 
 /* if the Dst is opaque, call fbCombineSrcU */
 FASTCALL static void
-fbCombineInU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineInU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp4_t a = Alpha(*(dest + i));
         FbByteMul(s, a);
 	*(dest + i) = s;
@@ -92,12 +114,13 @@ fbCombineInU (comp4_t *dest, const comp4_t *src, int width)
 
 /* if the Src is opaque, this is a noop */
 FASTCALL static void
-fbCombineInReverseU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineInReverseU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t d = *(dest + i);
-        comp4_t a = Alpha(*(src + i));
+	comp4_t s = combineMask (src, mask, i);
+	comp4_t d = *(dest + i);
+        comp4_t a = Alpha(s);
         FbByteMul(d, a);
 	*(dest + i) = d;
     }
@@ -105,11 +128,11 @@ fbCombineInReverseU (comp4_t *dest, const comp4_t *src, int width)
 
 /* if the Dst is opaque, call fbCombineClear */
 FASTCALL static void
-fbCombineOutU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineOutU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp4_t a = Alpha(~*(dest + i));
         FbByteMul(s, a);
 	*(dest + i) = s;
@@ -118,12 +141,13 @@ fbCombineOutU (comp4_t *dest, const comp4_t *src, int width)
 
 /* if the Src is opaque, call fbCombineClear */
 FASTCALL static void
-fbCombineOutReverseU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineOutReverseU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
+	comp4_t s = combineMask (src, mask, i);
         comp4_t d = *(dest + i);
-        comp4_t a = Alpha(~*(src + i));
+        comp4_t a = Alpha(~s);
         FbByteMul(d, a);
 	*(dest + i) = d;
     }
@@ -133,11 +157,11 @@ fbCombineOutReverseU (comp4_t *dest, const comp4_t *src, int width)
 /* if the Dst is opaque, call fbCombineOverU */
 /* if both the Src and Dst are opaque, call fbCombineSrcU */
 FASTCALL static void
-fbCombineAtopU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineAtopU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp4_t d = *(dest + i);
         comp4_t dest_a = Alpha(d);
         comp4_t src_ia = Alpha(~s);
@@ -151,11 +175,11 @@ fbCombineAtopU (comp4_t *dest, const comp4_t *src, int width)
 /* if the Dst is opaque, call fbCombineInReverseU */
 /* if both the Src and Dst are opaque, call fbCombineDstU */
 FASTCALL static void
-fbCombineAtopReverseU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineAtopReverseU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp4_t d = *(dest + i);
         comp4_t src_a = Alpha(s);
         comp4_t dest_ia = Alpha(~d);
@@ -169,11 +193,11 @@ fbCombineAtopReverseU (comp4_t *dest, const comp4_t *src, int width)
 /* if the Dst is opaque, call fbCombineOverReverseU */
 /* if both the Src and Dst are opaque, call fbCombineClear */
 FASTCALL static void
-fbCombineXorU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineXorU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp4_t d = *(dest + i);
         comp4_t src_ia = Alpha(~s);
         comp4_t dest_ia = Alpha(~d);
@@ -184,11 +208,11 @@ fbCombineXorU (comp4_t *dest, const comp4_t *src, int width)
 }
 
 FASTCALL static void
-fbCombineAddU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineAddU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp4_t d = *(dest + i);
         FbByteAdd(d, s);
 	*(dest + i) = d;
@@ -199,11 +223,11 @@ fbCombineAddU (comp4_t *dest, const comp4_t *src, int width)
 /* if the Dst is opaque, call fbCombineAddU */
 /* if both the Src and Dst are opaque, call fbCombineAddU */
 FASTCALL static void
-fbCombineSaturateU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineSaturateU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp4_t d = *(dest + i);
         comp2_t sa, da;
 
@@ -311,11 +335,11 @@ fbCombineConjointInPart (comp1_t a, comp1_t b)
 }
 
 FASTCALL static void
-fbCombineDisjointGeneralU (comp4_t *dest, const comp4_t *src, int width, comp1_t combine)
+fbCombineDisjointGeneralU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp4_t d = *(dest + i);
         comp4_t m,n,o,p;
         comp2_t Fa, Fb, t, u, v;
@@ -361,11 +385,11 @@ fbCombineDisjointGeneralU (comp4_t *dest, const comp4_t *src, int width, comp1_t
 }
 
 FASTCALL static void
-fbCombineDisjointOverU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineDisjointOverU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp2_t a = s >> A_SHIFT;
 
         if (a != 0x00)
@@ -383,53 +407,53 @@ fbCombineDisjointOverU (comp4_t *dest, const comp4_t *src, int width)
 }
 
 FASTCALL static void
-fbCombineDisjointInU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineDisjointInU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineDisjointGeneralU (dest, src, width, CombineAIn);
+    fbCombineDisjointGeneralU (dest, src, mask, width, CombineAIn);
 }
 
 FASTCALL static void
-fbCombineDisjointInReverseU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineDisjointInReverseU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineDisjointGeneralU (dest, src, width, CombineBIn);
+    fbCombineDisjointGeneralU (dest, src, mask, width, CombineBIn);
 }
 
 FASTCALL static void
-fbCombineDisjointOutU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineDisjointOutU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineDisjointGeneralU (dest, src, width, CombineAOut);
+    fbCombineDisjointGeneralU (dest, src, mask, width, CombineAOut);
 }
 
 FASTCALL static void
-fbCombineDisjointOutReverseU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineDisjointOutReverseU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineDisjointGeneralU (dest, src, width, CombineBOut);
+    fbCombineDisjointGeneralU (dest, src, mask, width, CombineBOut);
 }
 
 FASTCALL static void
-fbCombineDisjointAtopU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineDisjointAtopU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineDisjointGeneralU (dest, src, width, CombineAAtop);
+    fbCombineDisjointGeneralU (dest, src, mask, width, CombineAAtop);
 }
 
 FASTCALL static void
-fbCombineDisjointAtopReverseU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineDisjointAtopReverseU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineDisjointGeneralU (dest, src, width, CombineBAtop);
+    fbCombineDisjointGeneralU (dest, src, mask, width, CombineBAtop);
 }
 
 FASTCALL static void
-fbCombineDisjointXorU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineDisjointXorU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineDisjointGeneralU (dest, src, width, CombineXor);
+    fbCombineDisjointGeneralU (dest, src, mask, width, CombineXor);
 }
 
 FASTCALL static void
-fbCombineConjointGeneralU (comp4_t *dest, const comp4_t *src, int width, comp1_t combine)
+fbCombineConjointGeneralU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
+        comp4_t s = combineMask (src, mask, i);
         comp4_t d = *(dest + i);
         comp4_t m,n,o,p;
         comp2_t Fa, Fb, t, u, v;
@@ -475,60 +499,60 @@ fbCombineConjointGeneralU (comp4_t *dest, const comp4_t *src, int width, comp1_t
 }
 
 FASTCALL static void
-fbCombineConjointOverU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineConjointOverU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineConjointGeneralU (dest, src, width, CombineAOver);
+    fbCombineConjointGeneralU (dest, src, mask, width, CombineAOver);
 }
 
 
 FASTCALL static void
-fbCombineConjointOverReverseU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineConjointOverReverseU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineConjointGeneralU (dest, src, width, CombineBOver);
+    fbCombineConjointGeneralU (dest, src, mask, width, CombineBOver);
 }
 
 
 FASTCALL static void
-fbCombineConjointInU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineConjointInU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineConjointGeneralU (dest, src, width, CombineAIn);
+    fbCombineConjointGeneralU (dest, src, mask, width, CombineAIn);
 }
 
 
 FASTCALL static void
-fbCombineConjointInReverseU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineConjointInReverseU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineConjointGeneralU (dest, src, width, CombineBIn);
+    fbCombineConjointGeneralU (dest, src, mask, width, CombineBIn);
 }
 
 FASTCALL static void
-fbCombineConjointOutU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineConjointOutU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineConjointGeneralU (dest, src, width, CombineAOut);
+    fbCombineConjointGeneralU (dest, src, mask, width, CombineAOut);
 }
 
 FASTCALL static void
-fbCombineConjointOutReverseU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineConjointOutReverseU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineConjointGeneralU (dest, src, width, CombineBOut);
+    fbCombineConjointGeneralU (dest, src, mask, width, CombineBOut);
 }
 
 FASTCALL static void
-fbCombineConjointAtopU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineConjointAtopU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineConjointGeneralU (dest, src, width, CombineAAtop);
+    fbCombineConjointGeneralU (dest, src, mask, width, CombineAAtop);
 }
 
 FASTCALL static void
-fbCombineConjointAtopReverseU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineConjointAtopReverseU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineConjointGeneralU (dest, src, width, CombineBAtop);
+    fbCombineConjointGeneralU (dest, src, mask, width, CombineBAtop);
 }
 
 FASTCALL static void
-fbCombineConjointXorU (comp4_t *dest, const comp4_t *src, int width)
+fbCombineConjointXorU (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
-    fbCombineConjointGeneralU (dest, src, width, CombineXor);
+    fbCombineConjointGeneralU (dest, src, mask, width, CombineXor);
 }
 
 /********************************************************************************/
diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 7c6dc64..5f99c3e 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -204,9 +204,9 @@ pixman_composite_rect_general_internal (const FbComposeData *data,
 		    if (useMask)
 		    {
 			if (wide)
-			    pixman_composeFunctions64.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
+			    pixman_composeFunctions64.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, NULL, data->width);
 			else
-			    pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
+			    pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, NULL, data->width);
 
 			src_mask_buffer = mask_buffer;
 		    }
@@ -241,9 +241,9 @@ pixman_composite_rect_general_internal (const FbComposeData *data,
 			   data->width, mask_buffer, 0, 0);
 
 		if (wide)
-		    pixman_composeFunctions64.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
+		    pixman_composeFunctions64.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, NULL, data->width);
 		else
-		    pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
+		    pixman_composeFunctions.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, NULL, data->width);
 
 		src_mask_buffer = mask_buffer;
 	    }
@@ -256,7 +256,7 @@ pixman_composite_rect_general_internal (const FbComposeData *data,
 			       data->width, dest_buffer, 0, 0);
 
 		/* blend */
-		compose (dest_buffer, src_mask_buffer, data->width);
+		compose (dest_buffer, src_mask_buffer, NULL, data->width);
 
 		/* write back */
 		store (data->dest, data->xDest, data->yDest + i, data->width,
@@ -267,7 +267,7 @@ pixman_composite_rect_general_internal (const FbComposeData *data,
 		/* blend */
 		compose (bits + (data->yDest + i) * stride +
 			 data->xDest,
-			 src_mask_buffer, data->width);
+			 src_mask_buffer, NULL, data->width);
 	    }
 	}
     }
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 844006c..8262cb1 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -891,6 +891,7 @@ fbComposeSetupMMX(void)
     /* check if we have MMX support and initialize accordingly */
     if (pixman_have_mmx())
     {
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = mmxCombineOverU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = mmxCombineOverReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = mmxCombineInU;
@@ -902,6 +903,7 @@ fbComposeSetupMMX(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = mmxCombineXorU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = mmxCombineAddU;
         pixman_composeFunctions.combineU[PIXMAN_OP_SATURATE] = mmxCombineSaturateU;
+#endif
 
         pixman_composeFunctions.combineC[PIXMAN_OP_SRC] = mmxCombineSrcC;
         pixman_composeFunctions.combineC[PIXMAN_OP_OVER] = mmxCombineOverC;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 7554035..d9b5579 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -153,7 +153,7 @@ typedef struct point point_t;
 
 #define FASTCALL
 typedef FASTCALL void (*CombineMaskU32) (uint32_t *src, const uint32_t *mask, int width);
-typedef FASTCALL void (*CombineFuncU32) (uint32_t *dest, const uint32_t *src, int width);
+typedef FASTCALL void (*CombineFuncU32) (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width);
 typedef FASTCALL void (*CombineFuncC32) (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width);
 typedef FASTCALL void (*fetchProc32)(bits_image_t *pict, int x, int y, int width,
                                      uint32_t *buffer);
@@ -163,7 +163,7 @@ typedef FASTCALL void (*storeProc32)(pixman_image_t *, uint32_t *bits,
                                      const pixman_indexed_t *);
 
 typedef FASTCALL void (*CombineMaskU64) (uint64_t *src, const uint64_t *mask, int width);
-typedef FASTCALL void (*CombineFuncU64) (uint64_t *dest, const uint64_t *src, int width);
+typedef FASTCALL void (*CombineFuncU64) (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width);
 typedef FASTCALL void (*CombineFuncC64) (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width);
 typedef FASTCALL void (*fetchProc64)(bits_image_t *pict, int x, int y, int width,
                                      uint64_t *buffer);
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index bbc2324..13509c9 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -2321,6 +2321,7 @@ fbComposeSetupSSE2(void)
         xMaskAlpha = createMask_2x32_64 (0x00ff0000, 0x00000000);
 
         /* SSE code patch for fbcompose.c */
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = sse2CombineOverU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = sse2CombineOverReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = sse2CombineInU;
@@ -2334,6 +2335,7 @@ fbComposeSetupSSE2(void)
         pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = sse2CombineAddU;
 
         pixman_composeFunctions.combineU[PIXMAN_OP_SATURATE] = sse2CombineSaturateU;
+#endif
 
         pixman_composeFunctions.combineC[PIXMAN_OP_SRC] = sse2CombineSrcC;
         pixman_composeFunctions.combineC[PIXMAN_OP_OVER] = sse2CombineOverC;
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 6d4df4e..129cab7 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -1036,6 +1036,7 @@ void fbComposeSetupVMX (void)
 {
     /* check if we have VMX support and initialize accordingly */
     if (pixman_have_vmx ()) {
+#if 0
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = vmxCombineOverU;
         pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_IN] = vmxCombineInU;
@@ -1046,6 +1047,7 @@ void fbComposeSetupVMX (void)
         pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseU;
         pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = vmxCombineXorU;
         pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = vmxCombineAddU;
+#endif
 
         pixman_composeFunctions.combineC[PIXMAN_OP_SRC] = vmxCombineSrcC;
         pixman_composeFunctions.combineC[PIXMAN_OP_OVER] = vmxCombineOverC;
commit fe571035f8889fd12892f2257b64536feced3f4e
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 19:50:24 2009 -0400

    Make combineC function type take const args
    
    Fix type of mmx component alpha combiners
    
    Fix type of sse2 component alpha combiners
    
    Fix type of vmx component alpha combiners

diff --git a/pixman/combine.inc b/pixman/combine.inc
index bcf7d5d..0b02f27 100644
--- a/pixman/combine.inc
+++ b/pixman/combine.inc
@@ -614,13 +614,13 @@ fbCombineMaskAlphaC (const comp4_t *src, comp4_t *mask)
 
 
 FASTCALL static void
-fbCombineClearC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineClearC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     memset(dest, 0, width*sizeof(comp4_t));
 }
 
 FASTCALL static void
-fbCombineSrcC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineSrcC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -635,7 +635,7 @@ fbCombineSrcC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineOverC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineOverC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -661,7 +661,7 @@ fbCombineOverC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineOverReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineOverReverseC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -686,7 +686,7 @@ fbCombineOverReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineInC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineInC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -710,7 +710,7 @@ fbCombineInC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineInReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineInReverseC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -736,7 +736,7 @@ fbCombineInReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineOutC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineOutC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -761,7 +761,7 @@ fbCombineOutC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineOutReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineOutReverseC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -787,7 +787,7 @@ fbCombineOutReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineAtopC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineAtopC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -808,7 +808,7 @@ fbCombineAtopC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineAtopReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineAtopReverseC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -830,7 +830,7 @@ fbCombineAtopReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineXorC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineXorC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -851,7 +851,7 @@ fbCombineXorC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineAddC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineAddC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -868,7 +868,7 @@ fbCombineAddC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineSaturateC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineSaturateC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
 
@@ -915,7 +915,7 @@ fbCombineSaturateC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineDisjointGeneralC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width, comp1_t combine)
+fbCombineDisjointGeneralC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine)
 {
     int i;
 
@@ -991,55 +991,55 @@ fbCombineDisjointGeneralC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width
 }
 
 FASTCALL static void
-fbCombineDisjointOverC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineDisjointOverC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOver);
 }
 
 FASTCALL static void
-fbCombineDisjointInC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineDisjointInC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineAIn);
 }
 
 FASTCALL static void
-fbCombineDisjointInReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineDisjointInReverseC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineBIn);
 }
 
 FASTCALL static void
-fbCombineDisjointOutC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineDisjointOutC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOut);
 }
 
 FASTCALL static void
-fbCombineDisjointOutReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineDisjointOutReverseC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineBOut);
 }
 
 FASTCALL static void
-fbCombineDisjointAtopC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineDisjointAtopC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineAAtop);
 }
 
 FASTCALL static void
-fbCombineDisjointAtopReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineDisjointAtopReverseC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineBAtop);
 }
 
 FASTCALL static void
-fbCombineDisjointXorC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineDisjointXorC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineXor);
 }
 
 FASTCALL static void
-fbCombineConjointGeneralC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width, comp1_t combine)
+fbCombineConjointGeneralC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine)
 {
     int i;
 
@@ -1115,55 +1115,55 @@ fbCombineConjointGeneralC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width
 }
 
 FASTCALL static void
-fbCombineConjointOverC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineConjointOverC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineAOver);
 }
 
 FASTCALL static void
-fbCombineConjointOverReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineConjointOverReverseC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineBOver);
 }
 
 FASTCALL static void
-fbCombineConjointInC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineConjointInC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineAIn);
 }
 
 FASTCALL static void
-fbCombineConjointInReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineConjointInReverseC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineBIn);
 }
 
 FASTCALL static void
-fbCombineConjointOutC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineConjointOutC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineAOut);
 }
 
 FASTCALL static void
-fbCombineConjointOutReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineConjointOutReverseC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineBOut);
 }
 
 FASTCALL static void
-fbCombineConjointAtopC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineConjointAtopC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineAAtop);
 }
 
 FASTCALL static void
-fbCombineConjointAtopReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineConjointAtopReverseC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineBAtop);
 }
 
 FASTCALL static void
-fbCombineConjointXorC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
+fbCombineConjointXorC (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
 }
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 7d0a802..844006c 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -663,7 +663,7 @@ mmxCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
 
 
 static FASTCALL void
-mmxCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+mmxCombineSrcC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = src + width;
     while (src < end) {
@@ -679,7 +679,7 @@ mmxCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-mmxCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+mmxCombineOverC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = src + width;
     while (src < end) {
@@ -698,7 +698,7 @@ mmxCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-mmxCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+mmxCombineOverReverseC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = src + width;
     while (src < end) {
@@ -718,7 +718,7 @@ mmxCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width
 
 
 static FASTCALL void
-mmxCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+mmxCombineInC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = src + width;
     while (src < end) {
@@ -737,7 +737,7 @@ mmxCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-mmxCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+mmxCombineInReverseC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = src + width;
     while (src < end) {
@@ -756,7 +756,7 @@ mmxCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-mmxCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+mmxCombineOutC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = src + width;
     while (src < end) {
@@ -776,7 +776,7 @@ mmxCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-mmxCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+mmxCombineOutReverseC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = src + width;
     while (src < end) {
@@ -796,7 +796,7 @@ mmxCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-mmxCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+mmxCombineAtopC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = src + width;
     while (src < end) {
@@ -818,7 +818,7 @@ mmxCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-mmxCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+mmxCombineAtopReverseC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = src + width;
     while (src < end) {
@@ -840,7 +840,7 @@ mmxCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width
 }
 
 static FASTCALL void
-mmxCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+mmxCombineXorC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = src + width;
     while (src < end) {
@@ -863,7 +863,7 @@ mmxCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-mmxCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+mmxCombineAddC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     const uint32_t *end = src + width;
     while (src < end) {
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 4ef915d..7554035 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -154,7 +154,7 @@ typedef struct point point_t;
 #define FASTCALL
 typedef FASTCALL void (*CombineMaskU32) (uint32_t *src, const uint32_t *mask, int width);
 typedef FASTCALL void (*CombineFuncU32) (uint32_t *dest, const uint32_t *src, int width);
-typedef FASTCALL void (*CombineFuncC32) (uint32_t *dest, uint32_t *src, uint32_t *mask, int width);
+typedef FASTCALL void (*CombineFuncC32) (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width);
 typedef FASTCALL void (*fetchProc32)(bits_image_t *pict, int x, int y, int width,
                                      uint32_t *buffer);
 typedef FASTCALL uint32_t (*fetchPixelProc32)(bits_image_t *pict, int offset, int line);
@@ -164,7 +164,7 @@ typedef FASTCALL void (*storeProc32)(pixman_image_t *, uint32_t *bits,
 
 typedef FASTCALL void (*CombineMaskU64) (uint64_t *src, const uint64_t *mask, int width);
 typedef FASTCALL void (*CombineFuncU64) (uint64_t *dest, const uint64_t *src, int width);
-typedef FASTCALL void (*CombineFuncC64) (uint64_t *dest, uint64_t *src, uint64_t *mask, int width);
+typedef FASTCALL void (*CombineFuncC64) (uint64_t *dest, const uint64_t *src, const uint64_t *mask, int width);
 typedef FASTCALL void (*fetchProc64)(bits_image_t *pict, int x, int y, int width,
                                      uint64_t *buffer);
 typedef FASTCALL uint64_t (*fetchPixelProc64)(bits_image_t *pict, int offset, int line);
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 92872d0..bbc2324 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -1452,7 +1452,7 @@ coreCombineOverReverseCsse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *p
 }
 
 static force_inline void
-coreCombineInCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
+coreCombineInCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
 {
     uint32_t s, m, d;
 
@@ -1523,7 +1523,7 @@ coreCombineInCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
 }
 
 static force_inline void
-coreCombineInReverseCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
+coreCombineInReverseCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
 {
     uint32_t s, m, d;
 
@@ -1596,7 +1596,7 @@ coreCombineInReverseCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
 }
 
 static force_inline void
-coreCombineOutCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
+coreCombineOutCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
 {
     uint32_t s, m, d;
 
@@ -1668,7 +1668,7 @@ coreCombineOutCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
 }
 
 static force_inline void
-coreCombineOutReverseCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
+coreCombineOutReverseCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
 {
     uint32_t s, m, d;
 
@@ -1759,7 +1759,7 @@ coreCombineAtopCPixelsse2 (uint32_t src, uint32_t mask, uint32_t dst)
 }
 
 static force_inline void
-coreCombineAtopCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
+coreCombineAtopCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
 {
     uint32_t s, m, d;
 
@@ -1852,7 +1852,7 @@ coreCombineReverseAtopCPixelsse2 (uint32_t src, uint32_t mask, uint32_t dst)
 }
 
 static force_inline void
-coreCombineReverseAtopCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
+coreCombineReverseAtopCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
 {
     uint32_t s, m, d;
 
@@ -1946,7 +1946,7 @@ coreCombineXorCPixelsse2 (uint32_t src, uint32_t mask, uint32_t dst)
 }
 
 static force_inline void
-coreCombineXorCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
+coreCombineXorCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
 {
     uint32_t s, m, d;
 
@@ -2024,7 +2024,7 @@ coreCombineXorCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
 }
 
 static force_inline void
-coreCombineAddCsse2 (uint32_t *pd, uint32_t *ps, uint32_t *pm, int w)
+coreCombineAddCsse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
 {
     uint32_t s, m, d;
 
@@ -2207,77 +2207,77 @@ sse2CombineSaturateU (uint32_t *dst, const uint32_t *src, int width)
 }
 
 static FASTCALL void
-sse2CombineSrcC (uint32_t *dst, uint32_t *src, uint32_t *mask, int width)
+sse2CombineSrcC (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineSrcCsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineOverC (uint32_t *dst, uint32_t *src, uint32_t *mask, int width)
+sse2CombineOverC (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineOverCsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineOverReverseC (uint32_t *dst, uint32_t *src, uint32_t *mask, int width)
+sse2CombineOverReverseC (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineOverReverseCsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineInC (uint32_t *dst, uint32_t *src, uint32_t *mask, int width)
+sse2CombineInC (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineInCsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineInReverseC (uint32_t *dst, uint32_t *src, uint32_t *mask, int width)
+sse2CombineInReverseC (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineInReverseCsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineOutC (uint32_t *dst, uint32_t *src, uint32_t *mask, int width)
+sse2CombineOutC (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineOutCsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineOutReverseC (uint32_t *dst, uint32_t *src, uint32_t *mask, int width)
+sse2CombineOutReverseC (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineOutReverseCsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineAtopC (uint32_t *dst, uint32_t *src, uint32_t *mask, int width)
+sse2CombineAtopC (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineAtopCsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineAtopReverseC (uint32_t *dst, uint32_t *src, uint32_t *mask, int width)
+sse2CombineAtopReverseC (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineReverseAtopCsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineXorC (uint32_t *dst, uint32_t *src, uint32_t *mask, int width)
+sse2CombineXorC (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineXorCsse2 (dst, src, mask, width);
     _mm_empty();
 }
 
 static FASTCALL void
-sse2CombineAddC (uint32_t *dst, uint32_t *src, uint32_t *mask, int width)
+sse2CombineAddC (uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
 {
     coreCombineAddCsse2 (dst, src, mask, width);
     _mm_empty();
@@ -2578,7 +2578,7 @@ fbCompositeSolidMask_nx8888x8888Csse2 (pixman_op_t op,
     while (height--)
     {
         int w = width;
-        uint32_t *pm = (uint32_t *)maskLine;
+        const uint32_t *pm = (uint32_t *)maskLine;
         uint32_t *pd = (uint32_t *)dstLine;
 
         dstLine += dstStride;
@@ -4411,14 +4411,14 @@ fbCompositeSrcAdd_8888x8888sse2 (pixman_op_t 	op,
 
 pixman_bool_t
 pixmanBltsse2 (uint32_t *src_bits,
-		uint32_t *dst_bits,
-		int src_stride,
-		int dst_stride,
-		int src_bpp,
-		int dst_bpp,
-		int src_x, int src_y,
-		int dst_x, int dst_y,
-		int width, int height)
+	       uint32_t *dst_bits,
+	       int src_stride,
+	       int dst_stride,
+	       int src_bpp,
+	       int dst_bpp,
+	       int src_x, int src_y,
+	       int dst_x, int dst_y,
+	       int width, int height)
 {
     uint8_t *	src_bytes;
     uint8_t *	dst_bytes;
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 6af23c4..6d4df4e 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -567,7 +567,7 @@ vmxCombineAddU (uint32_t *dest, const uint32_t *src, int width)
 }
 
 static FASTCALL void
-vmxCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+vmxCombineSrcC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     int i;
     vector unsigned int  vdest, vsrc, vmask;
@@ -598,7 +598,7 @@ vmxCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-vmxCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+vmxCombineOverC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     int i;
     vector unsigned int  vdest, vsrc, vmask;
@@ -631,7 +631,7 @@ vmxCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-vmxCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+vmxCombineOverReverseC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     int i;
     vector unsigned int  vdest, vsrc, vmask;
@@ -665,7 +665,7 @@ vmxCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width
 }
 
 static FASTCALL void
-vmxCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+vmxCombineInC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     int i;
     vector unsigned int  vdest, vsrc, vmask;
@@ -699,7 +699,7 @@ vmxCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-vmxCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+vmxCombineInReverseC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     int i;
     vector unsigned int  vdest, vsrc, vmask;
@@ -733,7 +733,7 @@ vmxCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-vmxCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+vmxCombineOutC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     int i;
     vector unsigned int  vdest, vsrc, vmask;
@@ -768,7 +768,7 @@ vmxCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-vmxCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+vmxCombineOutReverseC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     int i;
     vector unsigned int  vdest, vsrc, vmask;
@@ -804,7 +804,7 @@ vmxCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-vmxCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+vmxCombineAtopC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     int i;
     vector unsigned int  vdest, vsrc, vmask;
@@ -845,7 +845,7 @@ vmxCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-vmxCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+vmxCombineAtopReverseC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     int i;
     vector unsigned int  vdest, vsrc, vmask;
@@ -886,7 +886,7 @@ vmxCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width
 }
 
 static FASTCALL void
-vmxCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+vmxCombineXorC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     int i;
     vector unsigned int  vdest, vsrc, vmask;
@@ -927,7 +927,7 @@ vmxCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 static FASTCALL void
-vmxCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+vmxCombineAddC (uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
 {
     int i;
     vector unsigned int  vdest, vsrc, vmask;
commit f9a9ce8940c5644f25721abe6af6c72c4eabcfe7
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 02:54:32 2009 -0400

    Remove accessor version of pixman-compose.c

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 7fdb085..f271786 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -15,7 +15,6 @@ libpixman_1_la_SOURCES =		\
 	pixman-combine64.c		\
 	pixman-combine64.h		\
 	pixman-compose.c		\
-	pixman-compose-accessors.c	\
 	pixman-pict.c			\
 	pixman-source.c			\
 	pixman-transformed.c		\
diff --git a/pixman/pixman-compose-accessors.c b/pixman/pixman-compose-accessors.c
deleted file mode 100644
index 5393cf4..0000000
--- a/pixman/pixman-compose-accessors.c
+++ /dev/null
@@ -1,4 +0,0 @@
-
-#define PIXMAN_FB_ACCESSORS
-
-#include "pixman-compose.c"
diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 9594abd..7c6dc64 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -36,19 +36,10 @@
 
 #include "pixman-private.h"
 
-#ifdef PIXMAN_FB_ACCESSORS
-#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_accessors
-#else
-#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
-#endif
-
-#ifndef PIXMAN_FB_ACCESSORS
-static
-#endif
-void
-PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
-			       void *src_buffer, void *mask_buffer, 
-			       void *dest_buffer, const int wide)
+static void
+pixman_composite_rect_general_internal (const FbComposeData *data,
+					void *src_buffer, void *mask_buffer, 
+					void *dest_buffer, const int wide)
 {
     int i;
     scanStoreProc store;
@@ -86,19 +77,18 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 
     store = _pixman_image_get_storer (data->dest, wide);
 
-#ifndef PIXMAN_FB_ACCESSORS
     // Skip the store step and composite directly into the
     // destination if the output format of the compose func matches
     // the destination format.
     if (!wide &&
 	!data->dest->common.alpha_map &&
+	!data->dest->common.write_func && 
 	(data->op == PIXMAN_OP_ADD || data->op == PIXMAN_OP_OVER) &&
 	(data->dest->bits.format == PIXMAN_a8r8g8b8 ||
 	 data->dest->bits.format == PIXMAN_x8r8g8b8))
     {
 	store = NULL;
     }
-#endif
 
     if (!store)
     {
@@ -283,8 +273,6 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     }
 }
 
-#ifndef PIXMAN_FB_ACCESSORS
-
 #define SCANLINE_BUFFER_LENGTH 8192
 
 void
@@ -314,25 +302,10 @@ pixman_composite_rect_general (const FbComposeData *data)
     mask_buffer = src_buffer + data->width * Bpp;
     dest_buffer = mask_buffer + data->width * Bpp;
 
-    if (data->src->common.read_func			||
-	data->src->common.write_func			||
-	(data->mask && data->mask->common.read_func)	||
-	(data->mask && data->mask->common.write_func)	||
-	data->dest->common.read_func			||
-	data->dest->common.write_func)
-    {
-	pixman_composite_rect_general_accessors (data, src_buffer, mask_buffer,
-						 dest_buffer, wide);
-    }
-    else
-    {
-	pixman_composite_rect_general_no_accessors (data, src_buffer,
-						    mask_buffer, dest_buffer,
-						    wide);
-    }
+    pixman_composite_rect_general_internal (data, src_buffer,
+					    mask_buffer, dest_buffer,
+					    wide);
 
     if (scanline_buffer != stack_scanline_buffer)
 	free (scanline_buffer);
 }
-
-#endif
commit 0236393b031798a36144820a6254b646f9279580
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 02:51:09 2009 -0400

    Remove unused xoff/yoff variables

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index cdb30a2..9594abd 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -55,7 +55,6 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     scanFetchProc fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL;
     uint32_t *bits;
     int32_t stride;
-    int xoff, yoff;
     source_pict_class_t srcClass, maskClass;
 
     srcClass = _pixman_image_classify (data->src,
@@ -105,13 +104,11 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     {
 	bits = data->dest->bits.bits;
 	stride = data->dest->bits.rowstride;
-	xoff = yoff = 0;
     }
     else
     {
 	bits = NULL;
 	stride = 0;
-	xoff = yoff = 0;
     }
 
     if (fetchSrc		   &&
@@ -178,8 +175,8 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	    else
 	    {
 		/* blend */
-		compose (bits + (data->yDest + i+ yoff) * stride +
-			 data->xDest + xoff,
+		compose (bits + (data->yDest + i) * stride +
+			 data->xDest,
 			 src_buffer, mask_buffer, data->width);
 	    }
 	}
@@ -278,8 +275,8 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	    else
 	    {
 		/* blend */
-		compose (bits + (data->yDest + i+ yoff) * stride +
-			 data->xDest + xoff,
+		compose (bits + (data->yDest + i) * stride +
+			 data->xDest,
 			 src_mask_buffer, data->width);
 	    }
 	}
commit d0a6c1e9a5447e982dc4d544146c1b5234e490cf
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 02:50:18 2009 -0400

    Move store logic into pixman-image.c

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 701edd4..cdb30a2 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -42,50 +42,6 @@
 #define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
 #endif
 
-static void
-fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t *bits;
-    int32_t stride;
-    storeProc32 store = ACCESS(pixman_storeProcForPicture32)(pict);
-    const pixman_indexed_t * indexed = pict->indexed;
-
-    bits = pict->bits;
-    stride = pict->rowstride;
-    bits += y*stride;
-    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
-}
-
-static void
-fbStore64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer)
-{
-    uint32_t *bits;
-    int32_t stride;
-    storeProc64 store = ACCESS(pixman_storeProcForPicture64)(pict);
-    const pixman_indexed_t * indexed = pict->indexed;
-
-    bits = pict->bits;
-    stride = pict->rowstride;
-    bits += y*stride;
-    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
-}
-
-static inline scanStoreProc get_store(const int wide)
-{
-    if (wide)
-	return (scanStoreProc)fbStore64;
-    else
-	return (scanStoreProc)fbStore;
-}
-
-static inline scanStoreProc get_store_external_alpha(const int wide)
-{
-    if (wide)
-	return (scanStoreProc)ACCESS(fbStoreExternalAlpha64);
-    else
-	return (scanStoreProc)ACCESS(fbStoreExternalAlpha);
-}
-
 #ifndef PIXMAN_FB_ACCESSORS
 static
 #endif
@@ -129,27 +85,21 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     else
 	fetchDest = _pixman_image_get_fetcher (data->dest, wide);
 
-    if (data->dest->common.alpha_map)
-    {
-	store = get_store_external_alpha(wide);
-    }
-    else
-    {
-	store = get_store(wide);
+    store = _pixman_image_get_storer (data->dest, wide);
 
 #ifndef PIXMAN_FB_ACCESSORS
-	// Skip the store step and composite directly into the
-	// destination if the output format of the compose func matches
-	// the destination format.
-	if (!wide &&
-	    (data->op == PIXMAN_OP_ADD || data->op == PIXMAN_OP_OVER) &&
-	    (data->dest->bits.format == PIXMAN_a8r8g8b8 ||
-	     data->dest->bits.format == PIXMAN_x8r8g8b8))
-	{
-	    store = NULL;
-	}
-#endif
+    // Skip the store step and composite directly into the
+    // destination if the output format of the compose func matches
+    // the destination format.
+    if (!wide &&
+	!data->dest->common.alpha_map &&
+	(data->op == PIXMAN_OP_ADD || data->op == PIXMAN_OP_OVER) &&
+	(data->dest->bits.format == PIXMAN_a8r8g8b8 ||
+	 data->dest->bits.format == PIXMAN_x8r8g8b8))
+    {
+	store = NULL;
     }
+#endif
 
     if (!store)
     {
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index cecd02c..4bc47dc 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -274,6 +274,56 @@ _pixman_image_get_fetcher (pixman_image_t *image,
     }
 }
 
+#define WRITE_ACCESS(f) ((image->common.write_func)? f##_accessors : f)
+
+static void
+fbStore(bits_image_t * image, int x, int y, int width, uint32_t *buffer)
+{
+    uint32_t *bits;
+    int32_t stride;
+    storeProc32 store = WRITE_ACCESS(pixman_storeProcForPicture32)(image);
+    const pixman_indexed_t * indexed = image->indexed;
+
+    bits = image->bits;
+    stride = image->rowstride;
+    bits += y*stride;
+    store((pixman_image_t *)image, bits, buffer, x, width, indexed);
+}
+
+static void
+fbStore64(bits_image_t * image, int x, int y, int width, uint64_t *buffer)
+{
+    uint32_t *bits;
+    int32_t stride;
+    storeProc64 store = WRITE_ACCESS(pixman_storeProcForPicture64)(image);
+    const pixman_indexed_t * indexed = image->indexed;
+
+    bits = image->bits;
+    stride = image->rowstride;
+    bits += y*stride;
+    store((pixman_image_t *)image, bits, buffer, x, width, indexed);
+}
+
+scanStoreProc
+_pixman_image_get_storer (pixman_image_t *image,
+			  int             wide)
+{
+    if (image->common.alpha_map)
+    {
+	if (wide)
+	    return (scanStoreProc)WRITE_ACCESS(fbStoreExternalAlpha64);
+	else
+	    return (scanStoreProc)WRITE_ACCESS(fbStoreExternalAlpha);
+    }
+    else
+    {
+	if (wide)
+	    return (scanStoreProc)fbStore64;
+	else
+	    return (scanStoreProc)fbStore;
+    }
+}
+
 /* Ref Counting */
 PIXMAN_EXPORT pixman_image_t *
 pixman_image_ref (pixman_image_t *image)
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index e7fbb58..4ef915d 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -303,7 +303,9 @@ scanFetchProc
 _pixman_image_get_fetcher (pixman_image_t *image,
 			   int             wide);
 
-
+scanStoreProc
+_pixman_image_get_storer (pixman_image_t *image,
+			  int             wide);
 
 struct point
 {
commit 363be5285950d20cc77cf4a7eb50d5f1f5fea0f7
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 02:42:47 2009 -0400

    Move fetch logic to pixman-image.c

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 1f9a689..701edd4 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -42,46 +42,6 @@
 #define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
 #endif
 
-static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
-{
-    uint32_t color;
-    uint32_t *end;
-    fetchPixelProc32 fetch = ACCESS(pixman_fetchPixelProcForPicture32)(pict);
-
-    color = fetch(pict, 0, 0);
-
-    end = buffer + width;
-    while (buffer < end)
-	*(buffer++) = color;
-}
-
-static void fbFetchSolid64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, void *unused, uint32_t unused2)
-{
-    uint64_t color;
-    uint64_t *end;
-    fetchPixelProc64 fetch = ACCESS(pixman_fetchPixelProcForPicture64)(pict);
-
-    color = fetch(pict, 0, 0);
-
-    end = buffer + width;
-    while (buffer < end)
-	*(buffer++) = color;
-}
-
-static void fbFetch(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
-{
-    fetchProc32 fetch = ACCESS(pixman_fetchProcForPicture32)(pict);
-
-    fetch(pict, x, y, width, buffer);
-}
-
-static void fbFetch64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer, void *unused, uint32_t unused2)
-{
-    fetchProc64 fetch = ACCESS(pixman_fetchProcForPicture64)(pict);
-
-    fetch(pict, x, y, width, buffer);
-}
-
 static void
 fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 {
@@ -110,50 +70,6 @@ fbStore64(bits_image_t * pict, int x, int y, int width, uint64_t *buffer)
     store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
 }
 
-typedef void (*scanStoreProc)(pixman_image_t *, int, int, int, uint32_t *);
-typedef void (*scanFetchProc)(pixman_image_t *, int, int, int, uint32_t *,
-			      uint32_t *, uint32_t);
-
-static inline scanFetchProc get_fetch_source_pict(const int wide)
-{
-    if (wide)
-	return (scanFetchProc)pixmanFetchSourcePict64;
-    else
-	return (scanFetchProc)pixmanFetchSourcePict;
-}
-
-static inline scanFetchProc get_fetch_solid(const int wide)
-{
-    if (wide)
-	return (scanFetchProc)fbFetchSolid64;
-    else
-	return (scanFetchProc)fbFetchSolid;
-}
-
-static inline scanFetchProc get_fetch(const int wide)
-{
-    if (wide)
-	return (scanFetchProc)fbFetch64;
-    else
-	return (scanFetchProc)fbFetch;
-}
-
-static inline scanFetchProc get_fetch_external_alpha(const int wide)
-{
-    if (wide)
-	return (scanFetchProc)ACCESS(fbFetchExternalAlpha64);
-    else
-	return (scanFetchProc)ACCESS(fbFetchExternalAlpha);
-}
-
-static inline scanFetchProc get_fetch_transformed(const int wide)
-{
-    if (wide)
-	return (scanFetchProc)ACCESS(fbFetchTransformed64);
-    else
-	return (scanFetchProc)ACCESS(fbFetchTransformed);
-}
-
 static inline scanStoreProc get_store(const int wide)
 {
     if (wide)
@@ -170,39 +86,6 @@ static inline scanStoreProc get_store_external_alpha(const int wide)
 	return (scanStoreProc)ACCESS(fbStoreExternalAlpha);
 }
 
-static scanFetchProc
-get_fetcher (pixman_image_t *image, int wide)
-{
-    if (IS_SOURCE_IMAGE (image))
-    {
-	return get_fetch_source_pict(wide);
-    }
-    else
-    {
-	bits_image_t *bits = (bits_image_t *)image;
-
-	if (bits->common.alpha_map)
-	{
-	    return get_fetch_external_alpha(wide);
-	}
-	else if ((bits->common.repeat != PIXMAN_REPEAT_NONE) &&
-		 bits->width == 1 &&
-		 bits->height == 1)
-	{
-	    return get_fetch_solid(wide);
-	}
-	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION
-                && bits->common.repeat != PIXMAN_REPEAT_PAD && bits->common.repeat != PIXMAN_REPEAT_REFLECT)
-	{
-	    return get_fetch(wide);
-	}
-	else
-	{
-	    return get_fetch_transformed(wide);
-	}
-    }
-}
-
 #ifndef PIXMAN_FB_ACCESSORS
 static
 #endif
@@ -234,22 +117,24 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     if (data->op == PIXMAN_OP_CLEAR)
         fetchSrc = NULL;
     else
-	fetchSrc = get_fetcher (data->src, wide);
+	fetchSrc = _pixman_image_get_fetcher (data->src, wide);
 
     if (!data->mask || data->op == PIXMAN_OP_CLEAR)
 	fetchMask = NULL;
     else
-	fetchMask = get_fetcher (data->mask, wide);
+	fetchMask = _pixman_image_get_fetcher (data->mask, wide);
+
+    if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
+	fetchDest = NULL;
+    else
+	fetchDest = _pixman_image_get_fetcher (data->dest, wide);
 
     if (data->dest->common.alpha_map)
     {
-	fetchDest = get_fetch_external_alpha(wide);
 	store = get_store_external_alpha(wide);
-
     }
     else
     {
-	fetchDest = get_fetch(wide);
 	store = get_store(wide);
 
 #ifndef PIXMAN_FB_ACCESSORS
@@ -266,9 +151,6 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 #endif
     }
 
-    if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
-	fetchDest = NULL;
-
     if (!store)
     {
 	bits = data->dest->bits.bits;
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 819d29a..cecd02c 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -183,6 +183,97 @@ _pixman_image_classify (pixman_image_t *image,
 	return SOURCE_IMAGE_CLASS_UNKNOWN;
 }
 
+#define READ_ACCESS(f) ((image->common.read_func)? f##_accessors : f)
+
+static void fbFetchSolid(bits_image_t * image, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
+{
+    uint32_t color;
+    uint32_t *end;
+    fetchPixelProc32 fetch = READ_ACCESS(pixman_fetchPixelProcForPicture32)(image);
+
+    color = fetch(image, 0, 0);
+
+    end = buffer + width;
+    while (buffer < end)
+	*(buffer++) = color;
+}
+
+static void fbFetchSolid64(bits_image_t * image, int x, int y, int width, uint64_t *buffer, void *unused, uint32_t unused2)
+{
+    uint64_t color;
+    uint64_t *end;
+    fetchPixelProc64 fetch = READ_ACCESS(pixman_fetchPixelProcForPicture64)(image);
+
+    color = fetch(image, 0, 0);
+
+    end = buffer + width;
+    while (buffer < end)
+	*(buffer++) = color;
+}
+
+static void fbFetch(bits_image_t * image, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
+{
+    fetchProc32 fetch = READ_ACCESS(pixman_fetchProcForPicture32)(image);
+
+    fetch(image, x, y, width, buffer);
+}
+
+static void fbFetch64(bits_image_t * image, int x, int y, int width, uint64_t *buffer, void *unused, uint32_t unused2)
+{
+    fetchProc64 fetch = READ_ACCESS(pixman_fetchProcForPicture64)(image);
+
+    fetch(image, x, y, width, buffer);
+}
+
+scanFetchProc
+_pixman_image_get_fetcher (pixman_image_t *image,
+			   int             wide)
+{
+    if (IS_SOURCE_IMAGE (image))
+    {
+	if (wide)
+	    return (scanFetchProc)pixmanFetchSourcePict64;
+	else
+	    return (scanFetchProc)pixmanFetchSourcePict;
+    }
+    else
+    {
+	bits_image_t *bits = (bits_image_t *)image;
+
+	if (bits->common.alpha_map)
+	{
+	    if (wide)
+		return (scanFetchProc)READ_ACCESS(fbFetchExternalAlpha64);
+	    else
+		return (scanFetchProc)READ_ACCESS(fbFetchExternalAlpha);
+	}
+	else if ((bits->common.repeat != PIXMAN_REPEAT_NONE) &&
+		 bits->width == 1 &&
+		 bits->height == 1)
+	{
+	    if (wide)
+		return (scanFetchProc)fbFetchSolid64;
+	    else
+		return (scanFetchProc)fbFetchSolid;
+	}
+	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION
+                && bits->common.repeat != PIXMAN_REPEAT_PAD && bits->common.repeat != PIXMAN_REPEAT_REFLECT)
+	{
+	    if (wide)
+		return (scanFetchProc)fbFetch64;
+	    else
+		return (scanFetchProc)fbFetch;
+	}
+	else
+	{
+	    if (wide)
+		return (scanFetchProc)READ_ACCESS(fbFetchTransformed64);
+	    else
+		return (scanFetchProc)READ_ACCESS(fbFetchTransformed);
+	}
+    }
+}
+
 /* Ref Counting */
 PIXMAN_EXPORT pixman_image_t *
 pixman_image_ref (pixman_image_t *image)
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 3049c7c..e7fbb58 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -289,12 +289,22 @@ typedef source_pict_class_t (* classify_func_t) (pixman_image_t *image,
 						 int             width,
 						 int             height);
 
+typedef void (*scanStoreProc)(pixman_image_t *, int, int, int, uint32_t *);
+typedef void (*scanFetchProc)(pixman_image_t *, int, int, int, uint32_t *,
+			      uint32_t *, uint32_t);
+
 source_pict_class_t _pixman_image_classify (pixman_image_t *image,
 					    int             x,
 					    int             y,
 					    int             width,
 					    int             height);
 
+scanFetchProc
+_pixman_image_get_fetcher (pixman_image_t *image,
+			   int             wide);
+
+
+
 struct point
 {
     int16_t x, y;
commit bf879f1b37cfe5ee2ec921d26bf9d9126ca59b9c
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 02:25:48 2009 -0400

    Simplify logic to compute store function

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 577b489..1f9a689 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -246,40 +246,29 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	fetchDest = get_fetch_external_alpha(wide);
 	store = get_store_external_alpha(wide);
 
-	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
-	    fetchDest = NULL;
     }
     else
     {
 	fetchDest = get_fetch(wide);
 	store = get_store(wide);
 
-	switch (data->op)
-	{
-	case PIXMAN_OP_CLEAR:
-	case PIXMAN_OP_SRC:
-	    fetchDest = NULL;
 #ifndef PIXMAN_FB_ACCESSORS
-	    /* fall-through */
-	case PIXMAN_OP_ADD:
-	case PIXMAN_OP_OVER:
-	    switch (data->dest->bits.format) {
-	    case PIXMAN_a8r8g8b8:
-	    case PIXMAN_x8r8g8b8:
-		// Skip the store step and composite directly into the
-		// destination if the output format of the compose func matches
-		// the destination format.
-		if (!wide)
-		    store = NULL;
-		break;
-	    default:
-		break;
-	    }
-#endif
-	    break;
+	// Skip the store step and composite directly into the
+	// destination if the output format of the compose func matches
+	// the destination format.
+	if (!wide &&
+	    (data->op == PIXMAN_OP_ADD || data->op == PIXMAN_OP_OVER) &&
+	    (data->dest->bits.format == PIXMAN_a8r8g8b8 ||
+	     data->dest->bits.format == PIXMAN_x8r8g8b8))
+	{
+	    store = NULL;
 	}
+#endif
     }
 
+    if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
+	fetchDest = NULL;
+
     if (!store)
     {
 	bits = data->dest->bits.bits;
commit 20cedd756f54bc735fe25ab29aafd3cdfeddda30
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 02:14:13 2009 -0400

    Formatting

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 627f5c3..577b489 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -237,13 +237,9 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	fetchSrc = get_fetcher (data->src, wide);
 
     if (!data->mask || data->op == PIXMAN_OP_CLEAR)
-    {
 	fetchMask = NULL;
-    }
     else
-    {
 	fetchMask = get_fetcher (data->mask, wide);
-    }
 
     if (data->dest->common.alpha_map)
     {
commit 4c74f519ca3788fe357caf54e22e6cab609b681e
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 02:08:44 2009 -0400

    Factor out duplicated fetcher finding code

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 68b5683..627f5c3 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -170,6 +170,39 @@ static inline scanStoreProc get_store_external_alpha(const int wide)
 	return (scanStoreProc)ACCESS(fbStoreExternalAlpha);
 }
 
+static scanFetchProc
+get_fetcher (pixman_image_t *image, int wide)
+{
+    if (IS_SOURCE_IMAGE (image))
+    {
+	return get_fetch_source_pict(wide);
+    }
+    else
+    {
+	bits_image_t *bits = (bits_image_t *)image;
+
+	if (bits->common.alpha_map)
+	{
+	    return get_fetch_external_alpha(wide);
+	}
+	else if ((bits->common.repeat != PIXMAN_REPEAT_NONE) &&
+		 bits->width == 1 &&
+		 bits->height == 1)
+	{
+	    return get_fetch_solid(wide);
+	}
+	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION
+                && bits->common.repeat != PIXMAN_REPEAT_PAD && bits->common.repeat != PIXMAN_REPEAT_REFLECT)
+	{
+	    return get_fetch(wide);
+	}
+	else
+	{
+	    return get_fetch_transformed(wide);
+	}
+    }
+}
+
 #ifndef PIXMAN_FB_ACCESSORS
 static
 #endif
@@ -200,34 +233,8 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     
     if (data->op == PIXMAN_OP_CLEAR)
         fetchSrc = NULL;
-    else if (IS_SOURCE_IMAGE (data->src))
-    {
-	fetchSrc = get_fetch_source_pict(wide);
-    }
     else
-    {
-	bits_image_t *bits = (bits_image_t *)data->src;
-
-	if (bits->common.alpha_map)
-	{
-	    fetchSrc = get_fetch_external_alpha(wide);
-	}
-	else if ((bits->common.repeat != PIXMAN_REPEAT_NONE) &&
-		 bits->width == 1 &&
-		 bits->height == 1)
-	{
-	    fetchSrc = get_fetch_solid(wide);
-	}
-	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION
-                && bits->common.repeat != PIXMAN_REPEAT_PAD && bits->common.repeat != PIXMAN_REPEAT_REFLECT)
-	{
-	    fetchSrc = get_fetch(wide);
-	}
-	else
-	{
-	    fetchSrc = get_fetch_transformed(wide);
-	}
-    }
+	fetchSrc = get_fetcher (data->src, wide);
 
     if (!data->mask || data->op == PIXMAN_OP_CLEAR)
     {
@@ -235,29 +242,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     }
     else
     {
-	if (IS_SOURCE_IMAGE (data->mask))
-	{
-	    fetchMask = (scanFetchProc)pixmanFetchSourcePict;
-	}
-	else
-	{
-	    bits_image_t *bits = (bits_image_t *)data->mask;
-
-	    if (bits->common.alpha_map)
-	    {
-		fetchMask = get_fetch_external_alpha(wide);
-	    }
-	    else if ((bits->common.repeat != PIXMAN_REPEAT_NONE) &&
-		     bits->width == 1 && bits->height == 1)
-	    {
-		fetchMask = get_fetch_solid(wide);
-	    }
-	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION
-                    && bits->common.repeat != PIXMAN_REPEAT_PAD && bits->common.repeat != PIXMAN_REPEAT_REFLECT)
-		fetchMask = get_fetch(wide);
-	    else
-		fetchMask = get_fetch_transformed(wide);
-	}
+	fetchMask = get_fetcher (data->mask, wide);
     }
 
     if (data->dest->common.alpha_map)
commit eb5d30a9d3bfb1bddaf9e60e2092353fe6b1dd48
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 02:03:03 2009 -0400

    Get rid of SOURCE_IMAGE_CLASS_NEITHER again

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index fbeb5ed..68b5683 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -190,7 +190,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 				       data->xSrc, data->ySrc,
 				       data->width, data->height);
 
-    maskClass = SOURCE_IMAGE_CLASS_NEITHER;
+    maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
     if (data->mask)
     {
 	maskClass = _pixman_image_classify (data->mask,
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 36a189a..819d29a 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -41,10 +41,7 @@ SourcePictureClassify (pixman_image_t *image,
 {
     source_image_t *pict = &image->source;
     
-    if (pict->class != SOURCE_IMAGE_CLASS_UNKNOWN)
-	return pict->class;
-
-    pict->class = SOURCE_IMAGE_CLASS_NEITHER;
+    pict->class = SOURCE_IMAGE_CLASS_UNKNOWN;
     
     if (pict->common.type == SOLID)
     {
@@ -84,7 +81,10 @@ SourcePictureClassify (pixman_image_t *image,
 	    if (pict->common.transform)
 	    {
 		if (!pixman_transform_point_3d (pict->common.transform, &v))
-		    return SOURCE_IMAGE_CLASS_UNKNOWN;
+		{
+		    pict->class = SOURCE_IMAGE_CLASS_UNKNOWN;
+		    goto out;
+		}
 	    }
 
 	    factors[i] = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
@@ -96,6 +96,7 @@ SourcePictureClassify (pixman_image_t *image,
 	    pict->class = SOURCE_IMAGE_CLASS_VERTICAL;
     }
 
+out:
     return pict->class;
 }
 
@@ -179,7 +180,7 @@ _pixman_image_classify (pixman_image_t *image,
     if (image->common.classify)
 	return image->common.classify (image, x, y, width, height);
     else
-	return SOURCE_IMAGE_CLASS_NEITHER;
+	return SOURCE_IMAGE_CLASS_UNKNOWN;
 }
 
 /* Ref Counting */
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index c2b7e44..3049c7c 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -281,7 +281,6 @@ typedef enum
     SOURCE_IMAGE_CLASS_UNKNOWN,
     SOURCE_IMAGE_CLASS_HORIZONTAL,
     SOURCE_IMAGE_CLASS_VERTICAL,
-    SOURCE_IMAGE_CLASS_NEITHER
 } source_pict_class_t;
 
 typedef source_pict_class_t (* classify_func_t) (pixman_image_t *image,
commit 87922006e506a252c81d42f0c1bacb59d1c67e60
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Thu Apr 30 01:49:13 2009 -0400

    More refactoring notes

diff --git a/pixman/refactor b/pixman/refactor
index ec72f73..c887539 100644
--- a/pixman/refactor
+++ b/pixman/refactor
@@ -1,21 +1,84 @@
+Roadmap
+
+See "Render Algorithm" below for rationale
+
+Images will eventually have these virtual functions:
+
+       get_scanline()
+       get_scanline_wide()
+       get_pixel()
+       get_pixel_wide()
+       get_untransformed_pixel()
+       get_untransformed_pixel_wide()
+       get_unfiltered_pixel()
+       get_unfiltered_pixel_wide()
+
+       store_scanline()
+       store_scanline_wide()
+
+1.
+
+Initially we will just have get_scanline() and get_scanline_wide();
+these will be based on the ones in pixman-compose. Hopefully this will
+reduce the complexity in pixman_composite_rect_general().
+
+Note that there are access considerations - the compose function is
+being compiled twice.
+
+
+2.
+
+Split image types into their own source files. Export noop virtual
+reinit() call.  Call this whenever a property of the image changes.
+
+
+3. 
+
+Split the get_scanline() call into smaller functions that are
+initialized by the reinit() call.
+
+
+
 The Render Algorithm:
 	(first repeat, then filter, then transform, then clip)
 
-starting from a destination pixel (x, y), do
+Starting from a destination pixel (x, y), do
 
 	1 x = x - xDst + xSrc
 	  y = y - yDst + ySrc
 
-	1.5 reject pixel that is outside the clip	// ie., clip is not affect by repeat or transform
-							// (This also answers the old FIXME in 
-							//  in pixman_compute_region()
-							// Also, if we are ignoring the hierarchy clip
-							// altogether, 
-	2 Transform pixel: (x, y) = T(x, y)
+	2 reject pixel that is outside the clip
+
+	This treats clipping as something that happens after
+	transformation, which I think is correct for client clips. For
+	hierarchy clips it is wrong, but who really cares? Without
+	GraphicsExposes hierarchy clips are basically irrelevant. Yes,
+	you could imagine cases where the pixels of a subwindow of a
+	redirected, transformed window should be treated as
+	transparent. I don't really care
 
-	3 Call p = GetUntransformedPixel (x, y)
+	Basically, I think the render spec should say that pixels that
+	are unavailable due to the hierarchy have undefined content,
+	and that GraphicsExposes are not generated. Ie., basically
+	that using non-redirected windows as sources is fail. This is
+	at least consistent with the current implementation and we can
+	update the spec later if someone makes it work.
 
-	4 If the image has an alpha map, then
+	The implication for render is that it should stop passing the
+	hierarchy clip to pixman. In pixman, if a source image has a
+	clip it should be used in computing the composite region and
+	nowhere else, regardless of what "has_client_clip" says. The
+	default should be for there to not be any clip.
+
+	I would really like to get rid of the client clip as well for
+	source images, but unfortunately there is at least one
+	application in the wild that uses them.
+
+	3 Transform pixel: (x, y) = T(x, y)
+
+	4 Call p = GetUntransformedPixel (x, y)
+
+	5 If the image has an alpha map, then
 
 		Call GetUntransformedPixel (x, y) on the alpha map
 		
@@ -25,14 +88,14 @@ starting from a destination pixel (x, y), do
 
 	Where GetUnTransformedPixel is:
 
-	5 switch (filter)
+	6 switch (filter)
 	  {
 	  case NEAREST:
 		return GetUnfilteredPixel (x, y);
 		break;
 
 	  case BILINEAR:
-		return GetUnfilteredPixel (...) // 4 times + return.
+		return GetUnfilteredPixel (...) // 4 times 
 		break;
 
 	  case CONVOLUTION:
@@ -42,7 +105,7 @@ starting from a destination pixel (x, y), do
 
 	Where GetUnfilteredPixel (x, y) is
 
-	6 switch (repeat)
+	7 switch (repeat)
 	   {
 	   case REPEAT_NORMAL:
 	   case REPEAT_PAD:
@@ -60,7 +123,7 @@ starting from a destination pixel (x, y), do
 
 	Where GetRawPixel (x, y) is
 
-	7 Compute the pixel in question, depending on image type.
+	8 Compute the pixel in question, depending on image type.
 
 For gradients, repeat has a totally different meaning, so
 UnfilteredPixel() and RawPixel() must be the same function so that
@@ -79,7 +142,7 @@ So, the GetRawPixel
 It is then possible to build things like "get scanline" or "get tile" on
 top of this. In the simplest case, just repeatedly calling GetPixel()
 would work, but specialized get_scanline()s or get_tile()s could be
-plugged in for common cases.
+plugged in for common cases. 
 
 By not plugging anything in for images with access functions, we only
 have to compile the pixel functions twice, not the scanline functions.
@@ -161,6 +224,18 @@ issues are
   be declared in pixman-private.h. This should allow us to get rid
   of the pixman-mmx.h files.
 
+  The fast path table should describe each fast path. Ie there should
+  be bitfields indicating what things the fast path can handle, rather than
+  like now where it is only allowed to take one format per src/mask/dest. Ie., 
+
+  { 
+    FAST_a8r8g8b8 | FAST_x8r8g8b8,
+    FAST_null,
+    FAST_x8r8g8b8,
+    FAST_repeat_normal | FAST_repeat_none,
+    the_fast_path
+  }
+
 There should then be *one* file that implements pixman_image_composite(). 
 This should do this:
 
commit 8c646172743568584f7cefd3177b410fd3b22b2d
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Wed Apr 29 23:13:14 2009 -0400

    Add notes on how Render actually works

diff --git a/pixman/pixman-compute-region.c b/pixman/pixman-compute-region.c
index a93cee0..55bca18 100644
--- a/pixman/pixman-compute-region.c
+++ b/pixman/pixman-compute-region.c
@@ -98,7 +98,7 @@ miClipPictureSrc (pixman_region32_t *	pRegion,
 	 */
 	if (pPicture->common.has_client_clip)
 	{
-	    pixman_region32_translate ( pRegion, dx, dy);
+	    pixman_region32_translate (pRegion, dx, dy);
 	    
 	    if (!pixman_region32_intersect (pRegion, pRegion, 
 					    pPicture->common.src_clip))
diff --git a/pixman/refactor b/pixman/refactor
index b57e68a..ec72f73 100644
--- a/pixman/refactor
+++ b/pixman/refactor
@@ -1,6 +1,106 @@
+The Render Algorithm:
+	(first repeat, then filter, then transform, then clip)
+
+starting from a destination pixel (x, y), do
+
+	1 x = x - xDst + xSrc
+	  y = y - yDst + ySrc
+
+	1.5 reject pixel that is outside the clip	// ie., clip is not affect by repeat or transform
+							// (This also answers the old FIXME in 
+							//  in pixman_compute_region()
+							// Also, if we are ignoring the hierarchy clip
+							// altogether, 
+	2 Transform pixel: (x, y) = T(x, y)
+
+	3 Call p = GetUntransformedPixel (x, y)
+
+	4 If the image has an alpha map, then
+
+		Call GetUntransformedPixel (x, y) on the alpha map
+		
+		add resulting alpha channel to p
+
+	   return p
+
+	Where GetUnTransformedPixel is:
+
+	5 switch (filter)
+	  {
+	  case NEAREST:
+		return GetUnfilteredPixel (x, y);
+		break;
+
+	  case BILINEAR:
+		return GetUnfilteredPixel (...) // 4 times + return.
+		break;
+
+	  case CONVOLUTION:
+		return GetUnfilteredPixel (...) // as many times as necessary.
+		break;
+	  }
+
+	Where GetUnfilteredPixel (x, y) is
+
+	6 switch (repeat)
+	   {
+	   case REPEAT_NORMAL:
+	   case REPEAT_PAD:
+	   case REPEAT_REFLECT:
+		// adjust x, y as appropriate
+		break;
+
+	   case REPEAT_NONE:
+	        if (x, y) is outside image bounds
+		     return 0;
+		break;
+	   }
+
+	   return GetRawPixel(x, y)
+
+	Where GetRawPixel (x, y) is
+
+	7 Compute the pixel in question, depending on image type.
+
+For gradients, repeat has a totally different meaning, so
+UnfilteredPixel() and RawPixel() must be the same function so that
+gradients can do their own repeat algorithm.
+
+So, the GetRawPixel
+
+	for bits must deal with repeats
+	for gradients must deal with repeats (differently)
+	for solids, should ignore repeats.
+
+	for polygons, when we add them, either ignore repeats or do
+	something similar to bits (in which case, we may want an extra
+	layer of indirection to modify the coordinates).
+
+It is then possible to build things like "get scanline" or "get tile" on
+top of this. In the simplest case, just repeatedly calling GetPixel()
+would work, but specialized get_scanline()s or get_tile()s could be
+plugged in for common cases.
+
+By not plugging anything in for images with access functions, we only
+have to compile the pixel functions twice, not the scanline functions.
+
+And we can get rid of fetchers for the bizarre formats that no one
+uses. Such as b2g3r3 etc. r1g2b1? Seriously? It is also worth
+considering a generic format based pixel fetcher for these edge cases.
+
+Since the actual routines depend on the image attributes, the images
+must be notified when those change and update their function pointers
+appropriately. So there should probably be a virtual function called
+(* reinit) or something like that.
+
+There will also be wide fetchers for both pixels and lines. The line
+fetcher will just call the wide pixel fetcher. The wide pixel fetcher
+will just call expand, except for 10 bit formats.
+
 Refactoring pixman
 
-The pixman code is not particularly nice to put it mildly. Among the issues are
+The pixman code is not particularly nice to put it mildly. Among the
+issues are
 
 - inconsistent naming style (fb vs Fb, camelCase vs
   underscore_naming). Sometimes there is even inconsistency *within*
@@ -86,6 +186,9 @@ Issues that must be kept in mind:
 	 compositing happens directly in the destination
 	 whenever possible.
 
+	- It should be possible to create GPU samplers from the
+	  images.
+
 The "horizontal" classification should be a bit in the image, the
 "vertical" classification should just happen inside the gradient
 file. Note though that
@@ -100,3 +203,11 @@ file. Note though that
 Who is responsible for repeats? In principle it should be the scanline
 fetch. Right now NORMAL repeats are handled by walk_composite_region()
 while other repeats are handled by the scanline code.
+
+
+(Random note on filtering: do you filter before or after
+transformation?  Hardware is going to filter after transformation;
+this is also what pixman does currently). It's not completely clear
+what filtering *after* transformation means. One thing that might look
+good would be to do *supersampling*, ie., compute multiple subpixels
+per destination pixel, then average them together.
commit 57a3d09b01834103e61785c6269d152bdfd91a4f
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Wed Apr 29 20:15:20 2009 -0400

    Move calls of the classification out of the if statements.

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index eb31617..fbeb5ed 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -181,20 +181,28 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     int i;
     scanStoreProc store;
     scanFetchProc fetchSrc = NULL, fetchMask = NULL, fetchDest = NULL;
-    unsigned int srcClass = SOURCE_IMAGE_CLASS_UNKNOWN;
-    unsigned int maskClass = SOURCE_IMAGE_CLASS_UNKNOWN;
     uint32_t *bits;
     int32_t stride;
     int xoff, yoff;
+    source_pict_class_t srcClass, maskClass;
 
+    srcClass = _pixman_image_classify (data->src,
+				       data->xSrc, data->ySrc,
+				       data->width, data->height);
+
+    maskClass = SOURCE_IMAGE_CLASS_NEITHER;
+    if (data->mask)
+    {
+	maskClass = _pixman_image_classify (data->mask,
+					    data->xSrc, data->ySrc,
+					    data->width, data->height);
+    }
+    
     if (data->op == PIXMAN_OP_CLEAR)
         fetchSrc = NULL;
     else if (IS_SOURCE_IMAGE (data->src))
     {
 	fetchSrc = get_fetch_source_pict(wide);
-	srcClass = _pixman_image_classify (data->src,
-					   data->xSrc, data->ySrc,
-					   data->width, data->height);
     }
     else
     {
@@ -209,7 +217,6 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 		 bits->height == 1)
 	{
 	    fetchSrc = get_fetch_solid(wide);
-	    srcClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 	}
 	else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION
                 && bits->common.repeat != PIXMAN_REPEAT_PAD && bits->common.repeat != PIXMAN_REPEAT_REFLECT)
@@ -231,9 +238,6 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	if (IS_SOURCE_IMAGE (data->mask))
 	{
 	    fetchMask = (scanFetchProc)pixmanFetchSourcePict;
-	    maskClass = _pixman_image_classify (data->mask,
-						data->xMask, data->yMask,
-						data->width, data->height);
 	}
 	else
 	{
@@ -247,7 +251,6 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 		     bits->width == 1 && bits->height == 1)
 	    {
 		fetchMask = get_fetch_solid(wide);
-		maskClass = SOURCE_IMAGE_CLASS_HORIZONTAL;
 	    }
 	    else if (!bits->common.transform && bits->common.filter != PIXMAN_FILTER_CONVOLUTION
                     && bits->common.repeat != PIXMAN_REPEAT_PAD && bits->common.repeat != PIXMAN_REPEAT_REFLECT)
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index ec2e58e..36a189a 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -163,6 +163,7 @@ allocate_image (void)
 	common->ref_count = 1;
 	common->read_func = NULL;
 	common->write_func = NULL;
+	common->classify = NULL;
     }
 
     return image;
commit cb73922ab9ab7d627f059601a03714cfff07d25b
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Wed Apr 29 19:55:19 2009 -0400

    Move SourcePictClassify into pixman-image.c
    
    In preparation for making pixman_image_t more of a real object, move
    SourcePictClassify into pixman-image.c and expose it through a
    function pointer. Later, this function will be split into smaller
    functions depending on the exact type of the image.

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 315206d..eb31617 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -42,66 +42,6 @@
 #define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
 #endif
 
-static unsigned int
-SourcePictureClassify (source_image_t *pict,
-		       int	       x,
-		       int	       y,
-		       int	       width,
-		       int	       height)
-{
-    if (pict->common.type == SOLID)
-    {
-	pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
-    }
-    else if (pict->common.type == LINEAR)
-    {
-	linear_gradient_t *linear = (linear_gradient_t *)pict;
-	pixman_vector_t   v;
-	pixman_fixed_32_32_t l;
-	pixman_fixed_48_16_t dx, dy, a, b, off;
-	pixman_fixed_48_16_t factors[4];
-	int	     i;
-
-	dx = linear->p2.x - linear->p1.x;
-	dy = linear->p2.y - linear->p1.y;
-	l = dx * dx + dy * dy;
-	if (l)
-	{
-	    a = (dx << 32) / l;
-	    b = (dy << 32) / l;
-	}
-	else
-	{
-	    a = b = 0;
-	}
-
-	off = (-a * linear->p1.x
-	       -b * linear->p1.y) >> 16;
-
-	for (i = 0; i < 3; i++)
-	{
-	    v.vector[0] = pixman_int_to_fixed ((i % 2) * (width  - 1) + x);
-	    v.vector[1] = pixman_int_to_fixed ((i / 2) * (height - 1) + y);
-	    v.vector[2] = pixman_fixed_1;
-
-	    if (pict->common.transform)
-	    {
-		if (!pixman_transform_point_3d (pict->common.transform, &v))
-		    return SOURCE_IMAGE_CLASS_UNKNOWN;
-	    }
-
-	    factors[i] = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
-	}
-
-	if (factors[2] == factors[0])
-	    pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
-	else if (factors[1] == factors[0])
-	    pict->class = SOURCE_IMAGE_CLASS_VERTICAL;
-    }
-
-    return pict->class;
-}
-
 static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
     uint32_t color;
@@ -252,9 +192,9 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     else if (IS_SOURCE_IMAGE (data->src))
     {
 	fetchSrc = get_fetch_source_pict(wide);
-	srcClass = SourcePictureClassify ((source_image_t *)data->src,
-					  data->xSrc, data->ySrc,
-					  data->width, data->height);
+	srcClass = _pixman_image_classify (data->src,
+					   data->xSrc, data->ySrc,
+					   data->width, data->height);
     }
     else
     {
@@ -291,9 +231,9 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	if (IS_SOURCE_IMAGE (data->mask))
 	{
 	    fetchMask = (scanFetchProc)pixmanFetchSourcePict;
-	    maskClass = SourcePictureClassify ((source_image_t *)data->mask,
-					       data->xMask, data->yMask,
-					       data->width, data->height);
+	    maskClass = _pixman_image_classify (data->mask,
+						data->xMask, data->yMask,
+						data->width, data->height);
 	}
 	else
 	{
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index bd52f25..ec2e58e 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -32,10 +32,78 @@
 
 #define Alpha(x) ((x) >> 24)
 
+static source_pict_class_t
+SourcePictureClassify (pixman_image_t *image,
+		       int	       x,
+		       int	       y,
+		       int	       width,
+		       int	       height)
+{
+    source_image_t *pict = &image->source;
+    
+    if (pict->class != SOURCE_IMAGE_CLASS_UNKNOWN)
+	return pict->class;
+
+    pict->class = SOURCE_IMAGE_CLASS_NEITHER;
+    
+    if (pict->common.type == SOLID)
+    {
+	pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
+    }
+    else if (pict->common.type == LINEAR)
+    {
+	linear_gradient_t *linear = (linear_gradient_t *)pict;
+	pixman_vector_t   v;
+	pixman_fixed_32_32_t l;
+	pixman_fixed_48_16_t dx, dy, a, b, off;
+	pixman_fixed_48_16_t factors[4];
+	int	     i;
+
+	dx = linear->p2.x - linear->p1.x;
+	dy = linear->p2.y - linear->p1.y;
+	l = dx * dx + dy * dy;
+	if (l)
+	{
+	    a = (dx << 32) / l;
+	    b = (dy << 32) / l;
+	}
+	else
+	{
+	    a = b = 0;
+	}
+
+	off = (-a * linear->p1.x
+	       -b * linear->p1.y) >> 16;
+
+	for (i = 0; i < 3; i++)
+	{
+	    v.vector[0] = pixman_int_to_fixed ((i % 2) * (width  - 1) + x);
+	    v.vector[1] = pixman_int_to_fixed ((i / 2) * (height - 1) + y);
+	    v.vector[2] = pixman_fixed_1;
+
+	    if (pict->common.transform)
+	    {
+		if (!pixman_transform_point_3d (pict->common.transform, &v))
+		    return SOURCE_IMAGE_CLASS_UNKNOWN;
+	    }
+
+	    factors[i] = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
+	}
+
+	if (factors[2] == factors[0])
+	    pict->class = SOURCE_IMAGE_CLASS_HORIZONTAL;
+	else if (factors[1] == factors[0])
+	    pict->class = SOURCE_IMAGE_CLASS_VERTICAL;
+    }
+
+    return pict->class;
+}
+
 static void
 init_source_image (source_image_t *image)
 {
     image->class = SOURCE_IMAGE_CLASS_UNKNOWN;
+    image->common.classify = SourcePictureClassify;
 }
 
 static pixman_bool_t
@@ -100,6 +168,19 @@ allocate_image (void)
     return image;
 }
 
+source_pict_class_t
+_pixman_image_classify (pixman_image_t *image,
+			int             x,
+			int             y,
+			int             width,
+			int             height)
+{
+    if (image->common.classify)
+	return image->common.classify (image, x, y, width, height);
+    else
+	return SOURCE_IMAGE_CLASS_NEITHER;
+}
+
 /* Ref Counting */
 PIXMAN_EXPORT pixman_image_t *
 pixman_image_ref (pixman_image_t *image)
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index debd723..c2b7e44 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -280,9 +280,22 @@ typedef enum
 {
     SOURCE_IMAGE_CLASS_UNKNOWN,
     SOURCE_IMAGE_CLASS_HORIZONTAL,
-    SOURCE_IMAGE_CLASS_VERTICAL
+    SOURCE_IMAGE_CLASS_VERTICAL,
+    SOURCE_IMAGE_CLASS_NEITHER
 } source_pict_class_t;
 
+typedef source_pict_class_t (* classify_func_t) (pixman_image_t *image,
+						 int             x,
+						 int             y,
+						 int             width,
+						 int             height);
+
+source_pict_class_t _pixman_image_classify (pixman_image_t *image,
+					    int             x,
+					    int             y,
+					    int             width,
+					    int             height);
+
 struct point
 {
     int16_t x, y;
@@ -306,6 +319,7 @@ struct image_common
     pixman_bool_t		component_alpha;
     pixman_read_memory_func_t	read_func;
     pixman_write_memory_func_t	write_func;
+    classify_func_t		classify;
 };
 
 struct source_image
@@ -380,6 +394,7 @@ union pixman_image
     image_type_t		type;
     image_common_t		common;
     bits_image_t		bits;
+    source_image_t		source;
     gradient_t			gradient;
     linear_gradient_t		linear;
     conical_gradient_t		conical;
commit 3d73ce6813743811ff4413df554b438d3790c361
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Wed Apr 29 01:44:56 2009 -0400

    More refactoring notes

diff --git a/pixman/refactor b/pixman/refactor
index a3ee523..b57e68a 100644
--- a/pixman/refactor
+++ b/pixman/refactor
@@ -10,6 +10,21 @@ The pixman code is not particularly nice to put it mildly. Among the issues are
 
  may be one of the ugliest names ever created.
 
+  coding style: 
+  	 use the one from cairo except that pixman uses this brace style:
+	 
+		while (blah)
+		{
+		}
+
+	Format do while like this:
+
+	       do 
+	       {
+
+	       } 
+	       while (...);
+
 - PIXMAN_COMPOSITE_RECT_GENERAL() is horribly complex
 
 - switch case logic in pixman-access.c
@@ -29,6 +44,11 @@ The pixman code is not particularly nice to put it mildly. Among the issues are
 - Code related to particular image types should be split into individual
   files.
 
+	pixman-bits-image.c
+	pixman-linear-gradient-image.c
+	pixman-radial-gradient-image.c
+	pixman-solid-image.c
+
 - Fast path code should be split into files based on architecture:
 
        pixman-mmx-fastpath.c
@@ -67,4 +87,16 @@ Issues that must be kept in mind:
 	 whenever possible.
 
 The "horizontal" classification should be a bit in the image, the
-"vertical" classification should just happen inside the gradient file.
+"vertical" classification should just happen inside the gradient
+file. Note though that
+
+      (a) these will change if the transformation/repeat changes.
+
+      (b) at the moment the optimization for linear gradients
+          takes the source rectangle into account. Presumably
+	  this is to also optimize the case where the gradient
+	  is close enough to horizontal?
+
+Who is responsible for repeats? In principle it should be the scanline
+fetch. Right now NORMAL repeats are handled by walk_composite_region()
+while other repeats are handled by the scanline code.
commit 4d255141f78451ec5edb27ed29437651d6f64491
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Tue Apr 28 23:02:49 2009 -0400

    Add refactoring notes

diff --git a/pixman/refactor b/pixman/refactor
new file mode 100644
index 0000000..a3ee523
--- /dev/null
+++ b/pixman/refactor
@@ -0,0 +1,70 @@
+Refactoring pixman
+
+The pixman code is not particularly nice to put it mildly. Among the issues are
+
+- inconsistent naming style (fb vs Fb, camelCase vs
+  underscore_naming). Sometimes there is even inconsistency *within*
+  one name.
+
+      fetchProc32 ACCESS(pixman_fetchProcForPicture32)
+
+  may be one of the ugliest names ever created.
+
+- PIXMAN_COMPOSITE_RECT_GENERAL() is horribly complex
+
+- switch case logic in pixman-access.c
+
+  Instead it would be better to just store function pointers in the
+  image objects themselves,
+
+  	get_pixel()
+	get_scanline()
+
+- Much of the scanline fetching code is for formats that no one 
+  ever uses. a2r2g2b2 anyone?
+
+  It would probably be worthwhile having a generic fetcher for any
+  pixman format whatsoever.
+
+- Code related to particular image types should be split into individual
+  files.
+
+- Fast path code should be split into files based on architecture:
+
+       pixman-mmx-fastpath.c
+       pixman-sse2-fastpath.c
+       pixman-c-fastpath.c
+
+       etc.
+
+  Each of these files should then export a fastpath table, which would
+  be declared in pixman-private.h. This should allow us to get rid
+  of the pixman-mmx.h files.
+
+There should then be *one* file that implements pixman_image_composite(). 
+This should do this:
+
+     optimize_operator();
+
+     convert 1x1 repeat to solid (actually this should be done at
+     image creation time).
+     
+     is there a useful fastpath?
+
+There should be a file called pixman-cpu.c that contains all the
+architecture specific stuff to detect what CPU features we have.
+
+Issues that must be kept in mind:
+
+       - we need accessor code to be preserved
+
+       - maybe there should be a "store_scanline" too?
+
+         Is this sufficient?
+
+	 We should preserve the optimization where the
+	 compositing happens directly in the destination
+	 whenever possible.
+
+The "horizontal" classification should be a bit in the image, the
+"vertical" classification should just happen inside the gradient file.


More information about the xorg-commit mailing list