pixman: Branch 'master' - 6 commits

Søren Sandmann Pedersen sandmann at kemper.freedesktop.org
Fri Apr 11 17:26:18 PDT 2008


 configure.ac                |    6 
 pixman/Makefile.am          |   12 
 pixman/combine.inc          | 1464 ++++++++++++++++++++++++++++++++++++++++++++
 pixman/combine.pl           |   69 ++
 pixman/pixman-access.c      |   16 
 pixman/pixman-combine.c     |  188 +++++
 pixman/pixman-compose.c     |   41 -
 pixman/pixman-private.h     |  215 ------
 pixman/pixman-transformed.c |   61 -
 9 files changed, 1796 insertions(+), 276 deletions(-)

New commits:
commit 9f76747adec7ea00e31dd817427c1ed1bfa86aa8
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Thu Apr 3 17:27:08 2008 -0700

    Use a macro to append _accessors to things.
    
    Signed-off-by: Søren Sandmann <sandmann at redhat.com>

diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
index 6c909de..5d91fca 100644
--- a/pixman/pixman-access.c
+++ b/pixman/pixman-access.c
@@ -31,16 +31,6 @@
 
 #include "pixman-private.h"
 
-#ifdef PIXMAN_FB_ACCESSORS
-#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture_accessors
-#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture_accessors
-#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture_accessors
-#else
-#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture
-#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture
-#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture
-#endif
-
 /*
  * YV12 setup and access macros
  */
@@ -643,7 +633,7 @@ fbFetch_yv12 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
     }
 }
 
-fetchProc FETCH_PROC_FOR_PICTURE (bits_image_t * pict)
+fetchProc ACCESS(pixman_fetchProcForPicture) (bits_image_t * pict)
 {
     switch(pict->format) {
     case PIXMAN_a8r8g8b8: return fbFetch_a8r8g8b8;
@@ -1154,7 +1144,7 @@ fbFetchPixel_yv12 (bits_image_t *pict, int offset, int line)
 	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
 }
 
-fetchPixelProc FETCH_PIXEL_PROC_FOR_PICTURE (bits_image_t * pict)
+fetchPixelProc ACCESS(pixman_fetchPixelProcForPicture) (bits_image_t * pict)
 {
     switch(pict->format) {
     case PIXMAN_a8r8g8b8: return fbFetchPixel_a8r8g8b8;
@@ -1634,7 +1624,7 @@ fbStore_g1 (pixman_image_t *image,
 }
 
 
-storeProc STORE_PROC_FOR_PICTURE (bits_image_t * pict)
+storeProc ACCESS(pixman_storeProcForPicture) (bits_image_t * pict)
 {
     switch(pict->format) {
     case PIXMAN_a8r8g8b8: return fbStore_a8r8g8b8;
diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 81b7b1a..09225c7 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -37,27 +37,8 @@
 
 #ifdef PIXMAN_FB_ACCESSORS
 #define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_accessors
-
-#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture_accessors
-#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture_accessors
-#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture_accessors
-
-#define FB_FETCH_TRANSFORMED fbFetchTransformed_accessors
-#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha_accessors
-#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha_accessors
-
 #else
-
 #define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
-
-#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture
-#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture
-#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture
-
-#define FB_FETCH_TRANSFORMED fbFetchTransformed
-#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha
-#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha
-
 #endif
 
 static unsigned int
@@ -124,7 +105,7 @@ static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t
 {
     uint32_t color;
     uint32_t *end;
-    fetchPixelProc fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+    fetchPixelProc fetch = ACCESS(pixman_fetchPixelProcForPicture)(pict);
 
     color = fetch(pict, 0, 0);
 
@@ -135,7 +116,7 @@ static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t
 
 static void fbFetch(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
-    fetchProc fetch = FETCH_PROC_FOR_PICTURE(pict);
+    fetchProc fetch = ACCESS(pixman_fetchProcForPicture)(pict);
 
     fetch(pict, x, y, width, buffer);
 }
@@ -145,7 +126,7 @@ fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t *bits;
     int32_t stride;
-    storeProc store = STORE_PROC_FOR_PICTURE(pict);
+    storeProc store = ACCESS(pixman_storeProcForPicture)(pict);
     const pixman_indexed_t * indexed = pict->indexed;
 
     bits = pict->bits;
@@ -191,7 +172,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 
 	if (bits->common.alpha_map)
 	{
-	    fetchSrc = (scanFetchProc)FB_FETCH_EXTERNAL_ALPHA;
+	    fetchSrc = (scanFetchProc)ACCESS(fbFetchExternalAlpha);
 	}
 	else if ((bits->common.repeat == PIXMAN_REPEAT_NORMAL || bits->common.repeat == PIXMAN_REPEAT_PAD) &&
 		 bits->width == 1 &&
@@ -207,7 +188,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	}
 	else
 	{
-	    fetchSrc = (scanFetchProc)FB_FETCH_TRANSFORMED;
+	    fetchSrc = (scanFetchProc)ACCESS(fbFetchTransformed);
 	}
     }
 
@@ -230,7 +211,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 
 	    if (bits->common.alpha_map)
 	    {
-		fetchMask = (scanFetchProc)FB_FETCH_EXTERNAL_ALPHA;
+		fetchMask = (scanFetchProc)ACCESS(fbFetchExternalAlpha);
 	    }
 	    else if ((bits->common.repeat == PIXMAN_REPEAT_NORMAL || bits->common.repeat == PIXMAN_REPEAT_PAD) &&
 		     bits->width == 1 && bits->height == 1)
@@ -242,14 +223,14 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
                     && bits->common.repeat != PIXMAN_REPEAT_PAD)
 		fetchMask = (scanFetchProc)fbFetch;
 	    else
-		fetchMask = (scanFetchProc)FB_FETCH_TRANSFORMED;
+		fetchMask = (scanFetchProc)ACCESS(fbFetchTransformed);
 	}
     }
 
     if (data->dest->common.alpha_map)
     {
-	fetchDest = (scanFetchProc)FB_FETCH_EXTERNAL_ALPHA;
-	store = (scanStoreProc)FB_STORE_EXTERNAL_ALPHA;
+	fetchDest = (scanFetchProc)ACCESS(fbFetchExternalAlpha);
+	store = (scanStoreProc)ACCESS(fbStoreExternalAlpha);
 
 	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 	    fetchDest = NULL;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index b686770..869bc80 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -510,6 +510,8 @@ union pixman_image
 
 #ifdef PIXMAN_FB_ACCESSORS
 
+#define ACCESS(sym) sym##_accessors
+
 #define READ(img, ptr)							\
     ((img)->common.read_func ((ptr), sizeof(*(ptr))))
 #define WRITE(img, ptr,val)						\
@@ -535,6 +537,8 @@ union pixman_image
 
 #else
 
+#define ACCESS(sym) sym
+
 #define READ(img, ptr)		(*(ptr))
 #define WRITE(img, ptr, val)	(*(ptr) = (val))
 #define MEMCPY_WRAPPED(img, dst, src, size)					\
diff --git a/pixman/pixman-transformed.c b/pixman/pixman-transformed.c
index 19085ca..e3ef17e 100644
--- a/pixman/pixman-transformed.c
+++ b/pixman/pixman-transformed.c
@@ -31,27 +31,6 @@
 
 #include "pixman-private.h"
 
-#ifdef PIXMAN_FB_ACCESSORS
-#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture_accessors
-#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture_accessors
-#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture_accessors
-
-#define FB_FETCH_TRANSFORMED fbFetchTransformed_accessors
-#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha_accessors
-#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha_accessors
-
-#else
-
-#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture
-#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture
-#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture
-
-#define FB_FETCH_TRANSFORMED fbFetchTransformed
-#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha
-#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha
-
-#endif
-
 /*
  * Fetch from region strategies
  */
@@ -93,7 +72,7 @@ fbFetchTransformed_Nearest_Normal(bits_image_t * pict, int width, uint32_t *buff
     int x, y, i;
 
     /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+    fetch = ACCESS(pixman_fetchPixelProcForPicture)(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
@@ -139,7 +118,7 @@ fbFetchTransformed_Nearest_Pad(bits_image_t * pict, int width, uint32_t *buffer,
     int x, y, i;
 
     /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+    fetch = ACCESS(pixman_fetchPixelProcForPicture)(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
@@ -186,7 +165,7 @@ fbFetchTransformed_Nearest_General(bits_image_t * pict, int width, uint32_t *buf
     int x, y, i;
 
     /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+    fetch = ACCESS(pixman_fetchPixelProcForPicture)(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
     {
@@ -229,7 +208,7 @@ fbFetchTransformed_Bilinear_Normal(bits_image_t * pict, int width, uint32_t *buf
     int i;
 
     /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+    fetch = ACCESS(pixman_fetchPixelProcForPicture)(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
@@ -306,7 +285,7 @@ fbFetchTransformed_Bilinear_Pad(bits_image_t * pict, int width, uint32_t *buffer
     int i;
 
     /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+    fetch = ACCESS(pixman_fetchPixelProcForPicture)(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
@@ -383,7 +362,7 @@ fbFetchTransformed_Bilinear_General(bits_image_t * pict, int width, uint32_t *bu
     int i;
 
     /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+    fetch = ACCESS(pixman_fetchPixelProcForPicture)(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
     {
@@ -465,7 +444,7 @@ fbFetchTransformed_Convolution(bits_image_t * pict, int width, uint32_t *buffer,
     int32_t cheight = pixman_fixed_to_int(params[1]);
     int xoff = (params[0] - pixman_fixed_1) >> 1;
     int yoff = (params[1] - pixman_fixed_1) >> 1;
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+    fetch = ACCESS(pixman_fetchPixelProcForPicture)(pict);
 
     params += 2;
     for (i = 0; i < width; ++i) {
@@ -567,7 +546,8 @@ adjust (pixman_vector_t *v, pixman_vector_t *u, pixman_fixed_t adjustment)
 }
 
 void
-FB_FETCH_TRANSFORMED(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
+ACCESS(fbFetchTransformed)(bits_image_t * pict, int x, int y, int width,
+                           uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
     uint32_t     *bits;
     int32_t    stride;
@@ -650,24 +630,25 @@ FB_FETCH_TRANSFORMED(bits_image_t * pict, int x, int y, int width, uint32_t *buf
 #define SCANLINE_BUFFER_LENGTH 2048
 
 void
-FB_FETCH_EXTERNAL_ALPHA(bits_image_t * pict, int x, int y, int width,
-                        uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
+ACCESS(fbFetchExternalAlpha)(bits_image_t * pict, int x, int y, int width,
+                             uint32_t *buffer, uint32_t *mask,
+                             uint32_t maskBits)
 {
     int i;
     uint32_t _alpha_buffer[SCANLINE_BUFFER_LENGTH];
     uint32_t *alpha_buffer = _alpha_buffer;
 
     if (!pict->common.alpha_map) {
-        FB_FETCH_TRANSFORMED (pict, x, y, width, buffer, mask, maskBits);
+        ACCESS(fbFetchTransformed) (pict, x, y, width, buffer, mask, maskBits);
 	return;
     }
     if (width > SCANLINE_BUFFER_LENGTH)
         alpha_buffer = (uint32_t *) pixman_malloc_ab (width, sizeof(uint32_t));
 
-    FB_FETCH_TRANSFORMED(pict, x, y, width, buffer, mask, maskBits);
-    FB_FETCH_TRANSFORMED((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
-			 y - pict->common.alpha_origin.y, width, alpha_buffer,
-			 mask, maskBits);
+    ACCESS(fbFetchTransformed)(pict, x, y, width, buffer, mask, maskBits);
+    ACCESS(fbFetchTransformed)((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
+                               y - pict->common.alpha_origin.y, width,
+                               alpha_buffer, mask, maskBits);
     for (i = 0; i < width; ++i) {
         if (!mask || mask[i] & maskBits)
 	{
@@ -684,8 +665,8 @@ FB_FETCH_EXTERNAL_ALPHA(bits_image_t * pict, int x, int y, int width,
 }
 
 void
-FB_STORE_EXTERNAL_ALPHA(bits_image_t * pict, int x, int y, int width,
-                        uint32_t *buffer)
+ACCESS(fbStoreExternalAlpha)(bits_image_t * pict, int x, int y, int width,
+                             uint32_t *buffer)
 {
     uint32_t *bits, *alpha_bits;
     int32_t stride, astride;
@@ -702,8 +683,8 @@ FB_STORE_EXTERNAL_ALPHA(bits_image_t * pict, int x, int y, int width,
 	return;
     }
 
-    store = STORE_PROC_FOR_PICTURE(pict);
-    astore = STORE_PROC_FOR_PICTURE(pict->common.alpha_map);
+    store = ACCESS(pixman_storeProcForPicture)(pict);
+    astore = ACCESS(pixman_storeProcForPicture)(pict->common.alpha_map);
     aindexed = pict->common.alpha_map->indexed;
 
     ax = x;
commit a7065162aff9d1de9fc3db9756e4e76f6b8d063b
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Tue Apr 1 17:26:03 2008 -0700

    Generate 64-bit combining functions.
    
    Copy some macros from pixman-private.h into combine.inc and update them to
    generate 64-bit versions as appropriate.  Add a rule to generate
    pixman-combine64.c and add it to the build.
    
    Signed-off-by: Søren Sandmann <sandmann at redhat.com>

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 9ef64ee..74c6102 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -9,6 +9,7 @@ libpixman_1_la_SOURCES =		\
 	pixman-private.h		\
 	pixman-image.c			\
 	pixman-combine32.c		\
+	pixman-combine64.c		\
 	pixman-compose.c		\
 	pixman-compose-accessors.c	\
 	pixman-pict.c			\
@@ -30,8 +31,11 @@ noinst_LTLIBRARIES =
 pixman-combine32.c : combine.inc combine.pl
 	$(PERL) $(srcdir)/combine.pl 8 < $< > $@ || ($(RM) $@; exit 1)
 
+pixman-combine64.c : combine.inc combine.pl
+	$(PERL) $(srcdir)/combine.pl 16 < $< > $@ || ($(RM) $@; exit 1)
+
 EXTRA_DIST = Makefile.win32 combine.inc combine.pl
-CLEANFILES = pixman-combine32.c
+CLEANFILES = pixman-combine32.c pixman-combine64.c
 
 # mmx code
 if USE_MMX
diff --git a/pixman/combine.inc b/pixman/combine.inc
index 42a06b2..c3a57eb 100644
--- a/pixman/combine.inc
+++ b/pixman/combine.inc
@@ -10,9 +10,17 @@
  * Helper macros.
  */
 
-#define FbGen(x,y,i,ax,ay,t,u,v) ((t) = (FbIntMult(FbGet8(y,i),ay,(u)) + \
-					 FbIntMult(FbGet8(x,i),ax,(v))), \
-				  (comp4_t) ((comp1_t) ((t) |		 \
+#define IntMult(a,b,t) ( (t) = (a) * (b) + ONE_HALF, ( ( ( (t)>>G_SHIFT ) + (t) )>>G_SHIFT ) )
+#define IntDiv(a,b)    (((comp2_t) (a) * MASK) / (b))
+
+#define GetComp(v,i)   ((comp2_t) (comp1_t) ((v) >> i))
+
+#define Add(x,y,i,t)   ((t) = GetComp(x,i) + GetComp(y,i),              \
+                        (comp4_t) ((comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
+
+#define FbGen(x,y,i,ax,ay,t,u,v) ((t) = (IntMult(GetComp(y,i),ay,(u)) + \
+					 IntMult(GetComp(x,i),ax,(v))), \
+				  (comp4_t) ((comp1_t) ((t) |		\
 							 (0 - ((t) >> G_SHIFT)))) << (i))
 
 /*
@@ -398,7 +406,7 @@ fbCombineSaturateU (comp4_t *dest, const comp4_t *src, int width)
         da = ~d >> A_SHIFT;
         if (sa > da)
         {
-            sa = FbIntDiv(da, sa);
+            sa = IntDiv(da, sa);
             FbByteMul(s, sa);
         };
         FbByteAdd(d, s);
@@ -455,7 +463,7 @@ fbCombineDisjointOutPart (comp1_t a, comp1_t b)
     b = ~b;		    /* 1 - b */
     if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
 	return MASK;	    /* 1 */
-    return FbIntDiv(b,a);   /* (1-b) / a */
+    return IntDiv(b,a);     /* (1-b) / a */
 }
 
 /* portion covered by both a and b */
@@ -469,7 +477,7 @@ fbCombineDisjointInPart (comp1_t a, comp1_t b)
     b = ~b;		    /* 1 - b */
     if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
 	return 0;	    /* 1 - 1 */
-    return ~FbIntDiv(b,a);  /* 1 - (1-b) / a */
+    return ~IntDiv(b,a);    /* 1 - (1-b) / a */
 }
 
 /* portion covered by a but not b */
@@ -483,7 +491,7 @@ fbCombineConjointOutPart (comp1_t a, comp1_t b)
 
     if (b >= a)		    /* b >= a -> b/a >= 1 */
 	return 0x00;	    /* 0 */
-    return ~FbIntDiv(b,a);   /* 1 - b/a */
+    return ~IntDiv(b,a);    /* 1 - b/a */
 }
 
 /* portion covered by both a and b */
@@ -494,7 +502,7 @@ fbCombineConjointInPart (comp1_t a, comp1_t b)
 
     if (b >= a)		    /* b >= a -> b/a >= 1 */
 	return MASK;	    /* 1 */
-    return FbIntDiv(b,a);   /* b/a */
+    return IntDiv(b,a);     /* b/a */
 }
 
 FASTCALL static void
@@ -1078,22 +1086,22 @@ fbCombineSaturateC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
         da = ~d >> A_SHIFT;
 
         if (sb <= da)
-            m = FbAdd(s,d,0,t);
+            m = Add(s,d,0,t);
         else
             m = FbGen (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
 
         if (sg <= da)
-            n = FbAdd(s,d,G_SHIFT,t);
+            n = Add(s,d,G_SHIFT,t);
         else
             n = FbGen (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
 
         if (sr <= da)
-            o = FbAdd(s,d,B_SHIFT,t);
+            o = Add(s,d,B_SHIFT,t);
         else
             o = FbGen (s, d, B_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
 
         if (sa <= da)
-            p = FbAdd(s,d,A_SHIFT,t);
+            p = Add(s,d,A_SHIFT,t);
         else
             p = FbGen (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
 
@@ -1128,17 +1136,17 @@ fbCombineDisjointGeneralC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width
             Fa = 0;
             break;
         case CombineAOut:
-            m = fbCombineDisjointOutPart ((comp1_t) (sa >> 0), da);
-            n = fbCombineDisjointOutPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-            o = fbCombineDisjointOutPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
-            p = fbCombineDisjointOutPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
+            m = (comp4_t)fbCombineDisjointOutPart ((comp1_t) (sa >> 0), da);
+            n = (comp4_t)fbCombineDisjointOutPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+            o = (comp4_t)fbCombineDisjointOutPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
+            p = (comp4_t)fbCombineDisjointOutPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
             Fa = m|n|o|p;
             break;
         case CombineAIn:
-            m = fbCombineDisjointInPart ((comp1_t) (sa >> 0), da);
-            n = fbCombineDisjointInPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-            o = fbCombineDisjointInPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
-            p = fbCombineDisjointInPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
+            m = (comp4_t)fbCombineDisjointInPart ((comp1_t) (sa >> 0), da);
+            n = (comp4_t)fbCombineDisjointInPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+            o = (comp4_t)fbCombineDisjointInPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
+            p = (comp4_t)fbCombineDisjointInPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
             Fa = m|n|o|p;
             break;
         case CombineA:
@@ -1151,27 +1159,27 @@ fbCombineDisjointGeneralC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width
             Fb = 0;
             break;
         case CombineBOut:
-            m = fbCombineDisjointOutPart (da, (comp1_t) (sa >> 0));
-            n = fbCombineDisjointOutPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-            o = fbCombineDisjointOutPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
-            p = fbCombineDisjointOutPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
+            m = (comp4_t)fbCombineDisjointOutPart (da, (comp1_t) (sa >> 0));
+            n = (comp4_t)fbCombineDisjointOutPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+            o = (comp4_t)fbCombineDisjointOutPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
+            p = (comp4_t)fbCombineDisjointOutPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
             Fb = m|n|o|p;
             break;
         case CombineBIn:
-            m = fbCombineDisjointInPart (da, (comp1_t) (sa >> 0));
-            n = fbCombineDisjointInPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-            o = fbCombineDisjointInPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
-            p = fbCombineDisjointInPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
+            m = (comp4_t)fbCombineDisjointInPart (da, (comp1_t) (sa >> 0));
+            n = (comp4_t)fbCombineDisjointInPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+            o = (comp4_t)fbCombineDisjointInPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
+            p = (comp4_t)fbCombineDisjointInPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
             Fb = m|n|o|p;
             break;
         case CombineB:
             Fb = ~0;
             break;
         }
-        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
-        n = FbGen (s,d,G_SHIFT,FbGet8(Fa,G_SHIFT),FbGet8(Fb,G_SHIFT),t, u, v);
-        o = FbGen (s,d,B_SHIFT,FbGet8(Fa,B_SHIFT),FbGet8(Fb,B_SHIFT),t, u, v);
-        p = FbGen (s,d,A_SHIFT,FbGet8(Fa,A_SHIFT),FbGet8(Fb,A_SHIFT),t, u, v);
+        m = FbGen (s,d,0,GetComp(Fa,0),GetComp(Fb,0),t, u, v);
+        n = FbGen (s,d,G_SHIFT,GetComp(Fa,G_SHIFT),GetComp(Fb,G_SHIFT),t, u, v);
+        o = FbGen (s,d,B_SHIFT,GetComp(Fa,B_SHIFT),GetComp(Fb,B_SHIFT),t, u, v);
+        p = FbGen (s,d,A_SHIFT,GetComp(Fa,A_SHIFT),GetComp(Fb,A_SHIFT),t, u, v);
         s = m|n|o|p;
 	*(dest + i) = s;
     }
@@ -1252,17 +1260,17 @@ fbCombineConjointGeneralC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width
             Fa = 0;
             break;
         case CombineAOut:
-            m = fbCombineConjointOutPart ((comp1_t) (sa >> 0), da);
-            n = fbCombineConjointOutPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-            o = fbCombineConjointOutPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
-            p = fbCombineConjointOutPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
+            m = (comp4_t)fbCombineConjointOutPart ((comp1_t) (sa >> 0), da);
+            n = (comp4_t)fbCombineConjointOutPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+            o = (comp4_t)fbCombineConjointOutPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
+            p = (comp4_t)fbCombineConjointOutPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
             Fa = m|n|o|p;
             break;
         case CombineAIn:
-            m = fbCombineConjointInPart ((comp1_t) (sa >> 0), da);
-            n = fbCombineConjointInPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-            o = fbCombineConjointInPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
-            p = fbCombineConjointInPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
+            m = (comp4_t)fbCombineConjointInPart ((comp1_t) (sa >> 0), da);
+            n = (comp4_t)fbCombineConjointInPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+            o = (comp4_t)fbCombineConjointInPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
+            p = (comp4_t)fbCombineConjointInPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
             Fa = m|n|o|p;
             break;
         case CombineA:
@@ -1275,27 +1283,27 @@ fbCombineConjointGeneralC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width
             Fb = 0;
             break;
         case CombineBOut:
-            m = fbCombineConjointOutPart (da, (comp1_t) (sa >> 0));
-            n = fbCombineConjointOutPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-            o = fbCombineConjointOutPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
-            p = fbCombineConjointOutPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
+            m = (comp4_t)fbCombineConjointOutPart (da, (comp1_t) (sa >> 0));
+            n = (comp4_t)fbCombineConjointOutPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+            o = (comp4_t)fbCombineConjointOutPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
+            p = (comp4_t)fbCombineConjointOutPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
             Fb = m|n|o|p;
             break;
         case CombineBIn:
-            m = fbCombineConjointInPart (da, (comp1_t) (sa >> 0));
-            n = fbCombineConjointInPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-            o = fbCombineConjointInPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
-            p = fbCombineConjointInPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
+            m = (comp4_t)fbCombineConjointInPart (da, (comp1_t) (sa >> 0));
+            n = (comp4_t)fbCombineConjointInPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+            o = (comp4_t)fbCombineConjointInPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
+            p = (comp4_t)fbCombineConjointInPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
             Fb = m|n|o|p;
             break;
         case CombineB:
             Fb = ~0;
             break;
         }
-        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
-        n = FbGen (s,d,G_SHIFT,FbGet8(Fa,G_SHIFT),FbGet8(Fb,G_SHIFT),t, u, v);
-        o = FbGen (s,d,B_SHIFT,FbGet8(Fa,B_SHIFT),FbGet8(Fb,B_SHIFT),t, u, v);
-        p = FbGen (s,d,A_SHIFT,FbGet8(Fa,A_SHIFT),FbGet8(Fb,A_SHIFT),t, u, v);
+        m = FbGen (s,d,0,GetComp(Fa,0),GetComp(Fb,0),t, u, v);
+        n = FbGen (s,d,G_SHIFT,GetComp(Fa,G_SHIFT),GetComp(Fb,G_SHIFT),t, u, v);
+        o = FbGen (s,d,B_SHIFT,GetComp(Fa,B_SHIFT),GetComp(Fb,B_SHIFT),t, u, v);
+        p = FbGen (s,d,A_SHIFT,GetComp(Fa,A_SHIFT),GetComp(Fb,A_SHIFT),t, u, v);
         s = m|n|o|p;
 	*(dest + i) = s;
     }
diff --git a/pixman/combine.pl b/pixman/combine.pl
index 8024333..ba13d6c 100644
--- a/pixman/combine.pl
+++ b/pixman/combine.pl
@@ -62,5 +62,8 @@ while (<STDIN>) {
     s/comp2_t/uint${half_pixel_size}_t/g;
     s/comp4_t/uint${pixel_size}_t/g;
 
+    # Change the function table name for the 64-bit version.
+    s/pixman_composeFunctions/pixman_composeFunctions64/ if $size == 16;
+
     print;
 }
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index e8c3f39..b686770 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -185,6 +185,7 @@ typedef struct _FbComposeFunctions64 {
 } FbComposeFunctions64;
 
 extern FbComposeFunctions32 pixman_composeFunctions;
+extern FbComposeFunctions64 pixman_composeFunctions64;
 
 void pixman_composite_rect_general_accessors (const FbComposeData *data,
 					      uint32_t *scanline_buffer);
commit 48521e6fe4378f9cc49d2dfb8d87490e65267a49
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Tue Apr 1 16:31:26 2008 -0700

    Replace hardcoded numbers and uint*_t in combine.inc.
    
    The masks and shifts have been replaced with preprocessor defines generated by
    combine.pl.  The uint*_t types have been replaced with comp4_t, comp2_t, and
    comp1_t depending on how many components the value is supposed to hold.
    
    Signed-off-by: Søren Sandmann <sandmann at redhat.com>

diff --git a/pixman/combine.inc b/pixman/combine.inc
index 9cd51a4..42a06b2 100644
--- a/pixman/combine.inc
+++ b/pixman/combine.inc
@@ -12,9 +12,8 @@
 
 #define FbGen(x,y,i,ax,ay,t,u,v) ((t) = (FbIntMult(FbGet8(y,i),ay,(u)) + \
 					 FbIntMult(FbGet8(x,i),ax,(v))), \
-				  (uint32_t) ((uint8_t) ((t) |		\
-							 (0 - ((t) >> 8)))) << (i))
-
+				  (comp4_t) ((comp1_t) ((t) |		 \
+							 (0 - ((t) >> G_SHIFT)))) << (i))
 
 /*
   The methods below use some tricks to be able to do two color
@@ -24,66 +23,67 @@
 /*
   x_c = (x_c * a) / 255
 */
-#define FbByteMul(x, a) do {					    \
-        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
-        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
-        t &= 0xff00ff;						    \
-								    \
-        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;		    \
-        x = (x + ((x >> 8) & 0xff00ff));			    \
-        x &= 0xff00ff00;					    \
-        x += t;							    \
+#define FbByteMul(x, a) do {                                            \
+        comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF;                  \
+        t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
+        t &= RB_MASK;                                                   \
+                                                                        \
+        x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF;      \
+        x = (x + ((x >> COMPONENT_SIZE) & RB_MASK));                    \
+        x &= RB_MASK << COMPONENT_SIZE;                                 \
+        x += t;                                                         \
     } while (0)
 
 /*
   x_c = (x_c * a) / 255 + y
 */
-#define FbByteMulAdd(x, a, y) do {				    \
-        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
-        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
-        t &= 0xff00ff;						    \
-        t += y & 0xff00ff;					    \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);			    \
-        t &= 0xff00ff;						    \
-								    \
-        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;                 \
-        x = (x + ((x >> 8) & 0xff00ff)) >> 8;                       \
-        x &= 0xff00ff;                                              \
-        x += (y >> 8) & 0xff00ff;                                   \
-        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                     \
-        x &= 0xff00ff;                                              \
-        x <<= 8;                                                    \
-        x += t;                                                     \
+#define FbByteMulAdd(x, a, y) do {                                      \
+        comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF;                  \
+        t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
+        t &= RB_MASK;                                                   \
+        t += y & RB_MASK;                                               \
+        t |= RB_MASK_PLUS_ONE - ((t >> COMPONENT_SIZE) & RB_MASK);      \
+        t &= RB_MASK;                                                   \
+                                                                        \
+        x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF;      \
+        x = (x + ((x >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
+        x &= RB_MASK;                                                   \
+        x += (y >> COMPONENT_SIZE) & RB_MASK;                           \
+        x |= RB_MASK_PLUS_ONE - ((x >> COMPONENT_SIZE) & RB_MASK);      \
+        x &= RB_MASK;                                                   \
+        x <<= COMPONENT_SIZE;                                           \
+        x += t;                                                         \
     } while (0)
 
 /*
   x_c = (x_c * a + y_c * b) / 255
 */
 #define FbByteAddMul(x, a, y, b) do {                                   \
-        uint32_t t;							\
-        uint32_t r = (x >> 24) * a + (y >> 24) * b + 0x80;		\
-        r += (r >> 8);                                                  \
-        r >>= 8;                                                        \
-									\
-        t = (x & 0xff00) * a + (y & 0xff00) * b;                        \
-        t += (t >> 8) + 0x8000;                                         \
-        t >>= 16;                                                       \
-									\
-        t |= r << 16;                                                   \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
-        t &= 0xff00ff;                                                  \
-        t <<= 8;                                                        \
-									\
-        r = ((x >> 16) & 0xff) * a + ((y >> 16) & 0xff) * b + 0x80;     \
-        r += (r >> 8);                                                  \
-        r >>= 8;                                                        \
-									\
-        x = (x & 0xff) * a + (y & 0xff) * b + 0x80;                     \
-        x += (x >> 8);                                                  \
-        x >>= 8;                                                        \
-        x |= r << 16;                                                   \
-        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
-        x &= 0xff00ff;                                                  \
+        comp4_t t;                                                      \
+        comp4_t r = (x >> A_SHIFT) * a + (y >> A_SHIFT) * b + ONE_HALF; \
+        r += (r >> G_SHIFT);                                            \
+        r >>= G_SHIFT;                                                  \
+                                                                        \
+        t = (x & G_MASK) * a + (y & G_MASK) * b;                        \
+        t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT);                    \
+        t >>= B_SHIFT;                                                  \
+                                                                        \
+        t |= r << B_SHIFT;                                              \
+        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+        t &= RB_MASK;                                                   \
+        t <<= G_SHIFT;                                                  \
+                                                                        \
+        r = ((x >> B_SHIFT) & MASK) * a +                               \
+            ((y >> B_SHIFT) & MASK) * b + ONE_HALF;                     \
+        r += (r >> G_SHIFT);                                            \
+        r >>= G_SHIFT;                                                  \
+                                                                        \
+        x = (x & MASK) * a + (y & MASK) * b + ONE_HALF;                 \
+        x += (x >> G_SHIFT);                                            \
+        x >>= G_SHIFT;                                                  \
+        x |= r << B_SHIFT;                                              \
+        x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK);             \
+        x &= RB_MASK;                                                   \
         x |= t;                                                         \
     } while (0)
 
@@ -91,89 +91,91 @@
   x_c = (x_c * a + y_c *b) / 256
 */
 #define FbByteAddMul_256(x, a, y, b) do {                               \
-        uint32_t t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;		\
-        t >>= 8;                                                        \
-        t &= 0xff00ff;                                                  \
-									\
-        x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;      \
-        x &= 0xff00ff00;                                                \
+        comp4_t t = (x & RB_MASK) * a + (y & RB_MASK) * b;              \
+        t >>= G_SHIFT;                                                  \
+        t &= RB_MASK;                                                   \
+                                                                        \
+        x = ((x >> G_SHIFT) & RB_MASK) * a +                            \
+            ((y >> G_SHIFT) & RB_MASK) * b;                             \
+        x &= AG_MASK;                                                   \
         x += t;                                                         \
     } while (0)
 
 /*
   x_c = (x_c * a_c) / 255
 */
-#define FbByteMulC(x, a) do {				  \
-        uint32_t t;                                       \
-        uint32_t r = (x & 0xff) * (a & 0xff);             \
-        r |= (x & 0xff0000) * ((a >> 16) & 0xff);	  \
-	r += 0x800080;					  \
-        r = (r + ((r >> 8) & 0xff00ff)) >> 8;		  \
-        r &= 0xff00ff;					  \
-							  \
-        x >>= 8;					  \
-        t = (x & 0xff) * ((a >> 8) & 0xff);		  \
-        t |= (x & 0xff0000) * (a >> 24);		  \
-        t += 0x800080;					  \
-        t = t + ((t >> 8) & 0xff00ff);			  \
-        x = r | (t & 0xff00ff00);			  \
-							  \
+#define FbByteMulC(x, a) do {                                           \
+        comp4_t t;                                                      \
+        comp4_t r = (x & MASK) * (a & MASK);                            \
+        r |= (x & B_MASK) * ((a >> B_SHIFT) & MASK);                    \
+        r += RB_ONE_HALF;                                               \
+        r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
+        r &= RB_MASK;                                                   \
+                                                                        \
+        x >>= G_SHIFT;                                                  \
+        t = (x & MASK) * ((a >> G_SHIFT) & MASK);                       \
+        t |= (x & B_MASK) * (a >> A_SHIFT);                             \
+        t += RB_ONE_HALF;                                               \
+        t = t + ((t >> G_SHIFT) & RB_MASK);                             \
+        x = r | (t & AG_MASK);                                          \
     } while (0)
 
 /*
   x_c = (x_c * a) / 255 + y
 */
-#define FbByteMulAddC(x, a, y) do {				      \
-        uint32_t t;                                                   \
-        uint32_t r = (x & 0xff) * (a & 0xff);                         \
-        r |= (x & 0xff0000) * ((a >> 16) & 0xff);		      \
-	r += 0x800080;						      \
-	r = (r + ((r >> 8) & 0xff00ff)) >> 8;			      \
-        r &= 0xff00ff;						      \
-        r += y & 0xff00ff;					      \
-        r |= 0x1000100 - ((r >> 8) & 0xff00ff);			      \
-        r &= 0xff00ff;						      \
-								      \
-        x >>= 8;                                                       \
-        t = (x & 0xff) * ((a >> 8) & 0xff);                            \
-        t |= (x & 0xff0000) * (a >> 24);                               \
-	t += 0x800080;                                                 \
-        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			       \
-        t &= 0xff00ff;                                                 \
-        t += (y >> 8) & 0xff00ff;                                      \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                        \
-        t &= 0xff00ff;                                                 \
-        x = r | (t << 8);                                              \
+#define FbByteMulAddC(x, a, y) do {                                     \
+        comp4_t t;                                                      \
+        comp4_t r = (x & MASK) * (a & MASK);                            \
+        r |= (x & B_MASK) * ((a >> B_SHIFT) & MASK);                    \
+        r += RB_ONE_HALF;                                               \
+        r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
+        r &= RB_MASK;                                                   \
+        r += y & RB_MASK;                                               \
+        r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK);             \
+        r &= RB_MASK;                                                   \
+                                                                        \
+        x >>= G_SHIFT;                                                  \
+        t = (x & MASK) * ((a >> G_SHIFT) & MASK);                       \
+        t |= (x & B_MASK) * (a >> A_SHIFT);                             \
+        t += RB_ONE_HALF;                                               \
+        t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
+        t &= RB_MASK;                                                   \
+        t += (y >> G_SHIFT) & RB_MASK;                                  \
+        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+        t &= RB_MASK;                                                   \
+        x = r | (t << G_SHIFT);                                         \
     } while (0)
 
 /*
   x_c = (x_c * a_c + y_c * b) / 255
 */
 #define FbByteAddMulC(x, a, y, b) do {                                  \
-        uint32_t t;							\
-        uint32_t r = (x >> 24) * (a >> 24) + (y >> 24) * b;		\
-        r += (r >> 8) + 0x80;                                           \
-        r >>= 8;                                                        \
-									\
-        t = (x & 0xff00) * ((a >> 8) & 0xff) + (y & 0xff00) * b;        \
-        t += (t >> 8) + 0x8000;                                         \
-        t >>= 16;                                                       \
-									\
-        t |= r << 16;                                                   \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
-        t &= 0xff00ff;                                                  \
-        t <<= 8;                                                        \
-									\
-        r = ((x >> 16) & 0xff) * ((a >> 16) & 0xff) + ((y >> 16) & 0xff) * b + 0x80; \
-        r += (r >> 8);                                                  \
-        r >>= 8;                                                        \
-									\
-        x = (x & 0xff) * (a & 0xff) + (y & 0xff) * b + 0x80;            \
-        x += (x >> 8);                                                  \
-        x >>= 8;                                                        \
-        x |= r << 16;                                                   \
-        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
-        x &= 0xff00ff;                                                  \
+        comp4_t t;                                                      \
+        comp4_t r = (x >> A_SHIFT) * (a >> A_SHIFT) +                   \
+                     (y >> A_SHIFT) * b;                                \
+        r += (r >> G_SHIFT) + ONE_HALF;                                 \
+        r >>= G_SHIFT;                                                  \
+                                                                        \
+        t = (x & G_MASK) * ((a >> G_SHIFT) & MASK) + (y & G_MASK) * b;  \
+        t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT);                    \
+        t >>= B_SHIFT;                                                  \
+                                                                        \
+        t |= r << B_SHIFT;                                              \
+        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+        t &= RB_MASK;                                                   \
+        t <<= G_SHIFT;                                                  \
+                                                                        \
+        r = ((x >> B_SHIFT) & MASK) * ((a >> B_SHIFT) & MASK) +         \
+            ((y >> B_SHIFT) & MASK) * b + ONE_HALF;                     \
+        r += (r >> G_SHIFT);                                            \
+        r >>= G_SHIFT;                                                  \
+                                                                        \
+        x = (x & MASK) * (a & MASK) + (y & MASK) * b + ONE_HALF;        \
+        x += (x >> G_SHIFT);                                            \
+        x >>= G_SHIFT;                                                  \
+        x |= r << B_SHIFT;                                              \
+        x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK);             \
+        x &= RB_MASK;                                                   \
         x |= t;                                                         \
     } while (0)
 
@@ -181,14 +183,14 @@
   x_c = min(x_c + y_c, 255)
 */
 #define FbByteAdd(x, y) do {                                            \
-        uint32_t t;							\
-        uint32_t r = (x & 0xff00ff) + (y & 0xff00ff);			\
-        r |= 0x1000100 - ((r >> 8) & 0xff00ff);                         \
-        r &= 0xff00ff;                                                  \
-									\
-        t = ((x >> 8) & 0xff00ff) + ((y >> 8) & 0xff00ff);              \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
-        r |= (t & 0xff00ff) << 8;                                       \
+        comp4_t t;                                                      \
+        comp4_t r = (x & RB_MASK) + (y & RB_MASK);                      \
+        r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK);             \
+        r &= RB_MASK;                                                   \
+                                                                        \
+        t = ((x >> G_SHIFT) & RB_MASK) + ((y >> G_SHIFT) & RB_MASK);    \
+        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+        r |= (t & RB_MASK) << G_SHIFT;                                  \
         x = r;                                                          \
     } while (0)
 
@@ -206,12 +208,12 @@
  * Combine src and mask
  */
 FASTCALL static void
-pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int width)
+pixman_fbCombineMaskU (comp4_t *src, const comp4_t *mask, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t a = *(mask + i) >> 24;
-        uint32_t s = *(src + i);
+        comp4_t a = *(mask + i) >> A_SHIFT;
+        comp4_t s = *(src + i);
         FbByteMul(s, a);
         *(src + i) = s;
     }
@@ -222,26 +224,26 @@ pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int width)
  */
 
 FASTCALL static void
-fbCombineClear (uint32_t *dest, const uint32_t *src, int width)
+fbCombineClear (comp4_t *dest, const comp4_t *src, int width)
 {
-    memset(dest, 0, width*sizeof(uint32_t));
+    memset(dest, 0, width*sizeof(comp4_t));
 }
 
 FASTCALL static void
-fbCombineSrcU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineSrcU (comp4_t *dest, const comp4_t *src, int width)
 {
-    memcpy(dest, src, width*sizeof(uint32_t));
+    memcpy(dest, src, width*sizeof(comp4_t));
 }
 
 /* if the Src is opaque, call fbCombineSrcU */
 FASTCALL static void
-fbCombineOverU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineOverU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t ia = Alpha(~s);
+        comp4_t s = *(src + i);
+        comp4_t d = *(dest + i);
+        comp4_t ia = Alpha(~s);
 
         FbByteMulAdd(d, ia, s);
 	*(dest + i) = d;
@@ -250,13 +252,13 @@ fbCombineOverU (uint32_t *dest, const uint32_t *src, int width)
 
 /* if the Dst is opaque, this is a noop */
 FASTCALL static void
-fbCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineOverReverseU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t ia = Alpha(~*(dest + i));
+        comp4_t s = *(src + i);
+        comp4_t d = *(dest + i);
+        comp4_t ia = Alpha(~*(dest + i));
         FbByteMulAdd(s, ia, d);
 	*(dest + i) = s;
     }
@@ -264,12 +266,12 @@ fbCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
 
 /* if the Dst is opaque, call fbCombineSrcU */
 FASTCALL static void
-fbCombineInU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineInU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t a = Alpha(*(dest + i));
+        comp4_t s = *(src + i);
+        comp4_t a = Alpha(*(dest + i));
         FbByteMul(s, a);
 	*(dest + i) = s;
     }
@@ -277,12 +279,12 @@ fbCombineInU (uint32_t *dest, const uint32_t *src, int width)
 
 /* if the Src is opaque, this is a noop */
 FASTCALL static void
-fbCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineInReverseU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = Alpha(*(src + i));
+        comp4_t d = *(dest + i);
+        comp4_t a = Alpha(*(src + i));
         FbByteMul(d, a);
 	*(dest + i) = d;
     }
@@ -290,12 +292,12 @@ fbCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
 
 /* if the Dst is opaque, call fbCombineClear */
 FASTCALL static void
-fbCombineOutU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineOutU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t a = Alpha(~*(dest + i));
+        comp4_t s = *(src + i);
+        comp4_t a = Alpha(~*(dest + i));
         FbByteMul(s, a);
 	*(dest + i) = s;
     }
@@ -303,12 +305,12 @@ fbCombineOutU (uint32_t *dest, const uint32_t *src, int width)
 
 /* if the Src is opaque, call fbCombineClear */
 FASTCALL static void
-fbCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineOutReverseU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = Alpha(~*(src + i));
+        comp4_t d = *(dest + i);
+        comp4_t a = Alpha(~*(src + i));
         FbByteMul(d, a);
 	*(dest + i) = d;
     }
@@ -318,14 +320,14 @@ fbCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
 /* if the Dst is opaque, call fbCombineOverU */
 /* if both the Src and Dst are opaque, call fbCombineSrcU */
 FASTCALL static void
-fbCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineAtopU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t dest_a = Alpha(d);
-        uint32_t src_ia = Alpha(~s);
+        comp4_t s = *(src + i);
+        comp4_t d = *(dest + i);
+        comp4_t dest_a = Alpha(d);
+        comp4_t src_ia = Alpha(~s);
 
         FbByteAddMul(s, dest_a, d, src_ia);
 	*(dest + i) = s;
@@ -336,14 +338,14 @@ fbCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
 /* if the Dst is opaque, call fbCombineInReverseU */
 /* if both the Src and Dst are opaque, call fbCombineDstU */
 FASTCALL static void
-fbCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineAtopReverseU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t src_a = Alpha(s);
-        uint32_t dest_ia = Alpha(~d);
+        comp4_t s = *(src + i);
+        comp4_t d = *(dest + i);
+        comp4_t src_a = Alpha(s);
+        comp4_t dest_ia = Alpha(~d);
 
         FbByteAddMul(s, dest_ia, d, src_a);
 	*(dest + i) = s;
@@ -354,14 +356,14 @@ fbCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
 /* if the Dst is opaque, call fbCombineOverReverseU */
 /* if both the Src and Dst are opaque, call fbCombineClear */
 FASTCALL static void
-fbCombineXorU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineXorU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t src_ia = Alpha(~s);
-        uint32_t dest_ia = Alpha(~d);
+        comp4_t s = *(src + i);
+        comp4_t d = *(dest + i);
+        comp4_t src_ia = Alpha(~s);
+        comp4_t dest_ia = Alpha(~d);
 
         FbByteAddMul(s, dest_ia, d, src_ia);
 	*(dest + i) = s;
@@ -369,12 +371,12 @@ fbCombineXorU (uint32_t *dest, const uint32_t *src, int width)
 }
 
 FASTCALL static void
-fbCombineAddU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineAddU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
+        comp4_t s = *(src + i);
+        comp4_t d = *(dest + i);
         FbByteAdd(d, s);
 	*(dest + i) = d;
     }
@@ -384,16 +386,16 @@ fbCombineAddU (uint32_t *dest, const uint32_t *src, int width)
 /* if the Dst is opaque, call fbCombineAddU */
 /* if both the Src and Dst are opaque, call fbCombineAddU */
 FASTCALL static void
-fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineSaturateU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint16_t  sa, da;
+        comp4_t s = *(src + i);
+        comp4_t d = *(dest + i);
+        comp2_t sa, da;
 
-        sa = s >> 24;
-        da = ~d >> 24;
+        sa = s >> A_SHIFT;
+        da = ~d >> A_SHIFT;
         if (sa > da)
         {
             sa = FbIntDiv(da, sa);
@@ -445,20 +447,20 @@ fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
 #define CombineXor	(CombineAOut|CombineBOut)
 
 /* portion covered by a but not b */
-FASTCALL static uint8_t
-fbCombineDisjointOutPart (uint8_t a, uint8_t b)
+FASTCALL static comp1_t
+fbCombineDisjointOutPart (comp1_t a, comp1_t b)
 {
     /* min (1, (1-b) / a) */
 
     b = ~b;		    /* 1 - b */
     if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
-	return 0xff;	    /* 1 */
+	return MASK;	    /* 1 */
     return FbIntDiv(b,a);   /* (1-b) / a */
 }
 
 /* portion covered by both a and b */
-FASTCALL static uint8_t
-fbCombineDisjointInPart (uint8_t a, uint8_t b)
+FASTCALL static comp1_t
+fbCombineDisjointInPart (comp1_t a, comp1_t b)
 {
     /* max (1-(1-b)/a,0) */
     /*  = - min ((1-b)/a - 1, 0) */
@@ -471,8 +473,8 @@ fbCombineDisjointInPart (uint8_t a, uint8_t b)
 }
 
 /* portion covered by a but not b */
-FASTCALL static uint8_t
-fbCombineConjointOutPart (uint8_t a, uint8_t b)
+FASTCALL static comp1_t
+fbCombineConjointOutPart (comp1_t a, comp1_t b)
 {
     /* max (1-b/a,0) */
     /* = 1-min(b/a,1) */
@@ -485,27 +487,27 @@ fbCombineConjointOutPart (uint8_t a, uint8_t b)
 }
 
 /* portion covered by both a and b */
-FASTCALL static uint8_t
-fbCombineConjointInPart (uint8_t a, uint8_t b)
+FASTCALL static comp1_t
+fbCombineConjointInPart (comp1_t a, comp1_t b)
 {
     /* min (1,b/a) */
 
     if (b >= a)		    /* b >= a -> b/a >= 1 */
-	return 0xff;	    /* 1 */
+	return MASK;	    /* 1 */
     return FbIntDiv(b,a);   /* b/a */
 }
 
 FASTCALL static void
-fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
+fbCombineDisjointGeneralU (comp4_t *dest, const comp4_t *src, int width, comp1_t combine)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t m,n,o,p;
-        uint16_t Fa, Fb, t, u, v;
-        uint8_t sa = s >> 24;
-        uint8_t da = d >> 24;
+        comp4_t s = *(src + i);
+        comp4_t d = *(dest + i);
+        comp4_t m,n,o,p;
+        comp2_t Fa, Fb, t, u, v;
+        comp1_t sa = s >> A_SHIFT;
+        comp1_t da = d >> A_SHIFT;
 
         switch (combine & CombineA) {
         default:
@@ -518,7 +520,7 @@ fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8
             Fa = fbCombineDisjointInPart (sa, da);
             break;
         case CombineA:
-            Fa = 0xff;
+            Fa = MASK;
             break;
         }
 
@@ -533,32 +535,32 @@ fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8
             Fb = fbCombineDisjointInPart (da, sa);
             break;
         case CombineB:
-            Fb = 0xff;
+            Fb = MASK;
             break;
         }
         m = FbGen (s,d,0,Fa,Fb,t, u, v);
-        n = FbGen (s,d,8,Fa,Fb,t, u, v);
-        o = FbGen (s,d,16,Fa,Fb,t, u, v);
-        p = FbGen (s,d,24,Fa,Fb,t, u, v);
+        n = FbGen (s,d,G_SHIFT,Fa,Fb,t, u, v);
+        o = FbGen (s,d,B_SHIFT,Fa,Fb,t, u, v);
+        p = FbGen (s,d,A_SHIFT,Fa,Fb,t, u, v);
         s = m|n|o|p;
 	*(dest + i) = s;
     }
 }
 
 FASTCALL static void
-fbCombineDisjointOverU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineDisjointOverU (comp4_t *dest, const comp4_t *src, int width)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint16_t  a = s >> 24;
+        comp4_t s = *(src + i);
+        comp2_t a = s >> A_SHIFT;
 
         if (a != 0x00)
         {
-            if (a != 0xff)
+            if (a != MASK)
             {
-                uint32_t d = *(dest + i);
-                a = fbCombineDisjointOutPart (d >> 24, a);
+                comp4_t d = *(dest + i);
+                a = fbCombineDisjointOutPart (d >> A_SHIFT, a);
                 FbByteMulAdd(d, a, s);
                 s = d;
             }
@@ -568,58 +570,58 @@ fbCombineDisjointOverU (uint32_t *dest, const uint32_t *src, int width)
 }
 
 FASTCALL static void
-fbCombineDisjointInU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineDisjointInU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineDisjointGeneralU (dest, src, width, CombineAIn);
 }
 
 FASTCALL static void
-fbCombineDisjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineDisjointInReverseU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineDisjointGeneralU (dest, src, width, CombineBIn);
 }
 
 FASTCALL static void
-fbCombineDisjointOutU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineDisjointOutU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineDisjointGeneralU (dest, src, width, CombineAOut);
 }
 
 FASTCALL static void
-fbCombineDisjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineDisjointOutReverseU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineDisjointGeneralU (dest, src, width, CombineBOut);
 }
 
 FASTCALL static void
-fbCombineDisjointAtopU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineDisjointAtopU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineDisjointGeneralU (dest, src, width, CombineAAtop);
 }
 
 FASTCALL static void
-fbCombineDisjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineDisjointAtopReverseU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineDisjointGeneralU (dest, src, width, CombineBAtop);
 }
 
 FASTCALL static void
-fbCombineDisjointXorU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineDisjointXorU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineDisjointGeneralU (dest, src, width, CombineXor);
 }
 
 FASTCALL static void
-fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
+fbCombineConjointGeneralU (comp4_t *dest, const comp4_t *src, int width, comp1_t combine)
 {
     int i;
     for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t  m,n,o,p;
-        uint16_t  Fa, Fb, t, u, v;
-        uint8_t sa = s >> 24;
-        uint8_t da = d >> 24;
+        comp4_t s = *(src + i);
+        comp4_t d = *(dest + i);
+        comp4_t m,n,o,p;
+        comp2_t Fa, Fb, t, u, v;
+        comp1_t sa = s >> A_SHIFT;
+        comp1_t da = d >> A_SHIFT;
 
         switch (combine & CombineA) {
         default:
@@ -632,7 +634,7 @@ fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8
             Fa = fbCombineConjointInPart (sa, da);
             break;
         case CombineA:
-            Fa = 0xff;
+            Fa = MASK;
             break;
         }
 
@@ -647,71 +649,71 @@ fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8
             Fb = fbCombineConjointInPart (da, sa);
             break;
         case CombineB:
-            Fb = 0xff;
+            Fb = MASK;
             break;
         }
         m = FbGen (s,d,0,Fa,Fb,t, u, v);
-        n = FbGen (s,d,8,Fa,Fb,t, u, v);
-        o = FbGen (s,d,16,Fa,Fb,t, u, v);
-        p = FbGen (s,d,24,Fa,Fb,t, u, v);
+        n = FbGen (s,d,G_SHIFT,Fa,Fb,t, u, v);
+        o = FbGen (s,d,B_SHIFT,Fa,Fb,t, u, v);
+        p = FbGen (s,d,A_SHIFT,Fa,Fb,t, u, v);
         s = m|n|o|p;
 	*(dest + i) = s;
     }
 }
 
 FASTCALL static void
-fbCombineConjointOverU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineConjointOverU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineConjointGeneralU (dest, src, width, CombineAOver);
 }
 
 
 FASTCALL static void
-fbCombineConjointOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineConjointOverReverseU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineConjointGeneralU (dest, src, width, CombineBOver);
 }
 
 
 FASTCALL static void
-fbCombineConjointInU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineConjointInU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineConjointGeneralU (dest, src, width, CombineAIn);
 }
 
 
 FASTCALL static void
-fbCombineConjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineConjointInReverseU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineConjointGeneralU (dest, src, width, CombineBIn);
 }
 
 FASTCALL static void
-fbCombineConjointOutU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineConjointOutU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineConjointGeneralU (dest, src, width, CombineAOut);
 }
 
 FASTCALL static void
-fbCombineConjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineConjointOutReverseU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineConjointGeneralU (dest, src, width, CombineBOut);
 }
 
 FASTCALL static void
-fbCombineConjointAtopU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineConjointAtopU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineConjointGeneralU (dest, src, width, CombineAAtop);
 }
 
 FASTCALL static void
-fbCombineConjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineConjointAtopReverseU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineConjointGeneralU (dest, src, width, CombineBAtop);
 }
 
 FASTCALL static void
-fbCombineConjointXorU (uint32_t *dest, const uint32_t *src, int width)
+fbCombineConjointXorU (comp4_t *dest, const comp4_t *src, int width)
 {
     fbCombineConjointGeneralU (dest, src, width, CombineXor);
 }
@@ -721,12 +723,12 @@ fbCombineConjointXorU (uint32_t *dest, const uint32_t *src, int width)
 /********************************************************************************/
 
 FASTCALL static void
-fbCombineMaskC (uint32_t *src, uint32_t *mask)
+fbCombineMaskC (comp4_t *src, comp4_t *mask)
 {
-    uint32_t a = *mask;
+    comp4_t a = *mask;
 
-    uint32_t	x;
-    uint16_t	xa;
+    comp4_t	x;
+    comp2_t	xa;
 
     if (!a)
     {
@@ -735,16 +737,16 @@ fbCombineMaskC (uint32_t *src, uint32_t *mask)
     }
 
     x = *(src);
-    if (a == 0xffffffff)
+    if (a == ~0)
     {
-	x = x >> 24;
-	x |= x << 8;
-	x |= x << 16;
+	x = x >> A_SHIFT;
+	x |= x << G_SHIFT;
+	x |= x << B_SHIFT;
 	*(mask) = x;
 	return;
     }
 
-    xa = x >> 24;
+    xa = x >> A_SHIFT;
     FbByteMulC(x, a);
     *(src) = x;
     FbByteMul(a, xa);
@@ -752,10 +754,10 @@ fbCombineMaskC (uint32_t *src, uint32_t *mask)
 }
 
 FASTCALL static void
-fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
+fbCombineMaskValueC (comp4_t *src, const comp4_t *mask)
 {
-    uint32_t a = *mask;
-    uint32_t	x;
+    comp4_t a = *mask;
+    comp4_t	x;
 
     if (!a)
     {
@@ -763,7 +765,7 @@ fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
 	return;
     }
 
-    if (a == 0xffffffff)
+    if (a == ~0)
 	return;
 
     x = *(src);
@@ -772,22 +774,22 @@ fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
 }
 
 FASTCALL static void
-fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
+fbCombineMaskAlphaC (const comp4_t *src, comp4_t *mask)
 {
-    uint32_t a = *(mask);
-    uint32_t	x;
+    comp4_t a = *(mask);
+    comp4_t	x;
 
     if (!a)
 	return;
 
-    x = *(src) >> 24;
-    if (x == 0xff)
+    x = *(src) >> A_SHIFT;
+    if (x == MASK)
 	return;
-    if (a == 0xffffffff)
+    if (a == ~0)
     {
-	x = x >> 24;
-	x |= x << 8;
-	x |= x << 16;
+	x = x >> A_SHIFT;
+	x |= x << G_SHIFT;
+	x |= x << B_SHIFT;
 	*(mask) = x;
 	return;
     }
@@ -799,19 +801,19 @@ fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
 
 
 FASTCALL static void
-fbCombineClearC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineClearC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
-    memset(dest, 0, width*sizeof(uint32_t));
+    memset(dest, 0, width*sizeof(comp4_t));
 }
 
 FASTCALL static void
-fbCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineSrcC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
+	comp4_t s = *(src + i);
+	comp4_t m = *(mask + i);
 
 	fbCombineMaskValueC (&s, &m);
 
@@ -820,23 +822,23 @@ fbCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineOverC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
+	comp4_t s = *(src + i);
+	comp4_t m = *(mask + i);
+	comp4_t a;
 
 	fbCombineMaskC (&s, &m);
 
 	a = ~m;
-        if (a != 0xffffffff)
+        if (a != ~0)
         {
             if (a)
             {
-                uint32_t d = *(dest + i);
+                comp4_t d = *(dest + i);
                 FbByteMulAddC(d, a, s);
                 s = d;
             }
@@ -846,22 +848,22 @@ fbCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineOverReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = ~d >> 24;
+        comp4_t d = *(dest + i);
+        comp4_t a = ~d >> A_SHIFT;
 
         if (a)
         {
-            uint32_t s = *(src + i);
-	    uint32_t m = *(mask + i);
+            comp4_t s = *(src + i);
+	    comp4_t m = *(mask + i);
 
 	    fbCombineMaskValueC (&s, &m);
 
-            if (a != 0xff)
+            if (a != MASK)
             {
                 FbByteMulAdd(s, a, d);
             }
@@ -871,21 +873,21 @@ fbCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineInC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint16_t a = d >> 24;
-        uint32_t s = 0;
+        comp4_t d = *(dest + i);
+        comp2_t a = d >> A_SHIFT;
+        comp4_t s = 0;
         if (a)
         {
-	    uint32_t m = *(mask + i);
+	    comp4_t m = *(mask + i);
 
 	    s = *(src + i);
 	    fbCombineMaskValueC (&s, &m);
-            if (a != 0xff)
+            if (a != MASK)
             {
                 FbByteMul(s, a);
             }
@@ -895,21 +897,21 @@ fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineInReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t a;
+        comp4_t s = *(src + i);
+        comp4_t m = *(mask + i);
+        comp4_t a;
 
 	fbCombineMaskAlphaC (&s, &m);
 
 	a = m;
-        if (a != 0xffffffff)
+        if (a != ~0)
         {
-            uint32_t d = 0;
+            comp4_t d = 0;
             if (a)
             {
                 d = *(dest + i);
@@ -921,22 +923,22 @@ fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineOutC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint16_t a = ~d >> 24;
-        uint32_t s = 0;
+        comp4_t d = *(dest + i);
+        comp2_t a = ~d >> A_SHIFT;
+        comp4_t s = 0;
         if (a)
         {
-	    uint32_t m = *(mask + i);
+	    comp4_t m = *(mask + i);
 
 	    s = *(src + i);
 	    fbCombineMaskValueC (&s, &m);
 
-            if (a != 0xff)
+            if (a != MASK)
             {
                 FbByteMul(s, a);
             }
@@ -946,21 +948,21 @@ fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineOutReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
+	comp4_t s = *(src + i);
+	comp4_t m = *(mask + i);
+	comp4_t a;
 
 	fbCombineMaskAlphaC (&s, &m);
 
         a = ~m;
-        if (a != 0xffffffff)
+        if (a != ~0)
         {
-            uint32_t d = 0;
+            comp4_t d = 0;
             if (a)
             {
                 d = *(dest + i);
@@ -972,16 +974,16 @@ fbCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineAtopC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = d >> 24;
+        comp4_t d = *(dest + i);
+        comp4_t s = *(src + i);
+        comp4_t m = *(mask + i);
+        comp4_t ad;
+        comp2_t as = d >> A_SHIFT;
 
 	fbCombineMaskC (&s, &m);
 
@@ -993,17 +995,17 @@ fbCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineAtopReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
 
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = ~d >> 24;
+        comp4_t d = *(dest + i);
+        comp4_t s = *(src + i);
+        comp4_t m = *(mask + i);
+        comp4_t ad;
+        comp2_t as = ~d >> A_SHIFT;
 
 	fbCombineMaskC (&s, &m);
 
@@ -1015,16 +1017,16 @@ fbCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineXorC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = ~d >> 24;
+        comp4_t d = *(dest + i);
+        comp4_t s = *(src + i);
+        comp4_t m = *(mask + i);
+        comp4_t ad;
+        comp2_t as = ~d >> A_SHIFT;
 
 	fbCombineMaskC (&s, &m);
 
@@ -1036,14 +1038,14 @@ fbCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineAddC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t d = *(dest + i);
+        comp4_t s = *(src + i);
+        comp4_t m = *(mask + i);
+        comp4_t d = *(dest + i);
 
 	fbCombineMaskValueC (&s, &m);
 
@@ -1053,15 +1055,15 @@ fbCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 }
 
 FASTCALL static void
-fbCombineSaturateC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineSaturateC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint16_t  sa, sr, sg, sb, da;
-        uint16_t  t, u, v;
-        uint32_t  m,n,o,p;
+        comp4_t s, d;
+        comp2_t sa, sr, sg, sb, da;
+        comp2_t t, u, v;
+        comp4_t m,n,o,p;
 
         d = *(dest + i);
         s = *(src + i);
@@ -1069,53 +1071,53 @@ fbCombineSaturateC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 
 	fbCombineMaskC (&s, &m);
 
-        sa = (m >> 24);
-        sr = (m >> 16) & 0xff;
-        sg = (m >>  8) & 0xff;
-        sb = (m      ) & 0xff;
-        da = ~d >> 24;
+        sa = (m >> A_SHIFT);
+        sr = (m >> B_SHIFT) & MASK;
+        sg = (m >> G_SHIFT) & MASK;
+        sb =  m             & MASK;
+        da = ~d >> A_SHIFT;
 
         if (sb <= da)
             m = FbAdd(s,d,0,t);
         else
-            m = FbGen (s, d, 0, (da << 8) / sb, 0xff, t, u, v);
+            m = FbGen (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
 
         if (sg <= da)
-            n = FbAdd(s,d,8,t);
+            n = FbAdd(s,d,G_SHIFT,t);
         else
-            n = FbGen (s, d, 8, (da << 8) / sg, 0xff, t, u, v);
+            n = FbGen (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
 
         if (sr <= da)
-            o = FbAdd(s,d,16,t);
+            o = FbAdd(s,d,B_SHIFT,t);
         else
-            o = FbGen (s, d, 16, (da << 8) / sr, 0xff, t, u, v);
+            o = FbGen (s, d, B_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
 
         if (sa <= da)
-            p = FbAdd(s,d,24,t);
+            p = FbAdd(s,d,A_SHIFT,t);
         else
-            p = FbGen (s, d, 24, (da << 8) / sa, 0xff, t, u, v);
+            p = FbGen (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
 
 	*(dest + i) = m|n|o|p;
     }
 }
 
 FASTCALL static void
-fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
+fbCombineDisjointGeneralC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width, comp1_t combine)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint32_t  m,n,o,p;
-        uint32_t  Fa, Fb;
-        uint16_t  t, u, v;
-        uint32_t  sa;
-        uint8_t   da;
+        comp4_t s, d;
+        comp4_t m,n,o,p;
+        comp4_t Fa, Fb;
+        comp2_t t, u, v;
+        comp4_t sa;
+        comp1_t da;
 
         s = *(src + i);
         m = *(mask + i);
         d = *(dest + i);
-        da = d >> 24;
+        da = d >> A_SHIFT;
 
 	fbCombineMaskC (&s, &m);
 
@@ -1126,21 +1128,21 @@ fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int wi
             Fa = 0;
             break;
         case CombineAOut:
-            m = fbCombineDisjointOutPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineDisjointOutPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineDisjointOutPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineDisjointOutPart ((uint8_t) (sa >> 24), da) << 24;
+            m = fbCombineDisjointOutPart ((comp1_t) (sa >> 0), da);
+            n = fbCombineDisjointOutPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+            o = fbCombineDisjointOutPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
+            p = fbCombineDisjointOutPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
             Fa = m|n|o|p;
             break;
         case CombineAIn:
-            m = fbCombineDisjointInPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineDisjointInPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineDisjointInPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineDisjointInPart ((uint8_t) (sa >> 24), da) << 24;
+            m = fbCombineDisjointInPart ((comp1_t) (sa >> 0), da);
+            n = fbCombineDisjointInPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+            o = fbCombineDisjointInPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
+            p = fbCombineDisjointInPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
             Fa = m|n|o|p;
             break;
         case CombineA:
-            Fa = 0xffffffff;
+            Fa = ~0;
             break;
         }
 
@@ -1149,97 +1151,97 @@ fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int wi
             Fb = 0;
             break;
         case CombineBOut:
-            m = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
+            m = fbCombineDisjointOutPart (da, (comp1_t) (sa >> 0));
+            n = fbCombineDisjointOutPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+            o = fbCombineDisjointOutPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
+            p = fbCombineDisjointOutPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
             Fb = m|n|o|p;
             break;
         case CombineBIn:
-            m = fbCombineDisjointInPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineDisjointInPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineDisjointInPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineDisjointInPart (da, (uint8_t) (sa >> 24)) << 24;
+            m = fbCombineDisjointInPart (da, (comp1_t) (sa >> 0));
+            n = fbCombineDisjointInPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+            o = fbCombineDisjointInPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
+            p = fbCombineDisjointInPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
             Fb = m|n|o|p;
             break;
         case CombineB:
-            Fb = 0xffffffff;
+            Fb = ~0;
             break;
         }
         m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
-        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
-        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
-        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
+        n = FbGen (s,d,G_SHIFT,FbGet8(Fa,G_SHIFT),FbGet8(Fb,G_SHIFT),t, u, v);
+        o = FbGen (s,d,B_SHIFT,FbGet8(Fa,B_SHIFT),FbGet8(Fb,B_SHIFT),t, u, v);
+        p = FbGen (s,d,A_SHIFT,FbGet8(Fa,A_SHIFT),FbGet8(Fb,A_SHIFT),t, u, v);
         s = m|n|o|p;
 	*(dest + i) = s;
     }
 }
 
 FASTCALL static void
-fbCombineDisjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineDisjointOverC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOver);
 }
 
 FASTCALL static void
-fbCombineDisjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineDisjointInC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineAIn);
 }
 
 FASTCALL static void
-fbCombineDisjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineDisjointInReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineBIn);
 }
 
 FASTCALL static void
-fbCombineDisjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineDisjointOutC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOut);
 }
 
 FASTCALL static void
-fbCombineDisjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineDisjointOutReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineBOut);
 }
 
 FASTCALL static void
-fbCombineDisjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineDisjointAtopC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineAAtop);
 }
 
 FASTCALL static void
-fbCombineDisjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineDisjointAtopReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineBAtop);
 }
 
 FASTCALL static void
-fbCombineDisjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineDisjointXorC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineDisjointGeneralC (dest, src, mask, width, CombineXor);
 }
 
 FASTCALL static void
-fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
+fbCombineConjointGeneralC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width, comp1_t combine)
 {
     int i;
 
     for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint32_t  m,n,o,p;
-        uint32_t  Fa, Fb;
-        uint16_t  t, u, v;
-        uint32_t  sa;
-        uint8_t   da;
+        comp4_t s, d;
+        comp4_t m,n,o,p;
+        comp4_t Fa, Fb;
+        comp2_t t, u, v;
+        comp4_t sa;
+        comp1_t da;
 
         s = *(src + i);
         m = *(mask + i);
         d = *(dest + i);
-        da = d >> 24;
+        da = d >> A_SHIFT;
 
 	fbCombineMaskC (&s, &m);
 
@@ -1250,21 +1252,21 @@ fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int wi
             Fa = 0;
             break;
         case CombineAOut:
-            m = fbCombineConjointOutPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineConjointOutPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineConjointOutPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineConjointOutPart ((uint8_t) (sa >> 24), da) << 24;
+            m = fbCombineConjointOutPart ((comp1_t) (sa >> 0), da);
+            n = fbCombineConjointOutPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+            o = fbCombineConjointOutPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
+            p = fbCombineConjointOutPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
             Fa = m|n|o|p;
             break;
         case CombineAIn:
-            m = fbCombineConjointInPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineConjointInPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineConjointInPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineConjointInPart ((uint8_t) (sa >> 24), da) << 24;
+            m = fbCombineConjointInPart ((comp1_t) (sa >> 0), da);
+            n = fbCombineConjointInPart ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+            o = fbCombineConjointInPart ((comp1_t) (sa >> B_SHIFT), da) << B_SHIFT;
+            p = fbCombineConjointInPart ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
             Fa = m|n|o|p;
             break;
         case CombineA:
-            Fa = 0xffffffff;
+            Fa = ~0;
             break;
         }
 
@@ -1273,82 +1275,82 @@ fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int wi
             Fb = 0;
             break;
         case CombineBOut:
-            m = fbCombineConjointOutPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineConjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineConjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineConjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
+            m = fbCombineConjointOutPart (da, (comp1_t) (sa >> 0));
+            n = fbCombineConjointOutPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+            o = fbCombineConjointOutPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
+            p = fbCombineConjointOutPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
             Fb = m|n|o|p;
             break;
         case CombineBIn:
-            m = fbCombineConjointInPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineConjointInPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineConjointInPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineConjointInPart (da, (uint8_t) (sa >> 24)) << 24;
+            m = fbCombineConjointInPart (da, (comp1_t) (sa >> 0));
+            n = fbCombineConjointInPart (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+            o = fbCombineConjointInPart (da, (comp1_t) (sa >> B_SHIFT)) << B_SHIFT;
+            p = fbCombineConjointInPart (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
             Fb = m|n|o|p;
             break;
         case CombineB:
-            Fb = 0xffffffff;
+            Fb = ~0;
             break;
         }
         m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
-        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
-        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
-        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
+        n = FbGen (s,d,G_SHIFT,FbGet8(Fa,G_SHIFT),FbGet8(Fb,G_SHIFT),t, u, v);
+        o = FbGen (s,d,B_SHIFT,FbGet8(Fa,B_SHIFT),FbGet8(Fb,B_SHIFT),t, u, v);
+        p = FbGen (s,d,A_SHIFT,FbGet8(Fa,A_SHIFT),FbGet8(Fb,A_SHIFT),t, u, v);
         s = m|n|o|p;
 	*(dest + i) = s;
     }
 }
 
 FASTCALL static void
-fbCombineConjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineConjointOverC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineAOver);
 }
 
 FASTCALL static void
-fbCombineConjointOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineConjointOverReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineBOver);
 }
 
 FASTCALL static void
-fbCombineConjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineConjointInC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineAIn);
 }
 
 FASTCALL static void
-fbCombineConjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineConjointInReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineBIn);
 }
 
 FASTCALL static void
-fbCombineConjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineConjointOutC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineAOut);
 }
 
 FASTCALL static void
-fbCombineConjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineConjointOutReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineBOut);
 }
 
 FASTCALL static void
-fbCombineConjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineConjointAtopC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineAAtop);
 }
 
 FASTCALL static void
-fbCombineConjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineConjointAtopReverseC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineBAtop);
 }
 
 FASTCALL static void
-fbCombineConjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+fbCombineConjointXorC (comp4_t *dest, comp4_t *src, comp4_t *mask, int width)
 {
     fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
 }
diff --git a/pixman/combine.pl b/pixman/combine.pl
index 126db6c..8024333 100644
--- a/pixman/combine.pl
+++ b/pixman/combine.pl
@@ -7,6 +7,47 @@ $size = int($ARGV[0]);
 $size == 8 or $size == 16 or die $usage;
 
 $pixel_size = $size * 4;
+$half_pixel_size = $size * 2;
+
+sub mask {
+    my $str = shift;
+    my $suffix;
+    $suffix = "ULL" if $size > 8;
+
+    return "0x" . $str . $suffix;
+}
+
+# Generate mask strings.
+$nibbles = $size / 4;
+$mask = "f" x $nibbles;
+$zero_mask = "0" x $nibbles;
+$one_half = "8" . "0" x ($nibbles - 1);
+
+print "/* WARNING: This file is generated by combine.pl from combine.inc.\n";
+print "   Please edit one of those files rather than this one. */\n";
+print "\n";
+
+# Mask and 1/2 value for a single component.
+print "#define COMPONENT_SIZE ", $size, "\n";
+print "#define MASK ", mask($mask), "\n";
+print "#define ONE_HALF ", mask($one_half), "\n";
+print "\n";
+
+# Shifts and masks for green, blue, and alpha.
+print "#define G_SHIFT ", $size, "\n";
+print "#define B_SHIFT ", $size * 2, "\n";
+print "#define A_SHIFT ", $size * 3, "\n";
+print "#define G_MASK ", mask($mask . $zero_mask), "\n";
+print "#define B_MASK ", mask($mask . $zero_mask x 2), "\n";
+print "#define A_MASK ", mask($mask . $zero_mask x 3), "\n";
+print "\n";
+
+# Special values for dealing with red + blue at the same time.
+print "#define RB_MASK ", mask($mask . $zero_mask . $mask), "\n";
+print "#define AG_MASK ", mask($mask . $zero_mask . $mask . $zero_mask), "\n";
+print "#define RB_ONE_HALF ", mask($one_half . $zero_mask . $one_half), "\n";
+print "#define RB_MASK_PLUS_ONE ", mask("1" . $zero_mask x 2 . "1" .  $zero_mask), "\n";
+print "\n";
 
 print "#line 1 \"combine.inc\"\n";
 while (<STDIN>) {
@@ -16,5 +57,10 @@ while (<STDIN>) {
     s/\bCombineMaskU\b/CombineMaskU$pixel_size/;
     s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/;
 
-    print $_;
+    # Convert comp*_t values into the appropriate real types.
+    s/comp1_t/uint${size}_t/g;
+    s/comp2_t/uint${half_pixel_size}_t/g;
+    s/comp4_t/uint${pixel_size}_t/g;
+
+    print;
 }
commit e0e5c4b72937728d0b36b1077d94ce92a2374c9a
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Mon Mar 31 16:42:53 2008 -0700

    Tack 32 onto the ends of the combining function types.
    
    Signed-off-by: Søren Sandmann <sandmann at redhat.com>

diff --git a/pixman/combine.pl b/pixman/combine.pl
index e73a85a..126db6c 100644
--- a/pixman/combine.pl
+++ b/pixman/combine.pl
@@ -1,3 +1,20 @@
+$usage = "Usage: combine.pl { 8 | 16 } < combine.inc";
+
+$#ARGV == 0 or die $usage;
+
+# Get the component size.
+$size = int($ARGV[0]);
+$size == 8 or $size == 16 or die $usage;
+
+$pixel_size = $size * 4;
+
+print "#line 1 \"combine.inc\"\n";
 while (<STDIN>) {
+    # Add 32/64 suffix to combining function types.
+    s/\bCombineFuncC\b/CombineFuncC$pixel_size/;
+    s/\bCombineFuncU\b/CombineFuncU$pixel_size/;
+    s/\bCombineMaskU\b/CombineMaskU$pixel_size/;
+    s/\bFbComposeFunctions\b/FbComposeFunctions$pixel_size/;
+
     print $_;
 }
diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 4cb304d..81b7b1a 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -302,7 +302,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	PIXMAN_FORMAT_RGB (data->mask->bits.format))
     {
 	uint32_t *mask_buffer = dest_buffer + data->width;
-	CombineFuncC compose = pixman_composeFunctions.combineC[data->op];
+	CombineFuncC32 compose = pixman_composeFunctions.combineC[data->op];
 	if (!compose)
 	    return;
 
@@ -366,7 +366,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
     else
     {
 	uint32_t *src_mask_buffer = 0, *mask_buffer = 0;
-	CombineFuncU compose = pixman_composeFunctions.combineU[data->op];
+	CombineFuncU32 compose = pixman_composeFunctions.combineU[data->op];
 	if (!compose)
 	    return;
 
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 714a6a5..e8c3f39 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -143,9 +143,9 @@ typedef struct point point_t;
  */
 
 #define FASTCALL
-typedef FASTCALL void (*CombineMaskU) (uint32_t *src, const uint32_t *mask, int width);
-typedef FASTCALL void (*CombineFuncU) (uint32_t *dest, const uint32_t *src, int width);
-typedef FASTCALL void (*CombineFuncC) (uint32_t *dest, uint32_t *src, uint32_t *mask, int width);
+typedef FASTCALL void (*CombineMaskU32) (uint32_t *src, const uint32_t *mask, int width);
+typedef FASTCALL void (*CombineFuncU32) (uint32_t *dest, const uint32_t *src, int width);
+typedef FASTCALL void (*CombineFuncC32) (uint32_t *dest, uint32_t *src, uint32_t *mask, int width);
 typedef FASTCALL void (*fetchProc)(bits_image_t *pict, int x, int y, int width,
                                    uint32_t *buffer);
 typedef FASTCALL uint32_t (*fetchPixelProc)(bits_image_t *pict, int offset, int line);
@@ -153,6 +153,10 @@ typedef FASTCALL void (*storeProc)(pixman_image_t *, uint32_t *bits,
                                    const uint32_t *values, int x, int width,
                                    const pixman_indexed_t *);
 
+typedef FASTCALL void (*CombineMaskU64) (uint64_t *src, const uint64_t *mask, int width);
+typedef FASTCALL void (*CombineFuncU64) (uint64_t *dest, const uint64_t *src, int width);
+typedef FASTCALL void (*CombineFuncC64) (uint64_t *dest, uint64_t *src, uint64_t *mask, int width);
+
 typedef struct _FbComposeData {
     uint8_t	 op;
     pixman_image_t	*src;
@@ -168,13 +172,19 @@ typedef struct _FbComposeData {
     uint16_t	 height;
 } FbComposeData;
 
-typedef struct _FbComposeFunctions {
-    CombineFuncU *combineU;
-    CombineFuncC *combineC;
-    CombineMaskU combineMaskU;
-} FbComposeFunctions;
+typedef struct _FbComposeFunctions32 {
+    CombineFuncU32 *combineU;
+    CombineFuncC32 *combineC;
+    CombineMaskU32 combineMaskU;
+} FbComposeFunctions32;
+
+typedef struct _FbComposeFunctions64 {
+    CombineFuncU64 *combineU;
+    CombineFuncC64 *combineC;
+    CombineMaskU64 combineMaskU;
+} FbComposeFunctions64;
 
-extern FbComposeFunctions pixman_composeFunctions;
+extern FbComposeFunctions32 pixman_composeFunctions;
 
 void pixman_composite_rect_general_accessors (const FbComposeData *data,
 					      uint32_t *scanline_buffer);
commit f88519ed3fed42e41aa5623540466d0bee1a887b
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Mon Mar 31 16:33:09 2008 -0700

    Move combining routines into combine.inc and add a Perl rule to generate it.
    
    This will eventually be used to search & replace types and mask/shift
    calculations to generate a wide version of these functions.
    
    Signed-off-by: Søren Sandmann <sandmann at redhat.com>

diff --git a/configure.ac b/configure.ac
index 58da61c..6b0efcb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -102,6 +102,12 @@ if test "x$GCC" = "xyes"; then
   *) CFLAGS="$CFLAGS -Wall" ;;
   esac fi changequote([,])dnl
 
+AC_PATH_PROG(PERL, perl, no)
+if test "x$PERL" = xno; then
+    AC_MSG_ERROR([Perl is required to build pixman.])
+fi
+AC_SUBST(PERL)
+
 dnl =========================================================================
 dnl -fvisibility stuff
 
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 190df4c..9ef64ee 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -8,7 +8,7 @@ libpixman_1_la_SOURCES =		\
 	pixman-region.c			\
 	pixman-private.h		\
 	pixman-image.c			\
-	pixman-combine.c		\
+	pixman-combine32.c		\
 	pixman-compose.c		\
 	pixman-compose-accessors.c	\
 	pixman-pict.c			\
@@ -27,7 +27,11 @@ libpixmanincludedir = $(includedir)/pixman-1/
 libpixmaninclude_HEADERS = pixman.h pixman-version.h
 noinst_LTLIBRARIES = 
 
-EXTRA_DIST = Makefile.win32
+pixman-combine32.c : combine.inc combine.pl
+	$(PERL) $(srcdir)/combine.pl 8 < $< > $@ || ($(RM) $@; exit 1)
+
+EXTRA_DIST = Makefile.win32 combine.inc combine.pl
+CLEANFILES = pixman-combine32.c
 
 # mmx code
 if USE_MMX
diff --git a/pixman/combine.inc b/pixman/combine.inc
new file mode 100644
index 0000000..9cd51a4
--- /dev/null
+++ b/pixman/combine.inc
@@ -0,0 +1,1454 @@
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include "pixman-private.h"
+
+/*
+ * Helper macros.
+ */
+
+/*
+ * FbGen: blend the 8-bit channel at bit offset i of x and y.
+ * Evaluates to FbIntMult(x_i, ax) + FbIntMult(y_i, ay), positioned back at
+ * bit offset i.  t receives the raw sum; u and v are scratch lvalues handed
+ * to FbIntMult.  The "t | (0 - (t >> 8))" step forces the low byte to 0xff
+ * when the sum carried into bit 8 (assumes the sum stays below 0x200).
+ * FbIntMult and FbGet8 are defined outside this file.
+ */
+#define FbGen(x,y,i,ax,ay,t,u,v) ((t) = (FbIntMult(FbGet8(y,i),ay,(u)) + \
+					 FbIntMult(FbGet8(x,i),ax,(v))), \
+				  (uint32_t) ((uint8_t) ((t) |		\
+							 (0 - ((t) >> 8)))) << (i))
+
+
+/*
+  The methods below use some tricks to be able to do two color
+  components at the same time.
+*/
+
+/*
+  x_c = (x_c * a) / 255
+
+  Scales all four 8-bit channels of x by the single factor a, in place.
+  Bits 0-7 and 16-23 are processed together in t, bits 8-15 and 24-31
+  together in x.  Each product has 0x80 added and is folded with its own
+  high byte before the shift/mask, which rounds the division by 255.
+
+  x must be a plain lvalue and a a simple expression: both are expanded
+  without parentheses and more than once.  The macro declares a local
+  named t, so neither argument may be called t.
+  NOTE(review): a is presumably in 0..255 -- every visible caller passes
+  an alpha byte.
+*/
+#define FbByteMul(x, a) do {					    \
+        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
+        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
+        t &= 0xff00ff;						    \
+								    \
+        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;		    \
+        x = (x + ((x >> 8) & 0xff00ff));			    \
+        x &= 0xff00ff00;					    \
+        x += t;							    \
+    } while (0)
+
+/*
+  x_c = min((x_c * a) / 255 + y_c, 255)
+
+  Same per-channel scaling as FbByteMul, followed by a saturating add of
+  the matching channel of y.  The "|= 0x1000100 - ((.. >> 8) & 0xff00ff)"
+  step turns any lane that carried into bit 8 into 0xff before masking.
+
+  x must be a plain lvalue (modified in place); a and y are expanded
+  unparenthesized and more than once.  Declares a local named t.
+*/
+#define FbByteMulAdd(x, a, y) do {				    \
+        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
+        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
+        t &= 0xff00ff;						    \
+        t += y & 0xff00ff;					    \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);			    \
+        t &= 0xff00ff;						    \
+								    \
+        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;                 \
+        x = (x + ((x >> 8) & 0xff00ff)) >> 8;                       \
+        x &= 0xff00ff;                                              \
+        x += (y >> 8) & 0xff00ff;                                   \
+        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                     \
+        x &= 0xff00ff;                                              \
+        x <<= 8;                                                    \
+        x += t;                                                     \
+    } while (0)
+
+/*
+  x_c = min((x_c * a + y_c * b) / 255, 255)
+
+  Each of the four channels is computed on its own (bits 24-31 and 8-15
+  first, collected in t; then bits 16-23 and 0-7, collected in x), because
+  the sum of two products does not fit a 16-bit lane.  The results are
+  saturated to 0xff with the same "0x1000100 - ..." step FbByteAdd uses.
+
+  x must be a plain lvalue (modified in place); a, y and b are expanded
+  unparenthesized and more than once.  Declares locals named t and r.
+*/
+#define FbByteAddMul(x, a, y, b) do {                                   \
+        uint32_t t;							\
+        uint32_t r = (x >> 24) * a + (y >> 24) * b + 0x80;		\
+        r += (r >> 8);                                                  \
+        r >>= 8;                                                        \
+									\
+        t = (x & 0xff00) * a + (y & 0xff00) * b;                        \
+        t += (t >> 8) + 0x8000;                                         \
+        t >>= 16;                                                       \
+									\
+        t |= r << 16;                                                   \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
+        t &= 0xff00ff;                                                  \
+        t <<= 8;                                                        \
+									\
+        r = ((x >> 16) & 0xff) * a + ((y >> 16) & 0xff) * b + 0x80;     \
+        r += (r >> 8);                                                  \
+        r >>= 8;                                                        \
+									\
+        x = (x & 0xff) * a + (y & 0xff) * b + 0x80;                     \
+        x += (x >> 8);                                                  \
+        x >>= 8;                                                        \
+        x |= r << 16;                                                   \
+        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
+        x &= 0xff00ff;                                                  \
+        x |= t;                                                         \
+    } while (0)
+
+/*
+  x_c = (x_c * a + y_c * b) / 256
+
+  Two channels at a time; the division is a plain shift by 8 with no
+  rounding and no saturation step.
+  NOTE(review): looks like callers must keep a + b <= 256 so a lane cannot
+  spill into its neighbour -- confirm against the call sites.
+
+  x must be a plain lvalue (modified in place); a, y and b are expanded
+  unparenthesized and more than once.  Declares a local named t.
+*/
+#define FbByteAddMul_256(x, a, y, b) do {                               \
+        uint32_t t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;		\
+        t >>= 8;                                                        \
+        t &= 0xff00ff;                                                  \
+									\
+        x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;      \
+        x &= 0xff00ff00;                                                \
+        x += t;                                                         \
+    } while (0)
+
+/*
+  x_c = (x_c * a_c) / 255
+
+  Component-wise variant of FbByteMul: each channel of x is scaled by the
+  channel of a at the same position.  Bits 0-7 and 16-23 are combined in r;
+  x is then shifted right by 8 so bits 8-15 and 24-31 can be handled the
+  same way in t.
+
+  x must be a plain lvalue (modified in place); a is expanded
+  unparenthesized and more than once.  Declares locals named t and r.
+*/
+#define FbByteMulC(x, a) do {				  \
+        uint32_t t;                                       \
+        uint32_t r = (x & 0xff) * (a & 0xff);             \
+        r |= (x & 0xff0000) * ((a >> 16) & 0xff);	  \
+	r += 0x800080;					  \
+        r = (r + ((r >> 8) & 0xff00ff)) >> 8;		  \
+        r &= 0xff00ff;					  \
+							  \
+        x >>= 8;					  \
+        t = (x & 0xff) * ((a >> 8) & 0xff);		  \
+        t |= (x & 0xff0000) * (a >> 24);		  \
+        t += 0x800080;					  \
+        t = t + ((t >> 8) & 0xff00ff);			  \
+        x = r | (t & 0xff00ff00);			  \
+							  \
+    } while (0)
+
+/*
+  x_c = min((x_c * a_c) / 255 + y_c, 255)
+
+  Component-wise scaling as in FbByteMulC, followed by a saturating add of
+  the matching channel of y.
+
+  x must be a plain lvalue (modified in place); a and y are expanded
+  unparenthesized and more than once.  Declares locals named t and r.
+*/
+#define FbByteMulAddC(x, a, y) do {				      \
+        uint32_t t;                                                   \
+        uint32_t r = (x & 0xff) * (a & 0xff);                         \
+        r |= (x & 0xff0000) * ((a >> 16) & 0xff);		      \
+	r += 0x800080;						      \
+	r = (r + ((r >> 8) & 0xff00ff)) >> 8;			      \
+        r &= 0xff00ff;						      \
+        r += y & 0xff00ff;					      \
+        r |= 0x1000100 - ((r >> 8) & 0xff00ff);			      \
+        r &= 0xff00ff;						      \
+								      \
+        x >>= 8;                                                       \
+        t = (x & 0xff) * ((a >> 8) & 0xff);                            \
+        t |= (x & 0xff0000) * (a >> 24);                               \
+	t += 0x800080;                                                 \
+        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			       \
+        t &= 0xff00ff;                                                 \
+        t += (y >> 8) & 0xff00ff;                                      \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                        \
+        t &= 0xff00ff;                                                 \
+        x = r | (t << 8);                                              \
+    } while (0)
+
+/*
+  x_c = min((x_c * a_c + y_c * b) / 255, 255)
+
+  Like FbByteAddMul, but x is scaled per channel by the matching channel
+  of a, while y is scaled by the single factor b.
+
+  NOTE(review): the bits 24-31 lane adds 0x80 after folding in (r >> 8),
+  whereas the bits 16-23 and 0-7 lanes add 0x80 first.  The two orders can
+  differ by one for some inputs -- confirm whether that is intended.
+
+  x must be a plain lvalue (modified in place); a, y and b are expanded
+  unparenthesized and more than once.  Declares locals named t and r.
+*/
+#define FbByteAddMulC(x, a, y, b) do {                                  \
+        uint32_t t;							\
+        uint32_t r = (x >> 24) * (a >> 24) + (y >> 24) * b;		\
+        r += (r >> 8) + 0x80;                                           \
+        r >>= 8;                                                        \
+									\
+        t = (x & 0xff00) * ((a >> 8) & 0xff) + (y & 0xff00) * b;        \
+        t += (t >> 8) + 0x8000;                                         \
+        t >>= 16;                                                       \
+									\
+        t |= r << 16;                                                   \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
+        t &= 0xff00ff;                                                  \
+        t <<= 8;                                                        \
+									\
+        r = ((x >> 16) & 0xff) * ((a >> 16) & 0xff) + ((y >> 16) & 0xff) * b + 0x80; \
+        r += (r >> 8);                                                  \
+        r >>= 8;                                                        \
+									\
+        x = (x & 0xff) * (a & 0xff) + (y & 0xff) * b + 0x80;            \
+        x += (x >> 8);                                                  \
+        x >>= 8;                                                        \
+        x |= r << 16;                                                   \
+        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
+        x &= 0xff00ff;                                                  \
+        x |= t;                                                         \
+    } while (0)
+
+/*
+  x_c = min(x_c + y_c, 255)
+
+  Saturating per-channel add, two channels at a time.  After the add, a
+  lane that carried into bit 8 gets 0xff OR-ed into its low byte by the
+  "0x1000100 - ((.. >> 8) & 0xff00ff)" term; the carry itself is then
+  masked off.
+
+  x must be a plain lvalue (modified in place); y is expanded
+  unparenthesized and more than once.  Declares locals named t and r.
+*/
+#define FbByteAdd(x, y) do {                                            \
+        uint32_t t;							\
+        uint32_t r = (x & 0xff00ff) + (y & 0xff00ff);			\
+        r |= 0x1000100 - ((r >> 8) & 0xff00ff);                         \
+        r &= 0xff00ff;                                                  \
+									\
+        t = ((x >> 8) & 0xff00ff) + ((y >> 8) & 0xff00ff);              \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
+        r |= (t & 0xff00ff) << 8;                                       \
+        x = r;                                                          \
+    } while (0)
+
+
+/*
+ * There are two ways of handling alpha -- either as a single unified value or
+ * a separate value for each component, hence each macro must have two
+ * versions.  The unified alpha version has a 'U' at the end of the name,
+ * the component version has a 'C'.  Similarly, functions which deal with
+ * this difference will have two versions using the same convention.
+ */
+
+
+/*
+ * Combine src and mask: scale every channel of each src pixel by the top
+ * byte of the matching mask pixel.  src is updated in place.
+ */
+FASTCALL static void
+pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t mask_alpha = mask[i] >> 24;
+        uint32_t pixel = src[i];
+
+        FbByteMul(pixel, mask_alpha);
+        src[i] = pixel;
+    }
+}
+
+/*
+ * All of the composing functions
+ */
+
+/* Clear: zero-fill width pixels of dest.  src is not read. */
+FASTCALL static void
+fbCombineClear (uint32_t *dest, const uint32_t *src, int width)
+{
+    memset (dest, 0, width * sizeof (*dest));
+}
+
+/* Src: copy width pixels from src to dest unchanged. */
+FASTCALL static void
+fbCombineSrcU (uint32_t *dest, const uint32_t *src, int width)
+{
+    memcpy (dest, src, width * sizeof (*dest));
+}
+
+/*
+ * Over: dest = src + dest * Alpha(~src) / 255, saturating per channel.
+ * If the Src is opaque, fbCombineSrcU can be called instead.
+ */
+FASTCALL static void
+fbCombineOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t src_px = src[i];
+        uint32_t out = dest[i];
+        uint32_t inv_src_alpha = Alpha(~src_px);
+
+        FbByteMulAdd(out, inv_src_alpha, src_px);
+        dest[i] = out;
+    }
+}
+
+/*
+ * OverReverse: dest = dest + src * Alpha(~dest) / 255, saturating per
+ * channel.  If the Dst is opaque, this is a noop.
+ */
+FASTCALL static void
+fbCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = src[i];
+        uint32_t dst_px = dest[i];
+        uint32_t inv_dst_alpha = Alpha(~dst_px);
+
+        FbByteMulAdd(out, inv_dst_alpha, dst_px);
+        dest[i] = out;
+    }
+}
+
+/*
+ * In: dest = src * Alpha(dest) / 255.
+ * If the Dst is opaque, fbCombineSrcU can be called instead.
+ */
+FASTCALL static void
+fbCombineInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = src[i];
+        uint32_t dst_alpha = Alpha(dest[i]);
+
+        FbByteMul(out, dst_alpha);
+        dest[i] = out;
+    }
+}
+
+/*
+ * InReverse: dest = dest * Alpha(src) / 255.
+ * If the Src is opaque, this is a noop.
+ */
+FASTCALL static void
+fbCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = dest[i];
+        uint32_t src_alpha = Alpha(src[i]);
+
+        FbByteMul(out, src_alpha);
+        dest[i] = out;
+    }
+}
+
+/*
+ * Out: dest = src * Alpha(~dest) / 255.
+ * If the Dst is opaque, fbCombineClear can be called instead.
+ */
+FASTCALL static void
+fbCombineOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = src[i];
+        uint32_t inv_dst_alpha = Alpha(~dest[i]);
+
+        FbByteMul(out, inv_dst_alpha);
+        dest[i] = out;
+    }
+}
+
+/*
+ * OutReverse: dest = dest * Alpha(~src) / 255.
+ * If the Src is opaque, fbCombineClear can be called instead.
+ */
+FASTCALL static void
+fbCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = dest[i];
+        uint32_t inv_src_alpha = Alpha(~src[i]);
+
+        FbByteMul(out, inv_src_alpha);
+        dest[i] = out;
+    }
+}
+
+/*
+ * Atop: dest = (src * Alpha(dest) + dest * Alpha(~src)) / 255.
+ *
+ * if the Src is opaque, call fbCombineInU
+ * if the Dst is opaque, call fbCombineOverU
+ * if both the Src and Dst are opaque, call fbCombineSrcU
+ */
+FASTCALL static void
+fbCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = src[i];
+        uint32_t dst_px = dest[i];
+        uint32_t inv_src_alpha = Alpha(~out);
+        uint32_t dst_alpha = Alpha(dst_px);
+
+        FbByteAddMul(out, dst_alpha, dst_px, inv_src_alpha);
+        dest[i] = out;
+    }
+}
+
+/*
+ * AtopReverse: dest = (src * Alpha(~dest) + dest * Alpha(src)) / 255.
+ *
+ * if the Src is opaque, call fbCombineOverReverseU
+ * if the Dst is opaque, call fbCombineInReverseU
+ * if both the Src and Dst are opaque, call fbCombineDstU
+ */
+FASTCALL static void
+fbCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = src[i];
+        uint32_t dst_px = dest[i];
+        uint32_t inv_dst_alpha = Alpha(~dst_px);
+        uint32_t src_alpha = Alpha(out);
+
+        FbByteAddMul(out, inv_dst_alpha, dst_px, src_alpha);
+        dest[i] = out;
+    }
+}
+
+/*
+ * Xor: dest = (src * Alpha(~dest) + dest * Alpha(~src)) / 255.
+ *
+ * if the Src is opaque, call fbCombineOverU
+ * if the Dst is opaque, call fbCombineOverReverseU
+ * if both the Src and Dst are opaque, call fbCombineClear
+ */
+FASTCALL static void
+fbCombineXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = src[i];
+        uint32_t dst_px = dest[i];
+        uint32_t inv_dst_alpha = Alpha(~dst_px);
+        uint32_t inv_src_alpha = Alpha(~out);
+
+        FbByteAddMul(out, inv_dst_alpha, dst_px, inv_src_alpha);
+        dest[i] = out;
+    }
+}
+
+/* Add: dest = min(dest + src, 255) per channel. */
+FASTCALL static void
+fbCombineAddU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = dest[i];
+        uint32_t src_px = src[i];
+
+        FbByteAdd(out, src_px);
+        dest[i] = out;
+    }
+}
+
+/*
+ * Saturate: when the source alpha exceeds the inverse destination alpha,
+ * the source is first scaled by FbIntDiv(~dest alpha, src alpha); the
+ * (possibly scaled) source is then added to dest with per-channel
+ * saturation.
+ */
+FASTCALL static void
+fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t src_px = src[i];
+        uint32_t out = dest[i];
+        uint16_t sa = src_px >> 24;
+        uint16_t da = ~out >> 24;
+
+        if (sa > da)
+        {
+            sa = FbIntDiv(da, sa);
+            FbByteMul(src_px, sa);
+        }
+        FbByteAdd(out, src_px);
+        dest[i] = out;
+    }
+}
+
+
+/*
+ * All of the disjoint composing functions
+
+ The four entries in the first column indicate what source contributions
+ come from each of the four areas of the picture -- areas covered by neither
+ A nor B, areas covered only by A, areas covered only by B and finally
+ areas covered by both A and B.
+
+ Disjoint			Conjoint
+ Fa		Fb		Fa		Fb
+ (0,0,0,0)	0		0		0		0
+ (0,A,0,A)	1		0		1		0
+ (0,0,B,B)	0		1		0		1
+ (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
+ (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
+ (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
+ (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
+ (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
+ (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
+ (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
+ (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
+ (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
+
+*/
+
+/*
+ * Bit flags for the `combine` argument of the *GeneralU / *GeneralC
+ * functions.  The two A bits select how the factor Fa applied to the source
+ * is computed (neither bit: 0, AOut: the "out" part, AIn: the "in" part,
+ * both: 0xff); the two B bits do the same for the factor Fb applied to the
+ * destination.
+ */
+#define CombineAOut 1
+#define CombineAIn  2
+#define CombineBOut 4
+#define CombineBIn  8
+
+/* Operator encodings built from the flags above. */
+#define CombineClear	0
+#define CombineA	(CombineAOut|CombineAIn)
+#define CombineB	(CombineBOut|CombineBIn)
+#define CombineAOver	(CombineAOut|CombineBOut|CombineAIn)
+#define CombineBOver	(CombineAOut|CombineBOut|CombineBIn)
+#define CombineAAtop	(CombineBOut|CombineAIn)
+#define CombineBAtop	(CombineAOut|CombineBIn)
+#define CombineXor	(CombineAOut|CombineBOut)
+
+/*
+ * Disjoint case, portion covered by a but not b:
+ *   min (1, (1-b) / a)
+ */
+FASTCALL static uint8_t
+fbCombineDisjointOutPart (uint8_t a, uint8_t b)
+{
+    uint8_t inv_b = ~b;		/* 1 - b */
+
+    /* (1-b) >= a means the quotient is >= 1; this also catches a == 0 */
+    if (inv_b >= a)
+        return 0xff;
+
+    return FbIntDiv(inv_b, a);
+}
+
+/*
+ * Disjoint case, portion covered by both a and b:
+ *   max (1 - (1-b)/a, 0)  =  1 - min (1, (1-b)/a)
+ */
+FASTCALL static uint8_t
+fbCombineDisjointInPart (uint8_t a, uint8_t b)
+{
+    uint8_t inv_b = ~b;		/* 1 - b */
+
+    /* (1-b)/a >= 1, so 1 - 1 = 0; this also catches a == 0 */
+    if (inv_b >= a)
+        return 0;
+
+    return ~FbIntDiv(inv_b, a);
+}
+
+/*
+ * Conjoint case, portion covered by a but not b:
+ *   max (1 - b/a, 0)  =  1 - min (b/a, 1)
+ */
+FASTCALL static uint8_t
+fbCombineConjointOutPart (uint8_t a, uint8_t b)
+{
+    /* b < a, so b/a < 1 and a is non-zero */
+    if (b < a)
+        return ~FbIntDiv(b,a);
+
+    return 0x00;
+}
+
+/*
+ * Conjoint case, portion covered by both a and b:
+ *   min (1, b/a)
+ */
+FASTCALL static uint8_t
+fbCombineConjointInPart (uint8_t a, uint8_t b)
+{
+    /* b < a, so b/a < 1 and a is non-zero */
+    if (b < a)
+        return FbIntDiv(b,a);
+
+    return 0xff;
+}
+
+/*
+ * Generic disjoint operator with unified alpha.
+ *
+ * The A bits of `combine` choose the source factor Fa and the B bits the
+ * destination factor Fb, each computed from the two alpha values by the
+ * disjoint Out/In helpers.  Every channel of the result is then
+ * FbGen (src, dest, shift, Fa, Fb, ...).
+ */
+FASTCALL static void
+fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
+{
+    const uint8_t mode_a = combine & CombineA;
+    const uint8_t mode_b = combine & CombineB;
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t s = src[i];
+        uint32_t d = dest[i];
+        uint32_t c0, c8, c16, c24;
+        uint16_t Fa, Fb, t, u, v;
+        uint8_t sa = s >> 24;
+        uint8_t da = d >> 24;
+
+        if (mode_a == CombineAOut)
+            Fa = fbCombineDisjointOutPart (sa, da);
+        else if (mode_a == CombineAIn)
+            Fa = fbCombineDisjointInPart (sa, da);
+        else if (mode_a == CombineA)
+            Fa = 0xff;
+        else
+            Fa = 0;
+
+        if (mode_b == CombineBOut)
+            Fb = fbCombineDisjointOutPart (da, sa);
+        else if (mode_b == CombineBIn)
+            Fb = fbCombineDisjointInPart (da, sa);
+        else if (mode_b == CombineB)
+            Fb = 0xff;
+        else
+            Fb = 0;
+
+        c0 = FbGen (s,d,0,Fa,Fb,t, u, v);
+        c8 = FbGen (s,d,8,Fa,Fb,t, u, v);
+        c16 = FbGen (s,d,16,Fa,Fb,t, u, v);
+        c24 = FbGen (s,d,24,Fa,Fb,t, u, v);
+        dest[i] = c0|c8|c16|c24;
+    }
+}
+
+/*
+ * Disjoint Over with unified alpha.
+ *
+ * A source pixel with alpha 0 leaves dest untouched and one with alpha
+ * 0xff replaces dest.  Otherwise dest is scaled by
+ * fbCombineDisjointOutPart (dest alpha, src alpha) and the source is added
+ * with saturation.
+ */
+FASTCALL static void
+fbCombineDisjointOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t src_px = src[i];
+        uint16_t factor = src_px >> 24;
+
+        if (factor == 0x00)
+            continue;
+
+        if (factor != 0xff)
+        {
+            uint32_t dst_px = dest[i];
+
+            factor = fbCombineDisjointOutPart (dst_px >> 24, factor);
+            FbByteMulAdd(dst_px, factor, src_px);
+            src_px = dst_px;
+        }
+        dest[i] = src_px;
+    }
+}
+
+/* Disjoint In: fbCombineDisjointGeneralU with CombineAIn. */
+FASTCALL static void
+fbCombineDisjointInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAIn);
+}
+
+/* Disjoint InReverse: fbCombineDisjointGeneralU with CombineBIn. */
+FASTCALL static void
+fbCombineDisjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBIn);
+}
+
+/* Disjoint Out: fbCombineDisjointGeneralU with CombineAOut. */
+FASTCALL static void
+fbCombineDisjointOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAOut);
+}
+
+/* Disjoint OutReverse: fbCombineDisjointGeneralU with CombineBOut. */
+FASTCALL static void
+fbCombineDisjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBOut);
+}
+
+/* Disjoint Atop: fbCombineDisjointGeneralU with CombineAAtop. */
+FASTCALL static void
+fbCombineDisjointAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAAtop);
+}
+
+/* Disjoint AtopReverse: fbCombineDisjointGeneralU with CombineBAtop. */
+FASTCALL static void
+fbCombineDisjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBAtop);
+}
+
+/* Disjoint Xor: fbCombineDisjointGeneralU with CombineXor. */
+FASTCALL static void
+fbCombineDisjointXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineXor);
+}
+
+/*
+ * Generic conjoint operator with unified alpha.
+ *
+ * The A bits of `combine` choose the source factor Fa and the B bits the
+ * destination factor Fb, each computed from the two alpha values by the
+ * conjoint Out/In helpers.  Every channel of the result is then
+ * FbGen (src, dest, shift, Fa, Fb, ...).
+ */
+FASTCALL static void
+fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
+{
+    const uint8_t mode_a = combine & CombineA;
+    const uint8_t mode_b = combine & CombineB;
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t s = src[i];
+        uint32_t d = dest[i];
+        uint32_t c0, c8, c16, c24;
+        uint16_t Fa, Fb, t, u, v;
+        uint8_t sa = s >> 24;
+        uint8_t da = d >> 24;
+
+        if (mode_a == CombineAOut)
+            Fa = fbCombineConjointOutPart (sa, da);
+        else if (mode_a == CombineAIn)
+            Fa = fbCombineConjointInPart (sa, da);
+        else if (mode_a == CombineA)
+            Fa = 0xff;
+        else
+            Fa = 0;
+
+        if (mode_b == CombineBOut)
+            Fb = fbCombineConjointOutPart (da, sa);
+        else if (mode_b == CombineBIn)
+            Fb = fbCombineConjointInPart (da, sa);
+        else if (mode_b == CombineB)
+            Fb = 0xff;
+        else
+            Fb = 0;
+
+        c0 = FbGen (s,d,0,Fa,Fb,t, u, v);
+        c8 = FbGen (s,d,8,Fa,Fb,t, u, v);
+        c16 = FbGen (s,d,16,Fa,Fb,t, u, v);
+        c24 = FbGen (s,d,24,Fa,Fb,t, u, v);
+        dest[i] = c0|c8|c16|c24;
+    }
+}
+
+/* Conjoint Over: fbCombineConjointGeneralU with CombineAOver. */
+FASTCALL static void
+fbCombineConjointOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAOver);
+}
+
+
+/* Conjoint OverReverse: fbCombineConjointGeneralU with CombineBOver. */
+FASTCALL static void
+fbCombineConjointOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBOver);
+}
+
+
+/* Conjoint In: fbCombineConjointGeneralU with CombineAIn. */
+FASTCALL static void
+fbCombineConjointInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAIn);
+}
+
+
+/* Conjoint InReverse: fbCombineConjointGeneralU with CombineBIn. */
+FASTCALL static void
+fbCombineConjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBIn);
+}
+
+/* Conjoint Out: fbCombineConjointGeneralU with CombineAOut. */
+FASTCALL static void
+fbCombineConjointOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAOut);
+}
+
+/* Conjoint OutReverse: fbCombineConjointGeneralU with CombineBOut. */
+FASTCALL static void
+fbCombineConjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBOut);
+}
+
+/* Conjoint Atop: fbCombineConjointGeneralU with CombineAAtop. */
+FASTCALL static void
+fbCombineConjointAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAAtop);
+}
+
+/* Conjoint AtopReverse: fbCombineConjointGeneralU with CombineBAtop. */
+FASTCALL static void
+fbCombineConjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBAtop);
+}
+
+/* Conjoint Xor: fbCombineConjointGeneralU with CombineXor. */
+FASTCALL static void
+fbCombineConjointXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineXor);
+}
+
+/********************************************************************************/
+/*************************** Per Channel functions ******************************/
+/********************************************************************************/
+
+/*
+ * Component-alpha masking of a single pixel; updates both *src and *mask.
+ *
+ *  - mask == 0:          *src becomes 0, *mask is left alone.
+ *  - mask == 0xffffffff: *src is left alone, *mask becomes the source
+ *                        alpha replicated into all four channels.
+ *  - otherwise:          *src = src * mask per channel, and
+ *                        *mask = mask * source alpha.
+ */
+FASTCALL static void
+fbCombineMaskC (uint32_t *src, uint32_t *mask)
+{
+    uint32_t m = *mask;
+    uint32_t s;
+    uint16_t src_alpha;
+
+    if (m == 0)
+    {
+        *src = 0;
+        return;
+    }
+
+    s = *src;
+    src_alpha = s >> 24;
+
+    if (m == 0xffffffff)
+    {
+        uint32_t rep = src_alpha;
+
+        rep |= rep << 8;
+        rep |= rep << 16;
+        *mask = rep;
+        return;
+    }
+
+    FbByteMulC(s, m);
+    *src = s;
+    FbByteMul(m, src_alpha);
+    *mask = m;
+}
+
+/*
+ * Component-alpha masking of a single pixel, colour only: *src is
+ * multiplied per channel by *mask.  A zero mask yields 0, an all-ones mask
+ * leaves *src untouched.  *mask is never written.
+ */
+FASTCALL static void
+fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
+{
+    uint32_t m = *mask;
+
+    if (m == 0)
+    {
+        *src = 0;
+        return;
+    }
+
+    if (m != 0xffffffff)
+    {
+        uint32_t s = *src;
+
+        FbByteMulC(s, m);
+        *src = s;
+    }
+}
+
+/*
+ * Component-alpha masking of a single pixel, mask only: *mask is scaled by
+ * the alpha of *src.  *src is never written.
+ *
+ *  - mask == 0 or source alpha == 0xff: *mask is left alone.
+ *  - mask == 0xffffffff: *mask becomes the source alpha replicated into
+ *    all four channels.
+ *  - otherwise: *mask = mask * source alpha.
+ */
+FASTCALL static void
+fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
+{
+    uint32_t a = *(mask);
+    uint32_t	x;
+
+    if (!a)
+	return;
+
+    x = *(src) >> 24;
+    if (x == 0xff)
+	return;
+    if (a == 0xffffffff)
+    {
+	/*
+	 * x already holds the 8-bit source alpha; shifting it right by 24
+	 * again would always give 0 and wipe the mask.
+	 */
+	x |= x << 8;
+	x |= x << 16;
+	*(mask) = x;
+	return;
+    }
+
+    FbByteMul(a, x);
+    *(mask) = a;
+}
+
+
+
+/* Clear (component alpha): zero-fill width pixels of dest; src and mask are not read. */
+FASTCALL static void
+fbCombineClearC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    memset (dest, 0, width * sizeof (*dest));
+}
+
+/*
+ * Src (component alpha): dest[i] = src[i] * mask[i], per channel, for each
+ * of the width pixels.  src and mask are only read.
+ */
+FASTCALL static void
+fbCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+	uint32_t s = *(src + i);
+	uint32_t m = *(mask + i);
+
+	fbCombineMaskValueC (&s, &m);
+
+	/* store at index i; writing *(dest) would only ever touch pixel 0 */
+	*(dest + i) = s;
+    }
+}
+
+/*
+ * Over (component alpha).
+ *
+ * After fbCombineMaskC, the inverted mask is the per-channel factor for
+ * dest.  An inverted mask of all ones leaves dest untouched; an inverted
+ * mask of zero stores the masked source; otherwise
+ * dest = dest * ~mask + masked source, saturating.
+ */
+FASTCALL static void
+fbCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = src[i];
+        uint32_t msk = mask[i];
+        uint32_t inv_mask;
+
+        fbCombineMaskC (&out, &msk);
+
+        inv_mask = ~msk;
+        if (inv_mask == 0xffffffff)
+            continue;
+
+        if (inv_mask != 0)
+        {
+            uint32_t dst_px = dest[i];
+
+            FbByteMulAddC(dst_px, inv_mask, out);
+            out = dst_px;
+        }
+        dest[i] = out;
+    }
+}
+
+/*
+ * OverReverse (component alpha).
+ *
+ * Pixels whose inverse dest alpha is 0 are left untouched.  Otherwise the
+ * source is masked with fbCombineMaskValueC and, unless the inverse dest
+ * alpha is 0xff, scaled by it and added to dest with saturation.
+ */
+FASTCALL static void
+fbCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t dst_px = dest[i];
+        uint32_t inv_dst_alpha = ~dst_px >> 24;
+        uint32_t out;
+        uint32_t msk;
+
+        if (inv_dst_alpha == 0)
+            continue;
+
+        out = src[i];
+        msk = mask[i];
+        fbCombineMaskValueC (&out, &msk);
+
+        if (inv_dst_alpha != 0xff)
+        {
+            FbByteMulAdd(out, inv_dst_alpha, dst_px);
+        }
+        dest[i] = out;
+    }
+}
+
+/*
+ * In (component alpha): dest = masked source * dest alpha.
+ * A dest alpha of 0 stores 0; a dest alpha of 0xff stores the masked
+ * source unscaled.
+ */
+FASTCALL static void
+fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint16_t dst_alpha = dest[i] >> 24;
+        uint32_t out = 0;
+
+        if (dst_alpha != 0)
+        {
+            uint32_t msk = mask[i];
+
+            out = src[i];
+            fbCombineMaskValueC (&out, &msk);
+            if (dst_alpha != 0xff)
+            {
+                FbByteMul(out, dst_alpha);
+            }
+        }
+        dest[i] = out;
+    }
+}
+
+/*
+ * InReverse (component alpha): dest = dest * (mask * source alpha), per
+ * channel.  The factor comes from fbCombineMaskAlphaC; a factor of all
+ * ones leaves dest untouched and a factor of 0 stores 0.
+ */
+FASTCALL static void
+fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t src_px = src[i];
+        uint32_t factor = mask[i];
+        uint32_t out = 0;
+
+        fbCombineMaskAlphaC (&src_px, &factor);
+
+        if (factor == 0xffffffff)
+            continue;
+
+        if (factor != 0)
+        {
+            out = dest[i];
+            FbByteMulC(out, factor);
+        }
+        dest[i] = out;
+    }
+}
+
+/*
+ * Out (component alpha): dest = masked source * inverse dest alpha.
+ * An inverse dest alpha of 0 stores 0; an inverse dest alpha of 0xff
+ * stores the masked source unscaled.
+ */
+FASTCALL static void
+fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint16_t inv_dst_alpha = ~dest[i] >> 24;
+        uint32_t out = 0;
+
+        if (inv_dst_alpha != 0)
+        {
+            uint32_t msk = mask[i];
+
+            out = src[i];
+            fbCombineMaskValueC (&out, &msk);
+            if (inv_dst_alpha != 0xff)
+            {
+                FbByteMul(out, inv_dst_alpha);
+            }
+        }
+        dest[i] = out;
+    }
+}
+
+/*
+ * OutReverse (component alpha): dest = dest * ~(mask * source alpha), per
+ * channel.  The mask is first scaled by fbCombineMaskAlphaC; an inverted
+ * factor of all ones leaves dest untouched and one of 0 stores 0.
+ */
+FASTCALL static void
+fbCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t src_px = src[i];
+        uint32_t msk = mask[i];
+        uint32_t factor;
+        uint32_t out = 0;
+
+        fbCombineMaskAlphaC (&src_px, &msk);
+
+        factor = ~msk;
+        if (factor == 0xffffffff)
+            continue;
+
+        if (factor != 0)
+        {
+            out = dest[i];
+            FbByteMulC(out, factor);
+        }
+        dest[i] = out;
+    }
+}
+
+/*
+ * Atop (component alpha):
+ *   dest = dest * ~mask' + src' * dest alpha
+ * where src' and mask' are the outputs of fbCombineMaskC.
+ */
+FASTCALL static void
+fbCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = dest[i];
+        uint32_t src_px = src[i];
+        uint32_t msk = mask[i];
+        uint16_t dst_alpha = out >> 24;
+        uint32_t dst_factor;
+
+        fbCombineMaskC (&src_px, &msk);
+        dst_factor = ~msk;
+
+        FbByteAddMulC(out, dst_factor, src_px, dst_alpha);
+        dest[i] = out;
+    }
+}
+
+/*
+ * AtopReverse (component alpha):
+ *   dest = dest * mask' + src' * inverse dest alpha
+ * where src' and mask' are the outputs of fbCombineMaskC.
+ */
+FASTCALL static void
+fbCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = dest[i];
+        uint32_t src_px = src[i];
+        uint32_t dst_factor = mask[i];
+        uint16_t inv_dst_alpha = ~out >> 24;
+
+        fbCombineMaskC (&src_px, &dst_factor);
+
+        FbByteAddMulC(out, dst_factor, src_px, inv_dst_alpha);
+        dest[i] = out;
+    }
+}
+
+/*
+ * Xor (component alpha):
+ *   dest = dest * ~mask' + src' * inverse dest alpha
+ * where src' and mask' are the outputs of fbCombineMaskC.
+ */
+FASTCALL static void
+fbCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = dest[i];
+        uint32_t src_px = src[i];
+        uint32_t msk = mask[i];
+        uint16_t inv_dst_alpha = ~out >> 24;
+        uint32_t dst_factor;
+
+        fbCombineMaskC (&src_px, &msk);
+        dst_factor = ~msk;
+
+        FbByteAddMulC(out, dst_factor, src_px, inv_dst_alpha);
+        dest[i] = out;
+    }
+}
+
+/*
+ * Add (component alpha): dest = min(dest + src * mask, 255) per channel,
+ * with the source masked by fbCombineMaskValueC.
+ */
+FASTCALL static void
+fbCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t out = dest[i];
+        uint32_t src_px = src[i];
+        uint32_t msk = mask[i];
+
+        fbCombineMaskValueC (&src_px, &msk);
+
+        FbByteAdd(out, src_px);
+        dest[i] = out;
+    }
+}
+
+/*
+ * Saturate (component alpha).
+ *
+ * After fbCombineMaskC, each channel of the mask acts as that channel's
+ * source alpha.  Where it does not exceed the inverse dest alpha the
+ * channel is combined with FbAdd; otherwise it is combined with FbGen,
+ * using (inverse dest alpha << 8) / channel alpha as the source factor and
+ * 0xff as the dest factor.
+ */
+FASTCALL static void
+fbCombineSaturateC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  d = dest[i];
+        uint32_t  s = src[i];
+        uint32_t  msk = mask[i];
+        uint32_t  c0, c8, c16, c24;
+        uint16_t  sa, sr, sg, sb, da;
+        uint16_t  t, u, v;
+
+        fbCombineMaskC (&s, &msk);
+
+        sb = (msk      ) & 0xff;
+        sg = (msk >>  8) & 0xff;
+        sr = (msk >> 16) & 0xff;
+        sa = (msk >> 24);
+        da = ~d >> 24;
+
+        if (sb > da)
+            c0 = FbGen (s, d, 0, (da << 8) / sb, 0xff, t, u, v);
+        else
+            c0 = FbAdd(s,d,0,t);
+
+        if (sg > da)
+            c8 = FbGen (s, d, 8, (da << 8) / sg, 0xff, t, u, v);
+        else
+            c8 = FbAdd(s,d,8,t);
+
+        if (sr > da)
+            c16 = FbGen (s, d, 16, (da << 8) / sr, 0xff, t, u, v);
+        else
+            c16 = FbAdd(s,d,16,t);
+
+        if (sa > da)
+            c24 = FbGen (s, d, 24, (da << 8) / sa, 0xff, t, u, v);
+        else
+            c24 = FbAdd(s,d,24,t);
+
+        dest[i] = c0|c8|c16|c24;
+    }
+}
+
+/*
+ * Generic disjoint operator with component alpha.
+ *
+ * After fbCombineMaskC, each channel of the mask acts as that channel's
+ * source alpha (sa).  The A bits of `combine` choose the per-channel source
+ * factor Fa and the B bits the per-channel destination factor Fb, both
+ * built from sa and the dest alpha (da) by the disjoint Out/In helpers.
+ * Every channel of the result is then FbGen (src, dest, shift, Fa_c, Fb_c).
+ *
+ * The helper results are cast to uint32_t before being shifted into place:
+ * a uint8_t promotes to int, and shifting a value >= 0x80 left by 24 in a
+ * signed int is undefined behaviour.
+ */
+FASTCALL static void
+fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  s, d;
+        uint32_t  m,n,o,p;
+        uint32_t  Fa, Fb;
+        uint16_t  t, u, v;
+        uint32_t  sa;
+        uint8_t   da;
+
+        s = *(src + i);
+        m = *(mask + i);
+        d = *(dest + i);
+        da = d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+	/* per-channel source alpha; m, n, o, p are reused as scratch below */
+	sa = m;
+
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            m = (uint32_t) fbCombineDisjointOutPart ((uint8_t) (sa >> 0), da);
+            n = (uint32_t) fbCombineDisjointOutPart ((uint8_t) (sa >> 8), da) << 8;
+            o = (uint32_t) fbCombineDisjointOutPart ((uint8_t) (sa >> 16), da) << 16;
+            p = (uint32_t) fbCombineDisjointOutPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineAIn:
+            m = (uint32_t) fbCombineDisjointInPart ((uint8_t) (sa >> 0), da);
+            n = (uint32_t) fbCombineDisjointInPart ((uint8_t) (sa >> 8), da) << 8;
+            o = (uint32_t) fbCombineDisjointInPart ((uint8_t) (sa >> 16), da) << 16;
+            p = (uint32_t) fbCombineDisjointInPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineA:
+            Fa = 0xffffffff;
+            break;
+        }
+
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            m = (uint32_t) fbCombineDisjointOutPart (da, (uint8_t) (sa >> 0));
+            n = (uint32_t) fbCombineDisjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = (uint32_t) fbCombineDisjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = (uint32_t) fbCombineDisjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineBIn:
+            m = (uint32_t) fbCombineDisjointInPart (da, (uint8_t) (sa >> 0));
+            n = (uint32_t) fbCombineDisjointInPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = (uint32_t) fbCombineDisjointInPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = (uint32_t) fbCombineDisjointInPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineB:
+            Fb = 0xffffffff;
+            break;
+        }
+        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
+        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
+        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
+        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineDisjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineDisjointGeneralC over width pixels with combine = CombineAOver (constant defined outside this chunk). */
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOver);
+}
+
+FASTCALL static void
+fbCombineDisjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineDisjointGeneralC over width pixels with combine = CombineAIn, i.e. the source factor comes from fbCombineDisjointInPart. */
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAIn);
+}
+
+FASTCALL static void
+fbCombineDisjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineDisjointGeneralC over width pixels with combine = CombineBIn, i.e. the destination factor comes from fbCombineDisjointInPart. */
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBIn);
+}
+
+FASTCALL static void
+fbCombineDisjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineDisjointGeneralC over width pixels with combine = CombineAOut, i.e. the source factor comes from fbCombineDisjointOutPart. */
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOut);
+}
+
+FASTCALL static void
+fbCombineDisjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineDisjointGeneralC over width pixels with combine = CombineBOut, i.e. the destination factor comes from fbCombineDisjointOutPart. */
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBOut);
+}
+
+FASTCALL static void
+fbCombineDisjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineDisjointGeneralC over width pixels with combine = CombineAAtop (constant defined outside this chunk). */
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAAtop);
+}
+
+FASTCALL static void
+fbCombineDisjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineDisjointGeneralC over width pixels with combine = CombineBAtop (constant defined outside this chunk). */
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBAtop);
+}
+
+FASTCALL static void
+fbCombineDisjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineDisjointGeneralC over width pixels with combine = CombineXor (constant defined outside this chunk). */
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineXor);
+}
+
+FASTCALL static void
+fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
+{ /* Conjoint counterpart of fbCombineDisjointGeneralC: same per-pixel structure, but Fa and Fb come from the fbCombineConjoint*Part helpers. */
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  s, d;
+        uint32_t  m,n,o,p; /* scratch: m first holds the mask word, later m/n/o/p hold the four per-channel partial results */
+        uint32_t  Fa, Fb; /* packed per-channel factors: Fa is applied to s, Fb is applied to d */
+        uint16_t  t, u, v; /* temporaries handed to the FbGen macro */
+        uint32_t  sa; /* copy of m after fbCombineMaskC; its four bytes are used as per-channel source alpha */
+        uint8_t   da; /* top byte of the destination pixel */
+
+        s = *(src + i);
+        m = *(mask + i); /* mask is dereferenced for every pixel, so it must not be NULL */
+        d = *(dest + i);
+        da = d >> 24;
+
+	fbCombineMaskC (&s, &m); /* defined outside this chunk; presumably applies the mask to s and turns m into per-channel alpha -- TODO confirm */
+
+        sa = m;
+
+        switch (combine & CombineA) { /* pick the source factor Fa */
+        default: /* none of the handled A patterns: source factor is zero */
+            Fa = 0;
+            break;
+        case CombineAOut: /* one fbCombineConjointOutPart (sa_c, da) per channel, repacked into Fa */
+            m = fbCombineConjointOutPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineConjointOutPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineConjointOutPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineConjointOutPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineAIn: /* one fbCombineConjointInPart (sa_c, da) per channel, repacked into Fa */
+            m = fbCombineConjointInPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineConjointInPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineConjointInPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineConjointInPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineA: /* all bits of the CombineA mask set: factor 0xff in every channel */
+            Fa = 0xffffffff;
+            break;
+        }
+
+        switch (combine & CombineB) { /* pick the destination factor Fb; same scheme with the helper arguments swapped (da first) */
+        default: /* none of the handled B patterns: destination factor is zero */
+            Fb = 0;
+            break;
+        case CombineBOut:
+            m = fbCombineConjointOutPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineConjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineConjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineConjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineBIn:
+            m = fbCombineConjointInPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineConjointInPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineConjointInPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineConjointInPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineB: /* all bits of the CombineB mask set: factor 0xff in every channel */
+            Fb = 0xffffffff;
+            break;
+        }
+        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v); /* channel at bit 0: FbIntMult (s_c, Fa_c) + FbIntMult (d_c, Fb_c), saturated inside FbGen */
+        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v); /* channel at bit 8 */
+        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v); /* channel at bit 16 */
+        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v); /* channel at bit 24 */
+        s = m|n|o|p; /* FbGen already shifted each channel into place, so OR reassembles the pixel */
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineConjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineConjointGeneralC over width pixels with combine = CombineAOver (constant defined outside this chunk). */
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOver);
+}
+
+FASTCALL static void
+fbCombineConjointOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineConjointGeneralC over width pixels with combine = CombineBOver (constant defined outside this chunk). */
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOver);
+}
+
+FASTCALL static void
+fbCombineConjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineConjointGeneralC over width pixels with combine = CombineAIn, i.e. the source factor comes from fbCombineConjointInPart. */
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAIn);
+}
+
+FASTCALL static void
+fbCombineConjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineConjointGeneralC over width pixels with combine = CombineBIn, i.e. the destination factor comes from fbCombineConjointInPart. */
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBIn);
+}
+
+FASTCALL static void
+fbCombineConjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineConjointGeneralC over width pixels with combine = CombineAOut, i.e. the source factor comes from fbCombineConjointOutPart. */
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOut);
+}
+
+FASTCALL static void
+fbCombineConjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineConjointGeneralC over width pixels with combine = CombineBOut, i.e. the destination factor comes from fbCombineConjointOutPart. */
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOut);
+}
+
+FASTCALL static void
+fbCombineConjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineConjointGeneralC over width pixels with combine = CombineAAtop (constant defined outside this chunk). */
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAAtop);
+}
+
+FASTCALL static void
+fbCombineConjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineConjointGeneralC over width pixels with combine = CombineBAtop (constant defined outside this chunk). */
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBAtop);
+}
+
+FASTCALL static void
+fbCombineConjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{ /* Thin wrapper: runs fbCombineConjointGeneralC over width pixels with combine = CombineXor (constant defined outside this chunk). */
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
+}
+
+static CombineFuncU pixman_fbCombineFuncU[] = { /* table of the *U combiners, laid out as three groups starting at slots 0x00, 0x10 and 0x20; presumably indexed by the operator code -- confirm against callers */
+    fbCombineClear, /* 0x00 */
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineOverU,
+    fbCombineOverReverseU,
+    fbCombineInU,
+    fbCombineInReverseU,
+    fbCombineOutU,
+    fbCombineOutReverseU,
+    fbCombineAtopU,
+    fbCombineAtopReverseU,
+    fbCombineXorU,
+    fbCombineAddU,
+    fbCombineSaturateU, /* 0x0d */
+    NULL, /* 0x0e: empty slot padding the group to 16 entries */
+    NULL, /* 0x0f: empty slot */
+    fbCombineClear, /* 0x10: first slot of the disjoint group */
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineDisjointOverU,
+    fbCombineSaturateU, /* DisjointOverReverse */
+    fbCombineDisjointInU,
+    fbCombineDisjointInReverseU,
+    fbCombineDisjointOutU,
+    fbCombineDisjointOutReverseU,
+    fbCombineDisjointAtopU,
+    fbCombineDisjointAtopReverseU,
+    fbCombineDisjointXorU, /* 0x1b */
+    NULL, /* 0x1c: empty slot */
+    NULL, /* 0x1d: empty slot */
+    NULL, /* 0x1e: empty slot */
+    NULL, /* 0x1f: empty slot */
+    fbCombineClear, /* 0x20: first slot of the conjoint group */
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineConjointOverU,
+    fbCombineConjointOverReverseU,
+    fbCombineConjointInU,
+    fbCombineConjointInReverseU,
+    fbCombineConjointOutU,
+    fbCombineConjointOutReverseU,
+    fbCombineConjointAtopU,
+    fbCombineConjointAtopReverseU,
+    fbCombineConjointXorU, /* 0x2b: last entry */
+};
+
+static CombineFuncC pixman_fbCombineFuncC[] = { /* table of the *C combiners (the ones taking a mask pointer), same slot layout as pixman_fbCombineFuncU; presumably indexed by the operator code -- confirm against callers */
+    fbCombineClearC, /* 0x00 */
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineOverC,
+    fbCombineOverReverseC,
+    fbCombineInC,
+    fbCombineInReverseC,
+    fbCombineOutC,
+    fbCombineOutReverseC,
+    fbCombineAtopC,
+    fbCombineAtopReverseC,
+    fbCombineXorC,
+    fbCombineAddC,
+    fbCombineSaturateC, /* 0x0d */
+    NULL, /* 0x0e: empty slot padding the group to 16 entries */
+    NULL, /* 0x0f: empty slot */
+    fbCombineClearC,	    /* 0x10 */
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineDisjointOverC,
+    fbCombineSaturateC, /* DisjointOverReverse */
+    fbCombineDisjointInC,
+    fbCombineDisjointInReverseC,
+    fbCombineDisjointOutC,
+    fbCombineDisjointOutReverseC,
+    fbCombineDisjointAtopC,
+    fbCombineDisjointAtopReverseC,
+    fbCombineDisjointXorC,  /* 0x1b */
+    NULL, /* 0x1c: empty slot */
+    NULL, /* 0x1d: empty slot */
+    NULL, /* 0x1e: empty slot */
+    NULL, /* 0x1f: empty slot */
+    fbCombineClearC, /* 0x20: first slot of the conjoint group */
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineConjointOverC,
+    fbCombineConjointOverReverseC,
+    fbCombineConjointInC,
+    fbCombineConjointInReverseC,
+    fbCombineConjointOutC,
+    fbCombineConjointOutReverseC,
+    fbCombineConjointAtopC,
+    fbCombineConjointAtopReverseC,
+    fbCombineConjointXorC, /* 0x2b: last entry */
+};
+
+FbComposeFunctions pixman_composeFunctions = { /* non-static, so visible to other translation units; positional initializer -- the field names live in the FbComposeFunctions declaration outside this chunk */
+    pixman_fbCombineFuncU, /* the *U combiner table defined above */
+    pixman_fbCombineFuncC, /* the *C combiner table defined above */
+    pixman_fbCombineMaskU /* defined outside this chunk */
+};
diff --git a/pixman/combine.pl b/pixman/combine.pl
new file mode 100644
index 0000000..e73a85a
--- /dev/null
+++ b/pixman/combine.pl
@@ -0,0 +1,3 @@
+while (<STDIN>) {    # read standard input one line at a time
+    print $_;        # and write each line to standard output unchanged
+}
commit 30746b1e1e5101fd1502c676e777e27953772f75
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Mon Mar 31 16:09:44 2008 -0700

    Move combining macros into pixman-combine.c.
    
    Signed-off-by: Søren Sandmann <sandmann at redhat.com>

diff --git a/pixman/pixman-combine.c b/pixman/pixman-combine.c
index d201736..f6023e6 100644
--- a/pixman/pixman-combine.c
+++ b/pixman/pixman-combine.c
@@ -5,6 +5,194 @@
 #include <string.h>
 
 #include "pixman-private.h"
+
+/*
+ * Helper macros.  FbGen yields, already shifted to bit offset i, the 8-bit channel FbIntMult (x_i, ax) + FbIntMult (y_i, ay), forced to 0xff when the sum carries into bit 8; t, u and v are caller-supplied temporaries (FbIntMult and FbGet8 are defined in pixman-private.h).
+ */
+
+#define FbGen(x,y,i,ax,ay,t,u,v) ((t) = (FbIntMult(FbGet8(y,i),ay,(u)) + \
+					 FbIntMult(FbGet8(x,i),ax,(v))), \
+				  (uint32_t) ((uint8_t) ((t) |		\
+							 (0 - ((t) >> 8)))) << (i))
+
+
+/*
+  The macros below use some tricks to be able to do two color
+  components at the same time.
+*/
+
+/*
+  x_c = (x_c * a) / 255 (rounded) for every 8-bit channel c of x; a is one factor shared by all channels (assumed <= 0xff), x is overwritten in place, and the arguments are pasted unparenthesized and evaluated more than once.
+*/
+#define FbByteMul(x, a) do {					    \
+        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
+        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
+        t &= 0xff00ff;						    \
+								    \
+        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;		    \
+        x = (x + ((x >> 8) & 0xff00ff));			    \
+        x &= 0xff00ff00;					    \
+        x += t;							    \
+    } while (0)
+
+/*
+  x_c = min ((x_c * a) / 255 + y_c, 255) for every 8-bit channel c; a is one factor shared by all channels, y is a packed pixel added channel-wise with saturation, and x is overwritten in place.
+*/
+#define FbByteMulAdd(x, a, y) do {				    \
+        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
+        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
+        t &= 0xff00ff;						    \
+        t += y & 0xff00ff;					    \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);			    \
+        t &= 0xff00ff;						    \
+								    \
+        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;                 \
+        x = (x + ((x >> 8) & 0xff00ff)) >> 8;                       \
+        x &= 0xff00ff;                                              \
+        x += (y >> 8) & 0xff00ff;                                   \
+        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                     \
+        x &= 0xff00ff;                                              \
+        x <<= 8;                                                    \
+        x += t;                                                     \
+    } while (0)
+
+/*
+  x_c = min ((x_c * a + y_c * b) / 255, 255) for every 8-bit channel c; a and b are single factors shared by all channels, each channel is computed separately and then repacked, and x is overwritten in place.
+*/
+#define FbByteAddMul(x, a, y, b) do {                                   \
+        uint32_t t;							\
+        uint32_t r = (x >> 24) * a + (y >> 24) * b + 0x80;		\
+        r += (r >> 8);                                                  \
+        r >>= 8;                                                        \
+									\
+        t = (x & 0xff00) * a + (y & 0xff00) * b;                        \
+        t += (t >> 8) + 0x8000;                                         \
+        t >>= 16;                                                       \
+									\
+        t |= r << 16;                                                   \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
+        t &= 0xff00ff;                                                  \
+        t <<= 8;                                                        \
+									\
+        r = ((x >> 16) & 0xff) * a + ((y >> 16) & 0xff) * b + 0x80;     \
+        r += (r >> 8);                                                  \
+        r >>= 8;                                                        \
+									\
+        x = (x & 0xff) * a + (y & 0xff) * b + 0x80;                     \
+        x += (x >> 8);                                                  \
+        x >>= 8;                                                        \
+        x |= r << 16;                                                   \
+        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
+        x &= 0xff00ff;                                                  \
+        x |= t;                                                         \
+    } while (0)
+
+/*
+  x_c = (x_c * a + y_c * b) / 256 for every 8-bit channel c, done as a plain shift with no rounding bias and no clamp; presumably callers keep a + b <= 256 so a channel cannot overflow -- confirm.
+*/
+#define FbByteAddMul_256(x, a, y, b) do {                               \
+        uint32_t t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;		\
+        t >>= 8;                                                        \
+        t &= 0xff00ff;                                                  \
+									\
+        x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;      \
+        x &= 0xff00ff00;                                                \
+        x += t;                                                         \
+    } while (0)
+
+/*
+  x_c = (x_c * a_c) / 255 (rounded) for every 8-bit channel c; a is a packed pixel supplying a separate factor per channel, and x is overwritten in place.
+*/
+#define FbByteMulC(x, a) do {				  \
+        uint32_t t;                                       \
+        uint32_t r = (x & 0xff) * (a & 0xff);             \
+        r |= (x & 0xff0000) * ((a >> 16) & 0xff);	  \
+	r += 0x800080;					  \
+        r = (r + ((r >> 8) & 0xff00ff)) >> 8;		  \
+        r &= 0xff00ff;					  \
+							  \
+        x >>= 8;					  \
+        t = (x & 0xff) * ((a >> 8) & 0xff);		  \
+        t |= (x & 0xff0000) * (a >> 24);		  \
+        t += 0x800080;					  \
+        t = t + ((t >> 8) & 0xff00ff);			  \
+        x = r | (t & 0xff00ff00);			  \
+							  \
+    } while (0)
+
+/*
+  x_c = min ((x_c * a_c) / 255 + y_c, 255) for every 8-bit channel c; a is a packed pixel supplying a separate factor per channel, y is a packed pixel added channel-wise with saturation, and x is overwritten in place.
+*/
+#define FbByteMulAddC(x, a, y) do {				      \
+        uint32_t t;                                                   \
+        uint32_t r = (x & 0xff) * (a & 0xff);                         \
+        r |= (x & 0xff0000) * ((a >> 16) & 0xff);		      \
+	r += 0x800080;						      \
+	r = (r + ((r >> 8) & 0xff00ff)) >> 8;			      \
+        r &= 0xff00ff;						      \
+        r += y & 0xff00ff;					      \
+        r |= 0x1000100 - ((r >> 8) & 0xff00ff);			      \
+        r &= 0xff00ff;						      \
+								      \
+        x >>= 8;                                                       \
+        t = (x & 0xff) * ((a >> 8) & 0xff);                            \
+        t |= (x & 0xff0000) * (a >> 24);                               \
+	t += 0x800080;                                                 \
+        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			       \
+        t &= 0xff00ff;                                                 \
+        t += (y >> 8) & 0xff00ff;                                      \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                        \
+        t &= 0xff00ff;                                                 \
+        x = r | (t << 8);                                              \
+    } while (0)
+
+/*
+  x_c = min ((x_c * a_c + y_c * b) / 255, 255) for every 8-bit channel c; a is a packed pixel supplying a separate factor per channel, b is a single factor shared by all channels, and x is overwritten in place.
+*/
+#define FbByteAddMulC(x, a, y, b) do {                                  \
+        uint32_t t;							\
+        uint32_t r = (x >> 24) * (a >> 24) + (y >> 24) * b;		\
+        r += (r >> 8) + 0x80;                                           \
+        r >>= 8;                                                        \
+									\
+        t = (x & 0xff00) * ((a >> 8) & 0xff) + (y & 0xff00) * b;        \
+        t += (t >> 8) + 0x8000;                                         \
+        t >>= 16;                                                       \
+									\
+        t |= r << 16;                                                   \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
+        t &= 0xff00ff;                                                  \
+        t <<= 8;                                                        \
+									\
+        r = ((x >> 16) & 0xff) * ((a >> 16) & 0xff) + ((y >> 16) & 0xff) * b + 0x80; \
+        r += (r >> 8);                                                  \
+        r >>= 8;                                                        \
+									\
+        x = (x & 0xff) * (a & 0xff) + (y & 0xff) * b + 0x80;            \
+        x += (x >> 8);                                                  \
+        x >>= 8;                                                        \
+        x |= r << 16;                                                   \
+        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
+        x &= 0xff00ff;                                                  \
+        x |= t;                                                         \
+    } while (0)
+
+/*
+  x_c = min (x_c + y_c, 255) for every 8-bit channel c: a channel-wise saturating add of packed pixel y into x, done two channels at a time; x is overwritten in place.
+*/
+#define FbByteAdd(x, y) do {                                            \
+        uint32_t t;							\
+        uint32_t r = (x & 0xff00ff) + (y & 0xff00ff);			\
+        r |= 0x1000100 - ((r >> 8) & 0xff00ff);                         \
+        r &= 0xff00ff;                                                  \
+									\
+        t = ((x >> 8) & 0xff00ff) + ((y >> 8) & 0xff00ff);              \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
+        r |= (t & 0xff00ff) << 8;                                       \
+        x = r;                                                          \
+    } while (0)
+
+
 /*
  * There are two ways of handling alpha -- either as a single unified value or
  * a separate value for each component, hence each macro must have two
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index e236767..714a6a5 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -441,191 +441,9 @@ union pixman_image
 
 #define FbInC(x,i,a,t) ((uint32_t) FbIntMult(FbGet8(x,i),FbGet8(a,i),(t)) << (i))
 
-#define FbGen(x,y,i,ax,ay,t,u,v) ((t) = (FbIntMult(FbGet8(y,i),ay,(u)) + \
-					 FbIntMult(FbGet8(x,i),ax,(v))), \
-				  (uint32_t) ((uint8_t) ((t) |		\
-							 (0 - ((t) >> 8)))) << (i))
-
 #define FbAdd(x,y,i,t)	((t) = FbGet8(x,i) + FbGet8(y,i),		\
 			 (uint32_t) ((uint8_t) ((t) | (0 - ((t) >> 8)))) << (i))
 
-
-/*
-  The methods below use some tricks to be able to do two color
-  components at the same time.
-*/
-
-/*
-  x_c = (x_c * a) / 255
-*/
-#define FbByteMul(x, a) do {					    \
-        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
-        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
-        t &= 0xff00ff;						    \
-								    \
-        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;		    \
-        x = (x + ((x >> 8) & 0xff00ff));			    \
-        x &= 0xff00ff00;					    \
-        x += t;							    \
-    } while (0)
-
-/*
-  x_c = (x_c * a) / 255 + y
-*/
-#define FbByteMulAdd(x, a, y) do {				    \
-        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
-        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
-        t &= 0xff00ff;						    \
-        t += y & 0xff00ff;					    \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);			    \
-        t &= 0xff00ff;						    \
-								    \
-        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;                 \
-        x = (x + ((x >> 8) & 0xff00ff)) >> 8;                       \
-        x &= 0xff00ff;                                              \
-        x += (y >> 8) & 0xff00ff;                                   \
-        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                     \
-        x &= 0xff00ff;                                              \
-        x <<= 8;                                                    \
-        x += t;                                                     \
-    } while (0)
-
-/*
-  x_c = (x_c * a + y_c * b) / 255
-*/
-#define FbByteAddMul(x, a, y, b) do {                                   \
-        uint32_t t;							\
-        uint32_t r = (x >> 24) * a + (y >> 24) * b + 0x80;		\
-        r += (r >> 8);                                                  \
-        r >>= 8;                                                        \
-									\
-        t = (x & 0xff00) * a + (y & 0xff00) * b;                        \
-        t += (t >> 8) + 0x8000;                                         \
-        t >>= 16;                                                       \
-									\
-        t |= r << 16;                                                   \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
-        t &= 0xff00ff;                                                  \
-        t <<= 8;                                                        \
-									\
-        r = ((x >> 16) & 0xff) * a + ((y >> 16) & 0xff) * b + 0x80;     \
-        r += (r >> 8);                                                  \
-        r >>= 8;                                                        \
-									\
-        x = (x & 0xff) * a + (y & 0xff) * b + 0x80;                     \
-        x += (x >> 8);                                                  \
-        x >>= 8;                                                        \
-        x |= r << 16;                                                   \
-        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
-        x &= 0xff00ff;                                                  \
-        x |= t;                                                         \
-    } while (0)
-
-/*
-  x_c = (x_c * a + y_c *b) / 256
-*/
-#define FbByteAddMul_256(x, a, y, b) do {                               \
-        uint32_t t = (x & 0xff00ff) * a + (y & 0xff00ff) * b;		\
-        t >>= 8;                                                        \
-        t &= 0xff00ff;                                                  \
-									\
-        x = ((x >> 8) & 0xff00ff) * a + ((y >> 8) & 0xff00ff) * b;      \
-        x &= 0xff00ff00;                                                \
-        x += t;                                                         \
-    } while (0)
-
-/*
-  x_c = (x_c * a_c) / 255
-*/
-#define FbByteMulC(x, a) do {				  \
-        uint32_t t;                                       \
-        uint32_t r = (x & 0xff) * (a & 0xff);             \
-        r |= (x & 0xff0000) * ((a >> 16) & 0xff);	  \
-	r += 0x800080;					  \
-        r = (r + ((r >> 8) & 0xff00ff)) >> 8;		  \
-        r &= 0xff00ff;					  \
-							  \
-        x >>= 8;					  \
-        t = (x & 0xff) * ((a >> 8) & 0xff);		  \
-        t |= (x & 0xff0000) * (a >> 24);		  \
-        t += 0x800080;					  \
-        t = t + ((t >> 8) & 0xff00ff);			  \
-        x = r | (t & 0xff00ff00);			  \
-							  \
-    } while (0)
-
-/*
-  x_c = (x_c * a) / 255 + y
-*/
-#define FbByteMulAddC(x, a, y) do {				      \
-        uint32_t t;                                                   \
-        uint32_t r = (x & 0xff) * (a & 0xff);                         \
-        r |= (x & 0xff0000) * ((a >> 16) & 0xff);		      \
-	r += 0x800080;						      \
-	r = (r + ((r >> 8) & 0xff00ff)) >> 8;			      \
-        r &= 0xff00ff;						      \
-        r += y & 0xff00ff;					      \
-        r |= 0x1000100 - ((r >> 8) & 0xff00ff);			      \
-        r &= 0xff00ff;						      \
-								      \
-        x >>= 8;                                                       \
-        t = (x & 0xff) * ((a >> 8) & 0xff);                            \
-        t |= (x & 0xff0000) * (a >> 24);                               \
-	t += 0x800080;                                                 \
-        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			       \
-        t &= 0xff00ff;                                                 \
-        t += (y >> 8) & 0xff00ff;                                      \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                        \
-        t &= 0xff00ff;                                                 \
-        x = r | (t << 8);                                              \
-    } while (0)
-
-/*
-  x_c = (x_c * a_c + y_c * b) / 255
-*/
-#define FbByteAddMulC(x, a, y, b) do {                                  \
-        uint32_t t;							\
-        uint32_t r = (x >> 24) * (a >> 24) + (y >> 24) * b;		\
-        r += (r >> 8) + 0x80;                                           \
-        r >>= 8;                                                        \
-									\
-        t = (x & 0xff00) * ((a >> 8) & 0xff) + (y & 0xff00) * b;        \
-        t += (t >> 8) + 0x8000;                                         \
-        t >>= 16;                                                       \
-									\
-        t |= r << 16;                                                   \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
-        t &= 0xff00ff;                                                  \
-        t <<= 8;                                                        \
-									\
-        r = ((x >> 16) & 0xff) * ((a >> 16) & 0xff) + ((y >> 16) & 0xff) * b + 0x80; \
-        r += (r >> 8);                                                  \
-        r >>= 8;                                                        \
-									\
-        x = (x & 0xff) * (a & 0xff) + (y & 0xff) * b + 0x80;            \
-        x += (x >> 8);                                                  \
-        x >>= 8;                                                        \
-        x |= r << 16;                                                   \
-        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
-        x &= 0xff00ff;                                                  \
-        x |= t;                                                         \
-    } while (0)
-
-/*
-  x_c = min(x_c + y_c, 255)
-*/
-#define FbByteAdd(x, y) do {                                            \
-        uint32_t t;							\
-        uint32_t r = (x & 0xff00ff) + (y & 0xff00ff);			\
-        r |= 0x1000100 - ((r >> 8) & 0xff00ff);                         \
-        r &= 0xff00ff;                                                  \
-									\
-        t = ((x >> 8) & 0xff00ff) + ((y >> 8) & 0xff00ff);              \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
-        r |= (t & 0xff00ff) << 8;                                       \
-        x = r;                                                          \
-    } while (0)
-
 #define div_255(x) (((x) + 0x80 + (((x) + 0x80) >> 8)) >> 8)
 
 #define MOD(a,b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b))


More information about the xorg-commit mailing list