pixman: Branch 'master' - 6 commits

Aaron Plattner aplattner at kemper.freedesktop.org
Sun Mar 23 22:50:47 PDT 2008


 pixman/Makefile.am                    |    6 
 pixman/Makefile.win32                 |    6 
 pixman/pixman-access-accessors.c      |    3 
 pixman/pixman-access.c                | 1686 +++++++++++++
 pixman/pixman-combine.c               | 1260 ++++++++++
 pixman/pixman-compose.c               | 4278 ----------------------------------
 pixman/pixman-pict.c                  |   46 
 pixman/pixman-private.h               |   44 
 pixman/pixman-source.c                |  681 +++++
 pixman/pixman-transformed-accessors.c |    3 
 pixman/pixman-transformed.c           |  726 +++++
 pixman/pixman-trap.c                  |    2 
 12 files changed, 4445 insertions(+), 4296 deletions(-)

New commits:
commit 4f33f454c6dbaf356d20cee8d35fdf02f9a0317d
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Fri Mar 21 18:59:45 2008 -0700

    Move fbFetchTransformed and friends into pixman-transformed.c.

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 79c6788..37d892b 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -13,6 +13,8 @@ libpixman_1_la_SOURCES =		\
 	pixman-compose-accessors.c	\
 	pixman-pict.c		\
 	pixman-source.c		\
+	pixman-transformed.c	\
+	pixman-transformed-accessors.c	\
 	pixman-utils.c		\
 	pixman-edge.c		\
 	pixman-edge-accessors.c		\
diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
index b4096f2..bee8289 100644
--- a/pixman/Makefile.win32
+++ b/pixman/Makefile.win32
@@ -31,6 +31,8 @@ SOURCES = \
 	pixman-compose-accessors.c	\
 	pixman-pict.c					\
 	pixman-source.c					\
+	pixman-transformed.c				\
+	pixman-transformed-accessors.c			\
 	pixman-utils.c					\
 	pixman-edge.c					\
 	pixman-edge-accessors.c		\
diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 00d0171..6f524d6 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -38,15 +38,28 @@
 #ifdef PIXMAN_FB_ACCESSORS
 #define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_accessors
 #define PIXMAN_COMPOSE_FUNCTIONS pixman_composeFunctions_accessors
+
 #define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture_accessors
 #define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture_accessors
 #define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture_accessors
+
+#define FB_FETCH_TRANSFORMED fbFetchTransformed_accessors
+#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha_accessors
+#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha_accessors
+
 #else
+
 #define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
 #define PIXMAN_COMPOSE_FUNCTIONS pixman_composeFunctions
+
 #define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture
 #define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture
 #define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture
+
+#define FB_FETCH_TRANSFORMED fbFetchTransformed
+#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha
+#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha
+
 #endif
 
 static unsigned int
@@ -109,8 +122,6 @@ SourcePictureClassify (source_image_t *pict,
     return pict->class;
 }
 
-#define SCANLINE_BUFFER_LENGTH 2048
-
 static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
     uint32_t color;
@@ -140,635 +151,6 @@ FbComposeFunctions PIXMAN_COMPOSE_FUNCTIONS = {
     pixman_fbCombineMaskU
 };
 
-/*
- * Fetch from region strategies
- */
-typedef FASTCALL uint32_t (*fetchFromRegionProc)(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box);
-
-static inline uint32_t
-fbFetchFromNoRegion(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
-{
-    return fetch (pict, x, y);
-}
-
-static uint32_t
-fbFetchFromNRectangles(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
-{
-    pixman_box16_t box2;
-    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box2))
-        return fbFetchFromNoRegion(pict, x, y, buffer, fetch, box);
-    else
-        return 0;
-}
-
-static uint32_t
-fbFetchFromOneRectangle(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
-{
-    pixman_box16_t box2 = *box;
-    return ((x < box2.x1) | (x >= box2.x2) | (y < box2.y1) | (y >= box2.y2)) ?
-        0 : fbFetchFromNoRegion(pict, x, y, buffer, fetch, box);
-}
-
-/*
- * Fetching Algorithms
- */
-static void
-fbFetchTransformed_Nearest_Normal(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t* box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int x, y, i;
-
-    /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-        fetchFromRegion = fbFetchFromNoRegion;
-    else
-        fetchFromRegion = fbFetchFromNRectangles;
-
-    for ( i = 0; i < width; ++i)
-    {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2])
-            {
-                *(buffer + i) = 0;
-            }
-            else
-            {
-                if (!affine)
-                {
-                    y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
-                    x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
-                }
-                else
-                {
-                    y = MOD(v.vector[1]>>16, pict->height);
-                    x = MOD(v.vector[0]>>16, pict->width);
-                }
-                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
-            }
-        }
-
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Nearest_Pad(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t *box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int x, y, i;
-
-    /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-        fetchFromRegion = fbFetchFromNoRegion;
-    else
-        fetchFromRegion = fbFetchFromNRectangles;
-
-    for (i = 0; i < width; ++i)
-    {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2])
-            {
-                *(buffer + i) = 0;
-            }
-            else
-            {
-                if (!affine)
-                {
-                    y = CLIP(DIV(v.vector[1], v.vector[2]), 0, pict->height-1);
-                    x = CLIP(DIV(v.vector[0], v.vector[2]), 0, pict->width-1);
-                }
-                else
-                {
-                    y = CLIP(v.vector[1]>>16, 0, pict->height-1);
-                    x = CLIP(v.vector[0]>>16, 0, pict->width-1);
-                }
-
-                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
-            }
-        }
-
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Nearest_General(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t *box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int x, y, i;
-
-    /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-    {
-        box = &(pict->common.src_clip->extents);
-        fetchFromRegion = fbFetchFromOneRectangle;
-    }
-    else
-    {
-        fetchFromRegion = fbFetchFromNRectangles;
-    }
-
-    for (i = 0; i < width; ++i) {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2]) {
-                *(buffer + i) = 0;
-            } else {
-                if (!affine) {
-                    y = DIV(v.vector[1],v.vector[2]);
-                    x = DIV(v.vector[0],v.vector[2]);
-                } else {
-                    y = v.vector[1]>>16;
-                    x = v.vector[0]>>16;
-                }
-                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
-            }
-        }
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Bilinear_Normal(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t *box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int i;
-
-    /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-        fetchFromRegion = fbFetchFromNoRegion;
-    else
-        fetchFromRegion = fbFetchFromNRectangles;
-
-    for (i = 0; i < width; ++i) {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2]) {
-                *(buffer + i) = 0;
-            } else {
-                int x1, x2, y1, y2, distx, idistx, disty, idisty;
-                uint32_t tl, tr, bl, br, r;
-                uint32_t ft, fb;
-
-                if (!affine) {
-                    pixman_fixed_48_16_t div;
-                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
-                    x1 = div >> 16;
-                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
-                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
-                    y1 = div >> 16;
-                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
-                } else {
-                    x1 = v.vector[0] >> 16;
-                    distx = (v.vector[0] >> 8) & 0xff;
-                    y1 = v.vector[1] >> 16;
-                    disty = (v.vector[1] >> 8) & 0xff;
-                }
-                x2 = x1 + 1;
-                y2 = y1 + 1;
-
-                idistx = 256 - distx;
-                idisty = 256 - disty;
-
-                x1 = MOD (x1, pict->width);
-                x2 = MOD (x2, pict->width);
-                y1 = MOD (y1, pict->height);
-                y2 = MOD (y2, pict->height);
-
-                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
-                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
-                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
-                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
-
-                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
-                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
-                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
-                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
-                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
-                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
-                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
-                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
-                r |= (((ft * idisty + fb * disty)) & 0xff0000);
-                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
-                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
-                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
-                *(buffer + i) = r;
-            }
-        }
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Bilinear_Pad(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t *box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int i;
-
-    /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-        fetchFromRegion = fbFetchFromNoRegion;
-    else
-        fetchFromRegion = fbFetchFromNRectangles;
-
-    for (i = 0; i < width; ++i) {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2]) {
-                *(buffer + i) = 0;
-            } else {
-                int x1, x2, y1, y2, distx, idistx, disty, idisty;
-                uint32_t tl, tr, bl, br, r;
-                uint32_t ft, fb;
-
-                if (!affine) {
-                    pixman_fixed_48_16_t div;
-                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
-                    x1 = div >> 16;
-                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
-                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
-                    y1 = div >> 16;
-                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
-                } else {
-                    x1 = v.vector[0] >> 16;
-                    distx = (v.vector[0] >> 8) & 0xff;
-                    y1 = v.vector[1] >> 16;
-                    disty = (v.vector[1] >> 8) & 0xff;
-                }
-                x2 = x1 + 1;
-                y2 = y1 + 1;
-
-                idistx = 256 - distx;
-                idisty = 256 - disty;
-
-                x1 = CLIP (x1, 0, pict->width-1);
-                x2 = CLIP (x2, 0, pict->width-1);
-                y1 = CLIP (y1, 0, pict->height-1);
-                y2 = CLIP (y2, 0, pict->height-1);
-
-                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
-                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
-                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
-                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
-
-                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
-                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
-                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
-                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
-                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
-                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
-                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
-                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
-                r |= (((ft * idisty + fb * disty)) & 0xff0000);
-                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
-                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
-                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
-                *(buffer + i) = r;
-            }
-        }
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Bilinear_General(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t *box = NULL;
-    fetchPixelProc   fetch;
-    fetchFromRegionProc fetchFromRegion;
-    int i;
-
-    /* initialize the two function pointers */
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
-
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
-    {
-        box = &(pict->common.src_clip->extents);
-        fetchFromRegion = fbFetchFromOneRectangle;
-    }
-    else
-    {
-        fetchFromRegion = fbFetchFromNRectangles;
-    }
-
-    for (i = 0; i < width; ++i)
-    {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2]) {
-                *(buffer + i) = 0;
-            } else {
-                int x1, x2, y1, y2, distx, idistx, disty, idisty;
-                uint32_t tl, tr, bl, br, r;
-                uint32_t ft, fb;
-
-                if (!affine) {
-                    pixman_fixed_48_16_t div;
-                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
-                    x1 = div >> 16;
-                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
-                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
-                    y1 = div >> 16;
-                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
-                } else {
-                    x1 = v.vector[0] >> 16;
-                    distx = (v.vector[0] >> 8) & 0xff;
-                    y1 = v.vector[1] >> 16;
-                    disty = (v.vector[1] >> 8) & 0xff;
-                }
-                x2 = x1 + 1;
-                y2 = y1 + 1;
-
-                idistx = 256 - distx;
-                idisty = 256 - disty;
-
-                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
-                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
-                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
-                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
-
-                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
-                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
-                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
-                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
-                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
-                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
-                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
-                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
-                r |= (((ft * idisty + fb * disty)) & 0xff0000);
-                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
-                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
-                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
-                *(buffer + i) = r;
-            }
-        }
-
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-fbFetchTransformed_Convolution(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
-{
-    pixman_box16_t dummy;
-    fetchPixelProc fetch;
-    int i;
-
-    pixman_fixed_t *params = pict->common.filter_params;
-    int32_t cwidth = pixman_fixed_to_int(params[0]);
-    int32_t cheight = pixman_fixed_to_int(params[1]);
-    int xoff = (params[0] - pixman_fixed_1) >> 1;
-    int yoff = (params[1] - pixman_fixed_1) >> 1;
-    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
-
-    params += 2;
-    for (i = 0; i < width; ++i) {
-        if (!mask || mask[i] & maskBits)
-        {
-            if (!v.vector[2]) {
-                *(buffer + i) = 0;
-            } else {
-                int x1, x2, y1, y2, x, y;
-                int32_t srtot, sgtot, sbtot, satot;
-                pixman_fixed_t *p = params;
-
-                if (!affine) {
-                    pixman_fixed_48_16_t tmp;
-                    tmp = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2] - xoff;
-                    x1 = pixman_fixed_to_int(tmp);
-                    tmp = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2] - yoff;
-                    y1 = pixman_fixed_to_int(tmp);
-                } else {
-                    x1 = pixman_fixed_to_int(v.vector[0] - xoff);
-                    y1 = pixman_fixed_to_int(v.vector[1] - yoff);
-                }
-                x2 = x1 + cwidth;
-                y2 = y1 + cheight;
-
-                srtot = sgtot = sbtot = satot = 0;
-
-                for (y = y1; y < y2; y++) {
-                    int ty;
-                    switch (pict->common.repeat) {
-                        case PIXMAN_REPEAT_NORMAL:
-                            ty = MOD (y, pict->height);
-                            break;
-                        case PIXMAN_REPEAT_PAD:
-                            ty = CLIP (y, 0, pict->height-1);
-                            break;
-                        default:
-                            ty = y;
-                    }
-                    for (x = x1; x < x2; x++) {
-                        if (*p) {
-                            int tx;
-                            switch (pict->common.repeat) {
-                                case PIXMAN_REPEAT_NORMAL:
-                                    tx = MOD (x, pict->width);
-                                    break;
-                                case PIXMAN_REPEAT_PAD:
-                                    tx = CLIP (x, 0, pict->width-1);
-                                    break;
-                                default:
-                                    tx = x;
-                            }
-                            if (pixman_region_contains_point (pict->common.src_clip, tx, ty, &dummy)) {
-                                uint32_t c = fetch(pict, tx, ty);
-
-                                srtot += Red(c) * *p;
-                                sgtot += Green(c) * *p;
-                                sbtot += Blue(c) * *p;
-                                satot += Alpha(c) * *p;
-                            }
-                        }
-                        p++;
-                    }
-                }
-
-                satot >>= 16;
-                srtot >>= 16;
-                sgtot >>= 16;
-                sbtot >>= 16;
-
-                if (satot < 0) satot = 0; else if (satot > 0xff) satot = 0xff;
-                if (srtot < 0) srtot = 0; else if (srtot > 0xff) srtot = 0xff;
-                if (sgtot < 0) sgtot = 0; else if (sgtot > 0xff) sgtot = 0xff;
-                if (sbtot < 0) sbtot = 0; else if (sbtot > 0xff) sbtot = 0xff;
-
-                *(buffer + i) = ((satot << 24) |
-                                 (srtot << 16) |
-                                 (sgtot <<  8) |
-                                 (sbtot       ));
-            }
-        }
-        v.vector[0] += unit.vector[0];
-        v.vector[1] += unit.vector[1];
-        v.vector[2] += unit.vector[2];
-    }
-}
-
-static void
-adjust (pixman_vector_t *v, pixman_vector_t *u, pixman_fixed_t adjustment)
-{
-    int delta_v = (adjustment * v->vector[2]) >> 16;
-    int delta_u = (adjustment * u->vector[2]) >> 16;
-    
-    v->vector[0] += delta_v;
-    v->vector[1] += delta_v;
-    
-    u->vector[0] += delta_u;
-    u->vector[1] += delta_u;
-}
-
-static void
-fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
-{
-    uint32_t     *bits;
-    int32_t    stride;
-    pixman_vector_t v;
-    pixman_vector_t unit;
-    pixman_bool_t affine = TRUE;
-
-    bits = pict->bits;
-    stride = pict->rowstride;
-
-    /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1 / 2;
-    v.vector[2] = pixman_fixed_1;
-
-    /* when using convolution filters or PIXMAN_REPEAT_PAD one might get here without a transform */
-    if (pict->common.transform)
-    {
-        if (!pixman_transform_point_3d (pict->common.transform, &v))
-            return;
-        unit.vector[0] = pict->common.transform->matrix[0][0];
-        unit.vector[1] = pict->common.transform->matrix[1][0];
-        unit.vector[2] = pict->common.transform->matrix[2][0];
-        affine = v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0;
-    }
-    else
-    {
-        unit.vector[0] = pixman_fixed_1;
-        unit.vector[1] = 0;
-        unit.vector[2] = 0;
-    }
-
-    /* This allows filtering code to pretend that pixels are located at integer coordinates */
-    adjust (&v, &unit, -(pixman_fixed_1 / 2));
-    
-    if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST)
-    {
-	/* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
-	adjust (&v, &unit, pixman_fixed_1 / 2 - pixman_fixed_e);
-	
-        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL)
-        {
-            fbFetchTransformed_Nearest_Normal(pict, width, buffer, mask, maskBits, affine, v, unit);
-
-        }
-        else if (pict->common.repeat == PIXMAN_REPEAT_PAD)
-        {
-            fbFetchTransformed_Nearest_Pad(pict, width, buffer, mask, maskBits, affine, v, unit);
-        }
-        else
-        {
-            fbFetchTransformed_Nearest_General(pict, width, buffer, mask, maskBits, affine, v, unit);
-        }
-    } else if (pict->common.filter == PIXMAN_FILTER_BILINEAR	||
-	       pict->common.filter == PIXMAN_FILTER_GOOD	||
-	       pict->common.filter == PIXMAN_FILTER_BEST)
-    {
-        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL)
-        {
-            fbFetchTransformed_Bilinear_Normal(pict, width, buffer, mask, maskBits, affine, v, unit);
-        }
-        else if (pict->common.repeat == PIXMAN_REPEAT_PAD)
-        {
-            fbFetchTransformed_Bilinear_Pad(pict, width, buffer, mask, maskBits, affine, v, unit);
-        }
-        else
-        {
-            fbFetchTransformed_Bilinear_General(pict, width, buffer, mask, maskBits, affine, v, unit);
-        }
-    }
-    else if (pict->common.filter == PIXMAN_FILTER_CONVOLUTION)
-    {
-	/* Round to closest integer, ensuring that 0.5 rounds to 0, not 1 */
-	adjust (&v, &unit, pixman_fixed_1 / 2 - pixman_fixed_e);
-	
-        fbFetchTransformed_Convolution(pict, width, buffer, mask, maskBits, affine, v, unit);
-    }
-}
-
-
-static void
-fbFetchExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
-{
-    int i;
-    uint32_t _alpha_buffer[SCANLINE_BUFFER_LENGTH];
-    uint32_t *alpha_buffer = _alpha_buffer;
-
-    if (!pict->common.alpha_map) {
-        fbFetchTransformed (pict, x, y, width, buffer, mask, maskBits);
-	return;
-    }
-    if (width > SCANLINE_BUFFER_LENGTH)
-        alpha_buffer = (uint32_t *) pixman_malloc_ab (width, sizeof(uint32_t));
-
-    fbFetchTransformed(pict, x, y, width, buffer, mask, maskBits);
-    fbFetchTransformed((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
-		       y - pict->common.alpha_origin.y, width, alpha_buffer,
-		       mask, maskBits);
-    for (i = 0; i < width; ++i) {
-        if (!mask || mask[i] & maskBits)
-	{
-	    int a = alpha_buffer[i]>>24;
-	    *(buffer + i) = (a << 24)
-		| (div_255(Red(*(buffer + i)) * a) << 16)
-		| (div_255(Green(*(buffer + i)) * a) << 8)
-		| (div_255(Blue(*(buffer + i)) * a));
-	}
-    }
-
-    if (alpha_buffer != _alpha_buffer)
-        free(alpha_buffer);
-}
-
 static void
 fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 {
@@ -783,44 +165,6 @@ fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
     store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
 }
 
-static void
-fbStoreExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t *bits, *alpha_bits;
-    int32_t stride, astride;
-    int ax, ay;
-    storeProc store;
-    storeProc astore;
-    const pixman_indexed_t * indexed = pict->indexed;
-    const pixman_indexed_t * aindexed;
-
-    if (!pict->common.alpha_map) {
-        fbStore(pict, x, y, width, buffer);
-	return;
-    }
-
-    store = STORE_PROC_FOR_PICTURE(pict);
-    astore = STORE_PROC_FOR_PICTURE(pict->common.alpha_map);
-    aindexed = pict->common.alpha_map->indexed;
-
-    ax = x;
-    ay = y;
-
-    bits = pict->bits;
-    stride = pict->rowstride;
-
-    alpha_bits = pict->common.alpha_map->bits;
-    astride = pict->common.alpha_map->rowstride;
-
-    bits       += y*stride;
-    alpha_bits += (ay - pict->common.alpha_origin.y)*astride;
-
-
-    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
-    astore((pixman_image_t *)pict->common.alpha_map,
-	   alpha_bits, buffer, ax - pict->common.alpha_origin.x, width, aindexed);
-}
-
 typedef void (*scanStoreProc)(pixman_image_t *, int, int, int, uint32_t *);
 typedef void (*scanFetchProc)(pixman_image_t *, int, int, int, uint32_t *,
 			      uint32_t *, uint32_t);
@@ -858,7 +202,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 
 	if (bits->common.alpha_map)
 	{
-	    fetchSrc = (scanFetchProc)fbFetchExternalAlpha;
+	    fetchSrc = (scanFetchProc)FB_FETCH_EXTERNAL_ALPHA;
 	}
 	else if ((bits->common.repeat == PIXMAN_REPEAT_NORMAL || bits->common.repeat == PIXMAN_REPEAT_PAD) &&
 		 bits->width == 1 &&
@@ -874,7 +218,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	}
 	else
 	{
-	    fetchSrc = (scanFetchProc)fbFetchTransformed;
+	    fetchSrc = (scanFetchProc)FB_FETCH_TRANSFORMED;
 	}
     }
 
@@ -897,7 +241,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 
 	    if (bits->common.alpha_map)
 	    {
-		fetchMask = (scanFetchProc)fbFetchExternalAlpha;
+		fetchMask = (scanFetchProc)FB_FETCH_EXTERNAL_ALPHA;
 	    }
 	    else if ((bits->common.repeat == PIXMAN_REPEAT_NORMAL || bits->common.repeat == PIXMAN_REPEAT_PAD) &&
 		     bits->width == 1 && bits->height == 1)
@@ -909,14 +253,14 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
                     && bits->common.repeat != PIXMAN_REPEAT_PAD)
 		fetchMask = (scanFetchProc)fbFetch;
 	    else
-		fetchMask = (scanFetchProc)fbFetchTransformed;
+		fetchMask = (scanFetchProc)FB_FETCH_TRANSFORMED;
 	}
     }
 
     if (data->dest->common.alpha_map)
     {
-	fetchDest = (scanFetchProc)fbFetchExternalAlpha;
-	store = (scanStoreProc)fbStoreExternalAlpha;
+	fetchDest = (scanFetchProc)FB_FETCH_EXTERNAL_ALPHA;
+	store = (scanStoreProc)FB_STORE_EXTERNAL_ALPHA;
 
 	if (data->op == PIXMAN_OP_CLEAR || data->op == PIXMAN_OP_SRC)
 	    fetchDest = NULL;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 3d67572..5767c10 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -191,6 +191,22 @@ storeProc pixman_storeProcForPicture_accessors (bits_image_t *);
 void pixmanFetchSourcePict(source_image_t *, int x, int y, int width,
                            uint32_t *buffer, uint32_t *mask, uint32_t maskBits);
 
+void fbFetchTransformed(bits_image_t *, int x, int y, int width,
+                        uint32_t *buffer, uint32_t *mask, uint32_t maskBits);
+void fbStoreExternalAlpha(bits_image_t *, int x, int y, int width,
+                          uint32_t *buffer);
+void fbFetchExternalAlpha(bits_image_t *, int x, int y, int width,
+                          uint32_t *buffer, uint32_t *mask, uint32_t maskBits);
+
+void fbFetchTransformed_accessors(bits_image_t *, int x, int y, int width,
+                                  uint32_t *buffer, uint32_t *mask,
+                                  uint32_t maskBits);
+void fbStoreExternalAlpha_accessors(bits_image_t *, int x, int y, int width,
+                                    uint32_t *buffer);
+void fbFetchExternalAlpha_accessors(bits_image_t *, int x, int y, int width,
+                                    uint32_t *buffer, uint32_t *mask,
+                                    uint32_t maskBits);
+
 /* end */
 
 typedef enum
diff --git a/pixman/pixman-transformed-accessors.c b/pixman/pixman-transformed-accessors.c
new file mode 100644
index 0000000..442ca24
--- /dev/null
+++ b/pixman/pixman-transformed-accessors.c
@@ -0,0 +1,3 @@
+#define PIXMAN_FB_ACCESSORS
+
+#include "pixman-transformed.c"
diff --git a/pixman/pixman-transformed.c b/pixman/pixman-transformed.c
new file mode 100644
index 0000000..19085ca
--- /dev/null
+++ b/pixman/pixman-transformed.c
@@ -0,0 +1,726 @@
+/*
+ *
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+
+#include "pixman-private.h"
+
+#ifdef PIXMAN_FB_ACCESSORS
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture_accessors
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture_accessors
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture_accessors
+
+#define FB_FETCH_TRANSFORMED fbFetchTransformed_accessors
+#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha_accessors
+#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha_accessors
+
+#else
+
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture
+
+#define FB_FETCH_TRANSFORMED fbFetchTransformed
+#define FB_FETCH_EXTERNAL_ALPHA fbFetchExternalAlpha
+#define FB_STORE_EXTERNAL_ALPHA fbStoreExternalAlpha
+
+#endif
+
+/*
+ * Fetch from region strategies
+ */
+typedef FASTCALL uint32_t (*fetchFromRegionProc)(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box);
+/* No clip test: fetch pixel (x, y) straight through fetch; buffer and box are not used. */
+static inline uint32_t
+fbFetchFromNoRegion(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
+{
+    return fetch (pict, x, y);
+}
+/* Fetch pixel (x, y) when pict->common.src_clip contains the point; otherwise return 0. */
+static uint32_t
+fbFetchFromNRectangles(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
+{
+    pixman_box16_t box2;
+    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box2))
+        return fbFetchFromNoRegion(pict, x, y, buffer, fetch, box);
+    else
+        return 0;
+}
+/* Fetch pixel (x, y) when it lies inside *box (x2 and y2 are exclusive); otherwise return 0. */
+static uint32_t
+fbFetchFromOneRectangle(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc fetch, pixman_box16_t *box)
+{
+    pixman_box16_t box2 = *box;
+    return ((x < box2.x1) | (x >= box2.x2) | (y < box2.y1) | (y >= box2.y2)) ?
+        0 : fbFetchFromNoRegion(pict, x, y, buffer, fetch, box);
+}
+
+/*
+ * Fetching Algorithms
+ */
+static void
+fbFetchTransformed_Nearest_Normal(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t* box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int x, y, i;
+    /* Nearest fetch of width pixels along v; x and y are reduced with MOD by the image size. */
+    /* choose the pixel fetcher and the clip test; a single-rect src_clip is not tested at all */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+        fetchFromRegion = fbFetchFromNoRegion;
+    else
+        fetchFromRegion = fbFetchFromNRectangles;
+
+    for ( i = 0; i < width; ++i)
+    {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2])
+            {
+                *(buffer + i) = 0;
+            }
+            else
+            {
+                if (!affine)
+                {
+                    y = MOD(DIV(v.vector[1],v.vector[2]), pict->height);
+                    x = MOD(DIV(v.vector[0],v.vector[2]), pict->width);
+                }
+                else
+                {
+                    y = MOD(v.vector[1]>>16, pict->height);
+                    x = MOD(v.vector[0]>>16, pict->width);
+                }
+                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
+            }
+        }
+
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Nearest_Pad(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t *box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int x, y, i;
+    /* Nearest fetch of width pixels along v; x and y are limited with CLIP to the image bounds. */
+    /* choose the pixel fetcher and the clip test; a single-rect src_clip is not tested at all */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+        fetchFromRegion = fbFetchFromNoRegion;
+    else
+        fetchFromRegion = fbFetchFromNRectangles;
+
+    for (i = 0; i < width; ++i)
+    {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2])
+            {
+                *(buffer + i) = 0;
+            }
+            else
+            {
+                if (!affine)
+                {
+                    y = CLIP(DIV(v.vector[1], v.vector[2]), 0, pict->height-1);
+                    x = CLIP(DIV(v.vector[0], v.vector[2]), 0, pict->width-1);
+                }
+                else
+                {
+                    y = CLIP(v.vector[1]>>16, 0, pict->height-1);
+                    x = CLIP(v.vector[0]>>16, 0, pict->width-1);
+                }
+
+                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
+            }
+        }
+
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Nearest_General(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t *box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int x, y, i;
+    /* Nearest fetch of width pixels along v; x and y are used as-is, outside the clip reads 0. */
+    /* choose the pixel fetcher and the clip test; a single-rect src_clip is tested via its extents */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+    {
+        box = &(pict->common.src_clip->extents);
+        fetchFromRegion = fbFetchFromOneRectangle;
+    }
+    else
+    {
+        fetchFromRegion = fbFetchFromNRectangles;
+    }
+
+    for (i = 0; i < width; ++i) {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2]) {
+                *(buffer + i) = 0;
+            } else {
+                if (!affine) {
+                    y = DIV(v.vector[1],v.vector[2]);
+                    x = DIV(v.vector[0],v.vector[2]);
+                } else {
+                    y = v.vector[1]>>16;
+                    x = v.vector[0]>>16;
+                }
+                *(buffer + i) = fetchFromRegion(pict, x, y, buffer, fetch, box);
+            }
+        }
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Bilinear_Normal(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t *box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int i;
+    /* Bilinear fetch: blends the 2x2 neighbours with 8-bit weights; coordinates go through MOD. */
+    /* choose the pixel fetcher and the clip test; a single-rect src_clip is not tested at all */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+        fetchFromRegion = fbFetchFromNoRegion;
+    else
+        fetchFromRegion = fbFetchFromNRectangles;
+
+    for (i = 0; i < width; ++i) {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2]) {
+                *(buffer + i) = 0;
+            } else {
+                int x1, x2, y1, y2, distx, idistx, disty, idisty;
+                uint32_t tl, tr, bl, br, r;
+                uint32_t ft, fb;
+
+                if (!affine) {
+                    pixman_fixed_48_16_t div;
+                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
+                    x1 = div >> 16;
+                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
+                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
+                    y1 = div >> 16;
+                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
+                } else {
+                    x1 = v.vector[0] >> 16;
+                    distx = (v.vector[0] >> 8) & 0xff;
+                    y1 = v.vector[1] >> 16;
+                    disty = (v.vector[1] >> 8) & 0xff;
+                }
+                x2 = x1 + 1;
+                y2 = y1 + 1;
+
+                idistx = 256 - distx;
+                idisty = 256 - disty;
+
+                x1 = MOD (x1, pict->width);
+                x2 = MOD (x2, pict->width);
+                y1 = MOD (y1, pict->height);
+                y2 = MOD (y2, pict->height);
+
+                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
+                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
+                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
+                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
+
+                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
+                *(buffer + i) = r;
+            }
+        }
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Bilinear_Pad(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t *box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int i;
+    /* Bilinear fetch: blends the 2x2 neighbours with 8-bit weights; coordinates go through CLIP. */
+    /* choose the pixel fetcher and the clip test; a single-rect src_clip is not tested at all */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+        fetchFromRegion = fbFetchFromNoRegion;
+    else
+        fetchFromRegion = fbFetchFromNRectangles;
+
+    for (i = 0; i < width; ++i) {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2]) {
+                *(buffer + i) = 0;
+            } else {
+                int x1, x2, y1, y2, distx, idistx, disty, idisty;
+                uint32_t tl, tr, bl, br, r;
+                uint32_t ft, fb;
+
+                if (!affine) {
+                    pixman_fixed_48_16_t div;
+                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
+                    x1 = div >> 16;
+                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
+                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
+                    y1 = div >> 16;
+                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
+                } else {
+                    x1 = v.vector[0] >> 16;
+                    distx = (v.vector[0] >> 8) & 0xff;
+                    y1 = v.vector[1] >> 16;
+                    disty = (v.vector[1] >> 8) & 0xff;
+                }
+                x2 = x1 + 1;
+                y2 = y1 + 1;
+
+                idistx = 256 - distx;
+                idisty = 256 - disty;
+
+                x1 = CLIP (x1, 0, pict->width-1);
+                x2 = CLIP (x2, 0, pict->width-1);
+                y1 = CLIP (y1, 0, pict->height-1);
+                y2 = CLIP (y2, 0, pict->height-1);
+
+                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
+                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
+                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
+                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
+
+                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
+                *(buffer + i) = r;
+            }
+        }
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Bilinear_General(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t *box = NULL;
+    fetchPixelProc   fetch;
+    fetchFromRegionProc fetchFromRegion;
+    int i;
+    /* Bilinear fetch: blends the 2x2 neighbours with 8-bit weights; outside the clip reads 0. */
+    /* choose the pixel fetcher and the clip test; a single-rect src_clip is tested via its extents */
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+
+    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+    {
+        box = &(pict->common.src_clip->extents);
+        fetchFromRegion = fbFetchFromOneRectangle;
+    }
+    else
+    {
+        fetchFromRegion = fbFetchFromNRectangles;
+    }
+
+    for (i = 0; i < width; ++i)
+    {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2]) {
+                *(buffer + i) = 0;
+            } else {
+                int x1, x2, y1, y2, distx, idistx, disty, idisty;
+                uint32_t tl, tr, bl, br, r;
+                uint32_t ft, fb;
+
+                if (!affine) {
+                    pixman_fixed_48_16_t div;
+                    div = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2];
+                    x1 = div >> 16;
+                    distx = ((pixman_fixed_t)div >> 8) & 0xff;
+                    div = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2];
+                    y1 = div >> 16;
+                    disty = ((pixman_fixed_t)div >> 8) & 0xff;
+                } else {
+                    x1 = v.vector[0] >> 16;
+                    distx = (v.vector[0] >> 8) & 0xff;
+                    y1 = v.vector[1] >> 16;
+                    disty = (v.vector[1] >> 8) & 0xff;
+                }
+                x2 = x1 + 1;
+                y2 = y1 + 1;
+
+                idistx = 256 - distx;
+                idisty = 256 - disty;
+
+                tl = fetchFromRegion(pict, x1, y1, buffer, fetch, box);
+                tr = fetchFromRegion(pict, x2, y1, buffer, fetch, box);
+                bl = fetchFromRegion(pict, x1, y2, buffer, fetch, box);
+                br = fetchFromRegion(pict, x2, y2, buffer, fetch, box);
+
+                ft = FbGet8(tl,0) * idistx + FbGet8(tr,0) * distx;
+                fb = FbGet8(bl,0) * idistx + FbGet8(br,0) * distx;
+                r = (((ft * idisty + fb * disty) >> 16) & 0xff);
+                ft = FbGet8(tl,8) * idistx + FbGet8(tr,8) * distx;
+                fb = FbGet8(bl,8) * idistx + FbGet8(br,8) * distx;
+                r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
+                ft = FbGet8(tl,16) * idistx + FbGet8(tr,16) * distx;
+                fb = FbGet8(bl,16) * idistx + FbGet8(br,16) * distx;
+                r |= (((ft * idisty + fb * disty)) & 0xff0000);
+                ft = FbGet8(tl,24) * idistx + FbGet8(tr,24) * distx;
+                fb = FbGet8(bl,24) * idistx + FbGet8(br,24) * distx;
+                r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
+                *(buffer + i) = r;
+            }
+        }
+
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+
+static void
+fbFetchTransformed_Convolution(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
+{
+    pixman_box16_t dummy;
+    fetchPixelProc fetch;
+    int i;
+    /* Convolution fetch: each output pixel is the kernel-weighted sum of in-clip source pixels. */
+    pixman_fixed_t *params = pict->common.filter_params;
+    int32_t cwidth = pixman_fixed_to_int(params[0]);
+    int32_t cheight = pixman_fixed_to_int(params[1]);
+    int xoff = (params[0] - pixman_fixed_1) >> 1;
+    int yoff = (params[1] - pixman_fixed_1) >> 1;
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
+    /* params[0] and params[1] gave the kernel size in fixed point; the weights start at params[2] */
+    params += 2;
+    for (i = 0; i < width; ++i) {
+        if (!mask || mask[i] & maskBits)
+        {
+            if (!v.vector[2]) {
+                *(buffer + i) = 0;
+            } else {
+                int x1, x2, y1, y2, x, y;
+                int32_t srtot, sgtot, sbtot, satot;
+                pixman_fixed_t *p = params;
+
+                if (!affine) {
+                    pixman_fixed_48_16_t tmp;
+                    tmp = ((pixman_fixed_48_16_t)v.vector[0] << 16)/v.vector[2] - xoff;
+                    x1 = pixman_fixed_to_int(tmp);
+                    tmp = ((pixman_fixed_48_16_t)v.vector[1] << 16)/v.vector[2] - yoff;
+                    y1 = pixman_fixed_to_int(tmp);
+                } else {
+                    x1 = pixman_fixed_to_int(v.vector[0] - xoff);
+                    y1 = pixman_fixed_to_int(v.vector[1] - yoff);
+                }
+                x2 = x1 + cwidth;
+                y2 = y1 + cheight;
+
+                srtot = sgtot = sbtot = satot = 0;
+
+                for (y = y1; y < y2; y++) {
+                    int ty;
+                    switch (pict->common.repeat) {
+                        case PIXMAN_REPEAT_NORMAL:
+                            ty = MOD (y, pict->height);
+                            break;
+                        case PIXMAN_REPEAT_PAD:
+                            ty = CLIP (y, 0, pict->height-1);
+                            break;
+                        default:
+                            ty = y;
+                    }
+                    for (x = x1; x < x2; x++) {
+                        if (*p) {
+                            int tx;
+                            switch (pict->common.repeat) {
+                                case PIXMAN_REPEAT_NORMAL:
+                                    tx = MOD (x, pict->width);
+                                    break;
+                                case PIXMAN_REPEAT_PAD:
+                                    tx = CLIP (x, 0, pict->width-1);
+                                    break;
+                                default:
+                                    tx = x;
+                            }
+                            if (pixman_region_contains_point (pict->common.src_clip, tx, ty, &dummy)) {
+                                uint32_t c = fetch(pict, tx, ty);
+
+                                srtot += Red(c) * *p;
+                                sgtot += Green(c) * *p;
+                                sbtot += Blue(c) * *p;
+                                satot += Alpha(c) * *p;
+                            }
+                        }
+                        p++;
+                    }
+                }
+
+                satot >>= 16;
+                srtot >>= 16;
+                sgtot >>= 16;
+                sbtot >>= 16;
+
+                if (satot < 0) satot = 0; else if (satot > 0xff) satot = 0xff;
+                if (srtot < 0) srtot = 0; else if (srtot > 0xff) srtot = 0xff;
+                if (sgtot < 0) sgtot = 0; else if (sgtot > 0xff) sgtot = 0xff;
+                if (sbtot < 0) sbtot = 0; else if (sbtot > 0xff) sbtot = 0xff;
+
+                *(buffer + i) = ((satot << 24) |
+                                 (srtot << 16) |
+                                 (sgtot <<  8) |
+                                 (sbtot       ));
+            }
+        }
+        v.vector[0] += unit.vector[0];
+        v.vector[1] += unit.vector[1];
+        v.vector[2] += unit.vector[2];
+    }
+}
+/* Add adjustment, scaled by each vector's third component (16.16 multiply), to x and y of v and u. */
+static void
+adjust (pixman_vector_t *v, pixman_vector_t *u, pixman_fixed_t adjustment)
+{
+    int delta_v = (adjustment * v->vector[2]) >> 16;
+    int delta_u = (adjustment * u->vector[2]) >> 16;
+    
+    v->vector[0] += delta_v;
+    v->vector[1] += delta_v;
+    
+    u->vector[0] += delta_u;
+    u->vector[1] += delta_u;
+}
+
+/*
+ * Fetch width pixels of scanline y, starting at x, into buffer, sampling pict
+ * through its transform (if set), filter and repeat mode.  Returns without
+ * touching buffer if the transform fails or the filter is not handled below.
+ */
+void
+FB_FETCH_TRANSFORMED(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
+{
+    pixman_vector_t v;
+    pixman_vector_t unit;
+    pixman_bool_t affine = TRUE;
+
+    /* reference point is the center of the pixel */
+    v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1 / 2;
+    v.vector[2] = pixman_fixed_1;
+
+    /* when using convolution filters or PIXMAN_REPEAT_PAD one might get here without a transform */
+    if (pict->common.transform)
+    {
+        if (!pixman_transform_point_3d (pict->common.transform, &v))
+            return;
+        unit.vector[0] = pict->common.transform->matrix[0][0];
+        unit.vector[1] = pict->common.transform->matrix[1][0];
+        unit.vector[2] = pict->common.transform->matrix[2][0];
+        affine = v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0;
+    }
+    else
+    {
+        unit.vector[0] = pixman_fixed_1;
+        unit.vector[1] = 0;
+        unit.vector[2] = 0;
+    }
+
+    /* This allows filtering code to pretend that pixels are located at integer coordinates */
+    adjust (&v, &unit, -(pixman_fixed_1 / 2));
+    
+    if (pict->common.filter == PIXMAN_FILTER_NEAREST || pict->common.filter == PIXMAN_FILTER_FAST)
+    {
+	/* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
+	adjust (&v, &unit, pixman_fixed_1 / 2 - pixman_fixed_e);
+	
+        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL)
+        {
+            fbFetchTransformed_Nearest_Normal(pict, width, buffer, mask, maskBits, affine, v, unit);
+
+        }
+        else if (pict->common.repeat == PIXMAN_REPEAT_PAD)
+        {
+            fbFetchTransformed_Nearest_Pad(pict, width, buffer, mask, maskBits, affine, v, unit);
+        }
+        else
+        {
+            fbFetchTransformed_Nearest_General(pict, width, buffer, mask, maskBits, affine, v, unit);
+        }
+    } else if (pict->common.filter == PIXMAN_FILTER_BILINEAR	||
+	       pict->common.filter == PIXMAN_FILTER_GOOD	||
+	       pict->common.filter == PIXMAN_FILTER_BEST)
+    {
+        if (pict->common.repeat == PIXMAN_REPEAT_NORMAL)
+        {
+            fbFetchTransformed_Bilinear_Normal(pict, width, buffer, mask, maskBits, affine, v, unit);
+        }
+        else if (pict->common.repeat == PIXMAN_REPEAT_PAD)
+        {
+            fbFetchTransformed_Bilinear_Pad(pict, width, buffer, mask, maskBits, affine, v, unit);
+        }
+        else
+        {
+            fbFetchTransformed_Bilinear_General(pict, width, buffer, mask, maskBits, affine, v, unit);
+        }
+    }
+    else if (pict->common.filter == PIXMAN_FILTER_CONVOLUTION)
+    {
+	/* Round to closest integer, ensuring that 0.5 rounds to 0, not 1 */
+	adjust (&v, &unit, pixman_fixed_1 / 2 - pixman_fixed_e);
+	
+        fbFetchTransformed_Convolution(pict, width, buffer, mask, maskBits, affine, v, unit);
+    }
+}
+
+#define SCANLINE_BUFFER_LENGTH 2048
+
+/* Fetch a scanline of pict, then replace its alpha with alpha_map's and scale R, G, B by it. */
+void
+FB_FETCH_EXTERNAL_ALPHA(bits_image_t * pict, int x, int y, int width,
+                        uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
+{
+    int i;
+    uint32_t _alpha_buffer[SCANLINE_BUFFER_LENGTH];
+    uint32_t *alpha_buffer = _alpha_buffer;
+
+    FB_FETCH_TRANSFORMED (pict, x, y, width, buffer, mask, maskBits);
+    if (!pict->common.alpha_map)
+	return;
+    if (width > SCANLINE_BUFFER_LENGTH)
+        alpha_buffer = (uint32_t *) pixman_malloc_ab (width, sizeof(uint32_t));
+    if (!alpha_buffer)
+	return; /* allocation failed: buffer keeps pict's own pixels, external alpha not applied */
+    FB_FETCH_TRANSFORMED((bits_image_t *)pict->common.alpha_map, x - pict->common.alpha_origin.x,
+			 y - pict->common.alpha_origin.y, width, alpha_buffer,
+			 mask, maskBits);
+    for (i = 0; i < width; ++i) {
+        if (!mask || mask[i] & maskBits)
+	{
+	    int a = alpha_buffer[i]>>24;
+	    *(buffer + i) = (a << 24)
+		| (div_255(Red(*(buffer + i)) * a) << 16)
+		| (div_255(Green(*(buffer + i)) * a) << 8)
+		| (div_255(Blue(*(buffer + i)) * a));
+	}
+    }
+
+    if (alpha_buffer != _alpha_buffer)
+        free(alpha_buffer);
+}
+/* Store a scanline into pict and into its alpha map (offset by alpha_origin); aborts without one. */
+void
+FB_STORE_EXTERNAL_ALPHA(bits_image_t * pict, int x, int y, int width,
+                        uint32_t *buffer)
+{
+    uint32_t *bits, *alpha_bits;
+    int32_t stride, astride;
+    int ax, ay;
+    storeProc store;
+    storeProc astore;
+    const pixman_indexed_t * indexed = pict->indexed;
+    const pixman_indexed_t * aindexed;
+
+    if (!pict->common.alpha_map) {
+        // XXX[AGP]: This should never happen!
+        // fbStore(pict, x, y, width, buffer);
+        abort();
+	return;
+    }
+
+    store = STORE_PROC_FOR_PICTURE(pict);
+    astore = STORE_PROC_FOR_PICTURE(pict->common.alpha_map);
+    aindexed = pict->common.alpha_map->indexed;
+
+    ax = x;
+    ay = y;
+
+    bits = pict->bits;
+    stride = pict->rowstride;
+
+    alpha_bits = pict->common.alpha_map->bits;
+    astride = pict->common.alpha_map->rowstride;
+
+    bits       += y*stride;
+    alpha_bits += (ay - pict->common.alpha_origin.y)*astride;
+    /* the same buffer is handed to both store procs: first pict's, then the alpha map's */
+
+    store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
+    astore((pixman_image_t *)pict->common.alpha_map,
+	   alpha_bits, buffer, ax - pict->common.alpha_origin.x, width, aindexed);
+}
+
commit 90ff2bfbe5fd7f1e3aa5273e0fb72ae05e1e88c1
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Fri Mar 21 18:18:50 2008 -0700

    Move pixmanFetchSourcePict and friends into pixman-source.c.

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 259c2a5..79c6788 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -12,6 +12,7 @@ libpixman_1_la_SOURCES =		\
 	pixman-compose.c	\
 	pixman-compose-accessors.c	\
 	pixman-pict.c		\
+	pixman-source.c		\
 	pixman-utils.c		\
 	pixman-edge.c		\
 	pixman-edge-accessors.c		\
diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
index 4a772ee..b4096f2 100644
--- a/pixman/Makefile.win32
+++ b/pixman/Makefile.win32
@@ -30,6 +30,7 @@ SOURCES = \
 	pixman-compose.c				\
 	pixman-compose-accessors.c	\
 	pixman-pict.c					\
+	pixman-source.c					\
 	pixman-utils.c					\
 	pixman-edge.c					\
 	pixman-edge-accessors.c		\
diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index f713c43..00d0171 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -140,654 +140,6 @@ FbComposeFunctions PIXMAN_COMPOSE_FUNCTIONS = {
     pixman_fbCombineMaskU
 };
 
-typedef struct
-{
-    uint32_t        left_ag;
-    uint32_t        left_rb;
-    uint32_t        right_ag;
-    uint32_t        right_rb;
-    int32_t       left_x;
-    int32_t       right_x;
-    int32_t       stepper;
-
-    pixman_gradient_stop_t	*stops;
-    int                      num_stops;
-    unsigned int             spread;
-
-    int		  need_reset;
-} GradientWalker;
-
-static void
-_gradient_walker_init (GradientWalker  *walker,
-		       gradient_t      *gradient,
-		       unsigned int     spread)
-{
-    walker->num_stops = gradient->n_stops;
-    walker->stops     = gradient->stops;
-    walker->left_x    = 0;
-    walker->right_x   = 0x10000;
-    walker->stepper   = 0;
-    walker->left_ag   = 0;
-    walker->left_rb   = 0;
-    walker->right_ag  = 0;
-    walker->right_rb  = 0;
-    walker->spread    = spread;
-
-    walker->need_reset = TRUE;
-}
-
-static void
-_gradient_walker_reset (GradientWalker  *walker,
-                        pixman_fixed_32_32_t     pos)
-{
-    int32_t                  x, left_x, right_x;
-    pixman_color_t          *left_c, *right_c;
-    int                      n, count = walker->num_stops;
-    pixman_gradient_stop_t *      stops = walker->stops;
-
-    static const pixman_color_t   transparent_black = { 0, 0, 0, 0 };
-
-    switch (walker->spread)
-    {
-    case PIXMAN_REPEAT_NORMAL:
-	x = (int32_t)pos & 0xFFFF;
-	for (n = 0; n < count; n++)
-	    if (x < stops[n].x)
-		break;
-	if (n == 0) {
-	    left_x =  stops[count-1].x - 0x10000;
-	    left_c = &stops[count-1].color;
-	} else {
-	    left_x =  stops[n-1].x;
-	    left_c = &stops[n-1].color;
-	}
-
-	if (n == count) {
-	    right_x =  stops[0].x + 0x10000;
-	    right_c = &stops[0].color;
-	} else {
-	    right_x =  stops[n].x;
-	    right_c = &stops[n].color;
-	}
-	left_x  += (pos - x);
-	right_x += (pos - x);
-	break;
-
-    case PIXMAN_REPEAT_PAD:
-	for (n = 0; n < count; n++)
-	    if (pos < stops[n].x)
-		break;
-
-	if (n == 0) {
-	    left_x =  INT32_MIN;
-	    left_c = &stops[0].color;
-	} else {
-	    left_x =  stops[n-1].x;
-	    left_c = &stops[n-1].color;
-	}
-
-	if (n == count) {
-	    right_x =  INT32_MAX;
-	    right_c = &stops[n-1].color;
-	} else {
-	    right_x =  stops[n].x;
-	    right_c = &stops[n].color;
-	}
-	break;
-
-    case PIXMAN_REPEAT_REFLECT:
-	x = (int32_t)pos & 0xFFFF;
-	if ((int32_t)pos & 0x10000)
-	    x = 0x10000 - x;
-	for (n = 0; n < count; n++)
-	    if (x < stops[n].x)
-		break;
-
-	if (n == 0) {
-	    left_x =  -stops[0].x;
-	    left_c = &stops[0].color;
-	} else {
-	    left_x =  stops[n-1].x;
-	    left_c = &stops[n-1].color;
-	}
-
-	if (n == count) {
-	    right_x = 0x20000 - stops[n-1].x;
-	    right_c = &stops[n-1].color;
-	} else {
-	    right_x =  stops[n].x;
-	    right_c = &stops[n].color;
-	}
-
-	if ((int32_t)pos & 0x10000) {
-	    pixman_color_t  *tmp_c;
-	    int32_t          tmp_x;
-
-	    tmp_x   = 0x10000 - right_x;
-	    right_x = 0x10000 - left_x;
-	    left_x  = tmp_x;
-
-	    tmp_c   = right_c;
-	    right_c = left_c;
-	    left_c  = tmp_c;
-
-	    x = 0x10000 - x;
-	}
-	left_x  += (pos - x);
-	right_x += (pos - x);
-	break;
-
-    default:  /* RepeatNone */
-	for (n = 0; n < count; n++)
-	    if (pos < stops[n].x)
-		break;
-
-	if (n == 0)
-	{
-	    left_x  =  INT32_MIN;
-	    right_x =  stops[0].x;
-	    left_c  = right_c = (pixman_color_t*) &transparent_black;
-	}
-	else if (n == count)
-	{
-	    left_x  = stops[n-1].x;
-	    right_x = INT32_MAX;
-	    left_c  = right_c = (pixman_color_t*) &transparent_black;
-	}
-	else
-	{
-	    left_x  =  stops[n-1].x;
-	    right_x =  stops[n].x;
-	    left_c  = &stops[n-1].color;
-	    right_c = &stops[n].color;
-	}
-    }
-
-    walker->left_x   = left_x;
-    walker->right_x  = right_x;
-    walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);
-    walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
-    walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
-    walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
-
-    if ( walker->left_x == walker->right_x                ||
-	 ( walker->left_ag == walker->right_ag &&
-	   walker->left_rb == walker->right_rb )   )
-    {
-	walker->stepper = 0;
-    }
-    else
-    {
-	int32_t width = right_x - left_x;
-	walker->stepper = ((1 << 24) + width/2)/width;
-    }
-
-    walker->need_reset = FALSE;
-}
-
-#define  GRADIENT_WALKER_NEED_RESET(w,x)				\
-    ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x)
-
-
-/* the following assumes that GRADIENT_WALKER_NEED_RESET(w,x) is FALSE */
-static uint32_t
-_gradient_walker_pixel (GradientWalker  *walker,
-                        pixman_fixed_32_32_t     x)
-{
-    int  dist, idist;
-    uint32_t  t1, t2, a, color;
-
-    if (GRADIENT_WALKER_NEED_RESET (walker, x))
-        _gradient_walker_reset (walker, x);
-
-    dist  = ((int)(x - walker->left_x)*walker->stepper) >> 16;
-    idist = 256 - dist;
-
-    /* combined INTERPOLATE and premultiply */
-    t1 = walker->left_rb*idist + walker->right_rb*dist;
-    t1 = (t1 >> 8) & 0xff00ff;
-
-    t2  = walker->left_ag*idist + walker->right_ag*dist;
-    t2 &= 0xff00ff00;
-
-    color = t2 & 0xff000000;
-    a     = t2 >> 24;
-
-    t1  = t1*a + 0x800080;
-    t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
-
-    t2  = (t2 >> 8)*a + 0x800080;
-    t2  = (t2 + ((t2 >> 8) & 0xff00ff));
-
-    return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
-}
-
-static void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
-{
-#if 0
-    SourcePictPtr   pGradient = pict->pSourcePict;
-#endif
-    GradientWalker  walker;
-    uint32_t       *end = buffer + width;
-    gradient_t	    *gradient;
-
-    if (pict->common.type == SOLID)
-    {
-	register uint32_t color = ((solid_fill_t *)pict)->color;
-
-	while (buffer < end)
-	    *(buffer++) = color;
-
-	return;
-    }
-
-    gradient = (gradient_t *)pict;
-
-    _gradient_walker_init (&walker, gradient, pict->common.repeat);
-
-    if (pict->common.type == LINEAR) {
-	pixman_vector_t v, unit;
-	pixman_fixed_32_32_t l;
-	pixman_fixed_48_16_t dx, dy, a, b, off;
-	linear_gradient_t *linear = (linear_gradient_t *)pict;
-
-        /* reference point is the center of the pixel */
-        v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
-        v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
-        v.vector[2] = pixman_fixed_1;
-        if (pict->common.transform) {
-            if (!pixman_transform_point_3d (pict->common.transform, &v))
-                return;
-            unit.vector[0] = pict->common.transform->matrix[0][0];
-            unit.vector[1] = pict->common.transform->matrix[1][0];
-            unit.vector[2] = pict->common.transform->matrix[2][0];
-        } else {
-            unit.vector[0] = pixman_fixed_1;
-            unit.vector[1] = 0;
-            unit.vector[2] = 0;
-        }
-
-        dx = linear->p2.x - linear->p1.x;
-        dy = linear->p2.y - linear->p1.y;
-        l = dx*dx + dy*dy;
-        if (l != 0) {
-            a = (dx << 32) / l;
-            b = (dy << 32) / l;
-            off = (-a*linear->p1.x - b*linear->p1.y)>>16;
-        }
-        if (l == 0  || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)) {
-            pixman_fixed_48_16_t inc, t;
-            /* affine transformation only */
-            if (l == 0) {
-                t = 0;
-                inc = 0;
-            } else {
-                t = ((a*v.vector[0] + b*v.vector[1]) >> 16) + off;
-                inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16;
-            }
-
-	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
-	    {
-		register uint32_t color;
-
-		color = _gradient_walker_pixel( &walker, t );
-		while (buffer < end)
-		    *(buffer++) = color;
-	    }
-	    else
-	    {
-                if (!mask) {
-                    while (buffer < end)
-                    {
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-                        buffer += 1;
-                        t      += inc;
-                    }
-                } else {
-                    while (buffer < end) {
-                        if (*mask++ & maskBits)
-                        {
-			    *(buffer) = _gradient_walker_pixel (&walker, t);
-                        }
-                        buffer += 1;
-                        t      += inc;
-                    }
-                }
-	    }
-	}
-	else /* projective transformation */
-	{
-	    pixman_fixed_48_16_t t;
-
-	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
-	    {
-		register uint32_t color;
-
-		if (v.vector[2] == 0)
-		{
-		    t = 0;
-		}
-		else
-		{
-		    pixman_fixed_48_16_t x, y;
-
-		    x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2];
-		    y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2];
-		    t = ((a * x + b * y) >> 16) + off;
-		}
-
- 		color = _gradient_walker_pixel( &walker, t );
-		while (buffer < end)
-		    *(buffer++) = color;
-	    }
-	    else
-	    {
-		while (buffer < end)
-		{
-		    if (!mask || *mask++ & maskBits)
-		    {
-			if (v.vector[2] == 0) {
-			    t = 0;
-			} else {
-			    pixman_fixed_48_16_t x, y;
-			    x = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2];
-			    y = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2];
-			    t = ((a*x + b*y) >> 16) + off;
-			}
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-		    }
-		    ++buffer;
-		    v.vector[0] += unit.vector[0];
-		    v.vector[1] += unit.vector[1];
-		    v.vector[2] += unit.vector[2];
-		}
-            }
-        }
-    } else {
-
-/*
- * In the radial gradient problem we are given two circles (c₁,r₁) and
- * (c₂,r₂) that define the gradient itself. Then, for any point p, we
- * must compute the value(s) of t within [0.0, 1.0] representing the
- * circle(s) that would color the point.
- *
- * There are potentially two values of t since the point p can be
- * colored by both sides of the circle, (which happens whenever one
- * circle is not entirely contained within the other).
- *
- * If we solve for a value of t that is outside of [0.0, 1.0] then we
- * use the extend mode (NONE, REPEAT, REFLECT, or PAD) to map to a
- * value within [0.0, 1.0].
- *
- * Here is an illustration of the problem:
- *
- *              p₂
- *           p  •
- *           •   ╲
- *        ·       ╲r₂
- *  p₁ ·           ╲
- *  •              θ╲
- *   ╲             ╌╌•
- *    ╲r₁        ·   c₂
- *    θ╲    ·
- *    ╌╌•
- *      c₁
- *
- * Given (c₁,r₁), (c₂,r₂) and p, we must find an angle θ such that two
- * points p₁ and p₂ on the two circles are collinear with p. Then, the
- * desired value of t is the ratio of the length of p₁p to the length
- * of p₁p₂.
- *
- * So, we have six unknown values: (p₁x, p₁y), (p₂x, p₂y), θ and t.
- * We can also write six equations that constrain the problem:
- *
- * Point p₁ is a distance r₁ from c₁ at an angle of θ:
- *
- *	1. p₁x = c₁x + r₁·cos θ
- *	2. p₁y = c₁y + r₁·sin θ
- *
- * Point p₂ is a distance r₂ from c₂ at an angle of θ:
- *
- *	3. p₂x = c₂x + r2·cos θ
- *	4. p₂y = c₂y + r2·sin θ
- *
- * Point p lies at a fraction t along the line segment p₁p₂:
- *
- *	5. px = t·p₂x + (1-t)·p₁x
- *	6. py = t·p₂y + (1-t)·p₁y
- *
- * To solve, first subtitute 1-4 into 5 and 6:
- *
- * px = t·(c₂x + r₂·cos θ) + (1-t)·(c₁x + r₁·cos θ)
- * py = t·(c₂y + r₂·sin θ) + (1-t)·(c₁y + r₁·sin θ)
- *
- * Then solve each for cos θ and sin θ expressed as a function of t:
- *
- * cos θ = (-(c₂x - c₁x)·t + (px - c₁x)) / ((r₂-r₁)·t + r₁)
- * sin θ = (-(c₂y - c₁y)·t + (py - c₁y)) / ((r₂-r₁)·t + r₁)
- *
- * To simplify this a bit, we define new variables for several of the
- * common terms as shown below:
- *
- *              p₂
- *           p  •
- *           •   ╲
- *        ·  ┆    ╲r₂
- *  p₁ ·     ┆     ╲
- *  •     pdy┆      ╲
- *   ╲       ┆       •c₂
- *    ╲r₁    ┆   ·   ┆
- *     ╲    ·┆       ┆cdy
- *      •╌╌╌╌┴╌╌╌╌╌╌╌┘
- *    c₁  pdx   cdx
- *
- * cdx = (c₂x - c₁x)
- * cdy = (c₂y - c₁y)
- *  dr =  r₂-r₁
- * pdx =  px - c₁x
- * pdy =  py - c₁y
- *
- * Note that cdx, cdy, and dr do not depend on point p at all, so can
- * be pre-computed for the entire gradient. The simplifed equations
- * are now:
- *
- * cos θ = (-cdx·t + pdx) / (dr·t + r₁)
- * sin θ = (-cdy·t + pdy) / (dr·t + r₁)
- *
- * Finally, to get a single function of t and eliminate the last
- * unknown θ, we use the identity sin²θ + cos²θ = 1. First, square
- * each equation, (we knew a quadratic was coming since it must be
- * possible to obtain two solutions in some cases):
- *
- * cos²θ = (cdx²t² - 2·cdx·pdx·t + pdx²) / (dr²·t² + 2·r₁·dr·t + r₁²)
- * sin²θ = (cdy²t² - 2·cdy·pdy·t + pdy²) / (dr²·t² + 2·r₁·dr·t + r₁²)
- *
- * Then add both together, set the result equal to 1, and express as a
- * standard quadratic equation in t of the form At² + Bt + C = 0
- *
- * (cdx² + cdy² - dr²)·t² - 2·(cdx·pdx + cdy·pdy + r₁·dr)·t + (pdx² + pdy² - r₁²) = 0
- *
- * In other words:
- *
- * A = cdx² + cdy² - dr²
- * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
- * C = pdx² + pdy² - r₁²
- *
- * And again, notice that A does not depend on p, so can be
- * precomputed. From here we just use the quadratic formula to solve
- * for t:
- *
- * t = (-2·B ± ⎷(B² - 4·A·C)) / 2·A
- */
-        /* radial or conical */
-        pixman_bool_t affine = TRUE;
-        double cx = 1.;
-        double cy = 0.;
-        double cz = 0.;
-	double rx = x + 0.5;
-	double ry = y + 0.5;
-        double rz = 1.;
-
-        if (pict->common.transform) {
-            pixman_vector_t v;
-            /* reference point is the center of the pixel */
-            v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
-            v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
-            v.vector[2] = pixman_fixed_1;
-            if (!pixman_transform_point_3d (pict->common.transform, &v))
-                return;
-
-            cx = pict->common.transform->matrix[0][0]/65536.;
-            cy = pict->common.transform->matrix[1][0]/65536.;
-            cz = pict->common.transform->matrix[2][0]/65536.;
-            rx = v.vector[0]/65536.;
-            ry = v.vector[1]/65536.;
-            rz = v.vector[2]/65536.;
-            affine = pict->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1;
-        }
-
-        if (pict->common.type == RADIAL) {
-	    radial_gradient_t *radial = (radial_gradient_t *)pict;
-            if (affine) {
-                while (buffer < end) {
-		    if (!mask || *mask++ & maskBits)
-		    {
-			double pdx, pdy;
-			double B, C;
-			double det;
-			double c1x = radial->c1.x / 65536.0;
-			double c1y = radial->c1.y / 65536.0;
-			double r1  = radial->c1.radius / 65536.0;
-                        pixman_fixed_48_16_t t;
-
-			pdx = rx - c1x;
-			pdy = ry - c1y;
-
-			B = -2 * (  pdx * radial->cdx
-				    + pdy * radial->cdy
-				    + r1 * radial->dr);
-			C = (pdx * pdx + pdy * pdy - r1 * r1);
-
-                        det = (B * B) - (4 * radial->A * C);
-			if (det < 0.0)
-			    det = 0.0;
-
-			if (radial->A < 0)
-			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
-			else
-			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
-
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-		    }
-		    ++buffer;
-
-                    rx += cx;
-                    ry += cy;
-                }
-            } else {
-		/* projective */
-                while (buffer < end) {
-		    if (!mask || *mask++ & maskBits)
-		    {
-			double pdx, pdy;
-			double B, C;
-			double det;
-			double c1x = radial->c1.x / 65536.0;
-			double c1y = radial->c1.y / 65536.0;
-			double r1  = radial->c1.radius / 65536.0;
-                        pixman_fixed_48_16_t t;
-			double x, y;
-
-			if (rz != 0) {
-			    x = rx/rz;
-			    y = ry/rz;
-			} else {
-			    x = y = 0.;
-			}
-
-			pdx = x - c1x;
-			pdy = y - c1y;
-
-			B = -2 * (  pdx * radial->cdx
-				    + pdy * radial->cdy
-				    + r1 * radial->dr);
-			C = (pdx * pdx + pdy * pdy - r1 * r1);
-
-                        det = (B * B) - (4 * radial->A * C);
-			if (det < 0.0)
-			    det = 0.0;
-
-			if (radial->A < 0)
-			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
-			else
-			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
-
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-		    }
-		    ++buffer;
-
-                    rx += cx;
-                    ry += cy;
-		    rz += cz;
-                }
-            }
-        } else /* SourcePictTypeConical */ {
-	    conical_gradient_t *conical = (conical_gradient_t *)pict;
-            double a = conical->angle/(180.*65536);
-            if (affine) {
-                rx -= conical->center.x/65536.;
-                ry -= conical->center.y/65536.;
-
-                while (buffer < end) {
-		    double angle;
-
-                    if (!mask || *mask++ & maskBits)
-		    {
-                        pixman_fixed_48_16_t   t;
-
-                        angle = atan2(ry, rx) + a;
-			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
-
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-		    }
-
-                    ++buffer;
-                    rx += cx;
-                    ry += cy;
-                }
-            } else {
-                while (buffer < end) {
-                    double x, y;
-                    double angle;
-
-                    if (!mask || *mask++ & maskBits)
-                    {
-			pixman_fixed_48_16_t  t;
-
-			if (rz != 0) {
-			    x = rx/rz;
-			    y = ry/rz;
-			} else {
-			    x = y = 0.;
-			}
-			x -= conical->center.x/65536.;
-			y -= conical->center.y/65536.;
-			angle = atan2(y, x) + a;
-			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
-
-			*(buffer) = _gradient_walker_pixel (&walker, t);
-		    }
-
-                    ++buffer;
-                    rx += cx;
-                    ry += cy;
-                    rz += cz;
-                }
-            }
-        }
-    }
-}
-
 /*
  * Fetch from region strategies
  */
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index b0aa375..3d67572 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -188,6 +188,9 @@ fetchProc pixman_fetchProcForPicture_accessors (bits_image_t *);
 fetchPixelProc pixman_fetchPixelProcForPicture_accessors (bits_image_t *);
 storeProc pixman_storeProcForPicture_accessors (bits_image_t *);
 
+void pixmanFetchSourcePict(source_image_t *, int x, int y, int width,
+                           uint32_t *buffer, uint32_t *mask, uint32_t maskBits);
+
 /* end */
 
 typedef enum
diff --git a/pixman/pixman-source.c b/pixman/pixman-source.c
new file mode 100644
index 0000000..a5a4235
--- /dev/null
+++ b/pixman/pixman-source.c
@@ -0,0 +1,681 @@
+/*
+ *
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <math.h>
+
+#include "pixman-private.h"
+
+typedef struct
+{
+    uint32_t        left_ag;
+    uint32_t        left_rb;
+    uint32_t        right_ag;
+    uint32_t        right_rb;
+    int32_t       left_x;
+    int32_t       right_x;
+    int32_t       stepper;
+
+    pixman_gradient_stop_t	*stops;
+    int                      num_stops;
+    unsigned int             spread;
+
+    int		  need_reset;
+} GradientWalker;
+
+static void
+_gradient_walker_init (GradientWalker  *walker,
+		       gradient_t      *gradient,
+		       unsigned int     spread)
+{
+    walker->num_stops = gradient->n_stops;
+    walker->stops     = gradient->stops;
+    walker->left_x    = 0;
+    walker->right_x   = 0x10000;
+    walker->stepper   = 0;
+    walker->left_ag   = 0;
+    walker->left_rb   = 0;
+    walker->right_ag  = 0;
+    walker->right_rb  = 0;
+    walker->spread    = spread;
+
+    walker->need_reset = TRUE;
+}
+
+static void
+_gradient_walker_reset (GradientWalker  *walker,
+                        pixman_fixed_32_32_t     pos)
+{
+    int32_t                  x, left_x, right_x;
+    pixman_color_t          *left_c, *right_c;
+    int                      n, count = walker->num_stops;
+    pixman_gradient_stop_t *      stops = walker->stops;
+
+    static const pixman_color_t   transparent_black = { 0, 0, 0, 0 };
+
+    switch (walker->spread)
+    {
+    case PIXMAN_REPEAT_NORMAL:
+	x = (int32_t)pos & 0xFFFF;
+	for (n = 0; n < count; n++)
+	    if (x < stops[n].x)
+		break;
+	if (n == 0) {
+	    left_x =  stops[count-1].x - 0x10000;
+	    left_c = &stops[count-1].color;
+	} else {
+	    left_x =  stops[n-1].x;
+	    left_c = &stops[n-1].color;
+	}
+
+	if (n == count) {
+	    right_x =  stops[0].x + 0x10000;
+	    right_c = &stops[0].color;
+	} else {
+	    right_x =  stops[n].x;
+	    right_c = &stops[n].color;
+	}
+	left_x  += (pos - x);
+	right_x += (pos - x);
+	break;
+
+    case PIXMAN_REPEAT_PAD:
+	for (n = 0; n < count; n++)
+	    if (pos < stops[n].x)
+		break;
+
+	if (n == 0) {
+	    left_x =  INT32_MIN;
+	    left_c = &stops[0].color;
+	} else {
+	    left_x =  stops[n-1].x;
+	    left_c = &stops[n-1].color;
+	}
+
+	if (n == count) {
+	    right_x =  INT32_MAX;
+	    right_c = &stops[n-1].color;
+	} else {
+	    right_x =  stops[n].x;
+	    right_c = &stops[n].color;
+	}
+	break;
+
+    case PIXMAN_REPEAT_REFLECT:
+	x = (int32_t)pos & 0xFFFF;
+	if ((int32_t)pos & 0x10000)
+	    x = 0x10000 - x;
+	for (n = 0; n < count; n++)
+	    if (x < stops[n].x)
+		break;
+
+	if (n == 0) {
+	    left_x =  -stops[0].x;
+	    left_c = &stops[0].color;
+	} else {
+	    left_x =  stops[n-1].x;
+	    left_c = &stops[n-1].color;
+	}
+
+	if (n == count) {
+	    right_x = 0x20000 - stops[n-1].x;
+	    right_c = &stops[n-1].color;
+	} else {
+	    right_x =  stops[n].x;
+	    right_c = &stops[n].color;
+	}
+
+	if ((int32_t)pos & 0x10000) {
+	    pixman_color_t  *tmp_c;
+	    int32_t          tmp_x;
+
+	    tmp_x   = 0x10000 - right_x;
+	    right_x = 0x10000 - left_x;
+	    left_x  = tmp_x;
+
+	    tmp_c   = right_c;
+	    right_c = left_c;
+	    left_c  = tmp_c;
+
+	    x = 0x10000 - x;
+	}
+	left_x  += (pos - x);
+	right_x += (pos - x);
+	break;
+
+    default:  /* RepeatNone */
+	for (n = 0; n < count; n++)
+	    if (pos < stops[n].x)
+		break;
+
+	if (n == 0)
+	{
+	    left_x  =  INT32_MIN;
+	    right_x =  stops[0].x;
+	    left_c  = right_c = (pixman_color_t*) &transparent_black;
+	}
+	else if (n == count)
+	{
+	    left_x  = stops[n-1].x;
+	    right_x = INT32_MAX;
+	    left_c  = right_c = (pixman_color_t*) &transparent_black;
+	}
+	else
+	{
+	    left_x  =  stops[n-1].x;
+	    right_x =  stops[n].x;
+	    left_c  = &stops[n-1].color;
+	    right_c = &stops[n].color;
+	}
+    }
+
+    walker->left_x   = left_x;
+    walker->right_x  = right_x;
+    walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);
+    walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
+    walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
+    walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
+
+    if ( walker->left_x == walker->right_x                ||
+	 ( walker->left_ag == walker->right_ag &&
+	   walker->left_rb == walker->right_rb )   )
+    {
+	walker->stepper = 0;
+    }
+    else
+    {
+	int32_t width = right_x - left_x;
+	walker->stepper = ((1 << 24) + width/2)/width;
+    }
+
+    walker->need_reset = FALSE;
+}
+
+#define  GRADIENT_WALKER_NEED_RESET(w,x)				\
+    ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x)
+
+
+/* the following assumes that GRADIENT_WALKER_NEED_RESET(w,x) is FALSE */
+static uint32_t
+_gradient_walker_pixel (GradientWalker  *walker,
+                        pixman_fixed_32_32_t     x)
+{
+    int  dist, idist;
+    uint32_t  t1, t2, a, color;
+
+    if (GRADIENT_WALKER_NEED_RESET (walker, x))
+        _gradient_walker_reset (walker, x);
+
+    dist  = ((int)(x - walker->left_x)*walker->stepper) >> 16;
+    idist = 256 - dist;
+
+    /* combined INTERPOLATE and premultiply */
+    t1 = walker->left_rb*idist + walker->right_rb*dist;
+    t1 = (t1 >> 8) & 0xff00ff;
+
+    t2  = walker->left_ag*idist + walker->right_ag*dist;
+    t2 &= 0xff00ff00;
+
+    color = t2 & 0xff000000;
+    a     = t2 >> 24;
+
+    t1  = t1*a + 0x800080;
+    t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
+
+    t2  = (t2 >> 8)*a + 0x800080;
+    t2  = (t2 + ((t2 >> 8) & 0xff00ff));
+
+    return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
+}
+
+void pixmanFetchSourcePict(source_image_t * pict, int x, int y, int width,
+                           uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
+{
+#if 0
+    SourcePictPtr   pGradient = pict->pSourcePict;
+#endif
+    GradientWalker  walker;
+    uint32_t       *end = buffer + width;
+    gradient_t	    *gradient;
+
+    if (pict->common.type == SOLID)
+    {
+	register uint32_t color = ((solid_fill_t *)pict)->color;
+
+	while (buffer < end)
+	    *(buffer++) = color;
+
+	return;
+    }
+
+    gradient = (gradient_t *)pict;
+
+    _gradient_walker_init (&walker, gradient, pict->common.repeat);
+
+    if (pict->common.type == LINEAR) {
+	pixman_vector_t v, unit;
+	pixman_fixed_32_32_t l;
+	pixman_fixed_48_16_t dx, dy, a, b, off;
+	linear_gradient_t *linear = (linear_gradient_t *)pict;
+
+        /* reference point is the center of the pixel */
+        v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
+        v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
+        v.vector[2] = pixman_fixed_1;
+        if (pict->common.transform) {
+            if (!pixman_transform_point_3d (pict->common.transform, &v))
+                return;
+            unit.vector[0] = pict->common.transform->matrix[0][0];
+            unit.vector[1] = pict->common.transform->matrix[1][0];
+            unit.vector[2] = pict->common.transform->matrix[2][0];
+        } else {
+            unit.vector[0] = pixman_fixed_1;
+            unit.vector[1] = 0;
+            unit.vector[2] = 0;
+        }
+
+        dx = linear->p2.x - linear->p1.x;
+        dy = linear->p2.y - linear->p1.y;
+        l = dx*dx + dy*dy;
+        if (l != 0) {
+            a = (dx << 32) / l;
+            b = (dy << 32) / l;
+            off = (-a*linear->p1.x - b*linear->p1.y)>>16;
+        }
+        if (l == 0  || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)) {
+            pixman_fixed_48_16_t inc, t;
+            /* affine transformation only */
+            if (l == 0) {
+                t = 0;
+                inc = 0;
+            } else {
+                t = ((a*v.vector[0] + b*v.vector[1]) >> 16) + off;
+                inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16;
+            }
+
+	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
+	    {
+		register uint32_t color;
+
+		color = _gradient_walker_pixel( &walker, t );
+		while (buffer < end)
+		    *(buffer++) = color;
+	    }
+	    else
+	    {
+                if (!mask) {
+                    while (buffer < end)
+                    {
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+                        buffer += 1;
+                        t      += inc;
+                    }
+                } else {
+                    while (buffer < end) {
+                        if (*mask++ & maskBits)
+                        {
+			    *(buffer) = _gradient_walker_pixel (&walker, t);
+                        }
+                        buffer += 1;
+                        t      += inc;
+                    }
+                }
+	    }
+	}
+	else /* projective transformation */
+	{
+	    pixman_fixed_48_16_t t;
+
+	    if (pict->class == SOURCE_IMAGE_CLASS_VERTICAL)
+	    {
+		register uint32_t color;
+
+		if (v.vector[2] == 0)
+		{
+		    t = 0;
+		}
+		else
+		{
+		    pixman_fixed_48_16_t x, y;
+
+		    x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2];
+		    y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2];
+		    t = ((a * x + b * y) >> 16) + off;
+		}
+
+ 		color = _gradient_walker_pixel( &walker, t );
+		while (buffer < end)
+		    *(buffer++) = color;
+	    }
+	    else
+	    {
+		while (buffer < end)
+		{
+		    if (!mask || *mask++ & maskBits)
+		    {
+			if (v.vector[2] == 0) {
+			    t = 0;
+			} else {
+			    pixman_fixed_48_16_t x, y;
+			    x = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2];
+			    y = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2];
+			    t = ((a*x + b*y) >> 16) + off;
+			}
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+		    }
+		    ++buffer;
+		    v.vector[0] += unit.vector[0];
+		    v.vector[1] += unit.vector[1];
+		    v.vector[2] += unit.vector[2];
+		}
+            }
+        }
+    } else {
+
+/*
+ * In the radial gradient problem we are given two circles (c₁,r₁) and
+ * (c₂,r₂) that define the gradient itself. Then, for any point p, we
+ * must compute the value(s) of t within [0.0, 1.0] representing the
+ * circle(s) that would color the point.
+ *
+ * There are potentially two values of t since the point p can be
+ * colored by both sides of the circle, (which happens whenever one
+ * circle is not entirely contained within the other).
+ *
+ * If we solve for a value of t that is outside of [0.0, 1.0] then we
+ * use the extend mode (NONE, REPEAT, REFLECT, or PAD) to map to a
+ * value within [0.0, 1.0].
+ *
+ * Here is an illustration of the problem:
+ *
+ *              p₂
+ *           p  •
+ *           •   ╲
+ *        ·       ╲r₂
+ *  p₁ ·           ╲
+ *  •              θ╲
+ *   ╲             ╌╌•
+ *    ╲r₁        ·   c₂
+ *    θ╲    ·
+ *    ╌╌•
+ *      c₁
+ *
+ * Given (c₁,r₁), (c₂,r₂) and p, we must find an angle θ such that two
+ * points p₁ and p₂ on the two circles are collinear with p. Then, the
+ * desired value of t is the ratio of the length of p₁p to the length
+ * of p₁p₂.
+ *
+ * So, we have six unknown values: (p₁x, p₁y), (p₂x, p₂y), θ and t.
+ * We can also write six equations that constrain the problem:
+ *
+ * Point p₁ is a distance r₁ from c₁ at an angle of θ:
+ *
+ *	1. p₁x = c₁x + r₁·cos θ
+ *	2. p₁y = c₁y + r₁·sin θ
+ *
+ * Point p₂ is a distance r₂ from c₂ at an angle of θ:
+ *
+ *	3. p₂x = c₂x + r₂·cos θ
+ *	4. p₂y = c₂y + r₂·sin θ
+ *
+ * Point p lies at a fraction t along the line segment p₁p₂:
+ *
+ *	5. px = t·p₂x + (1-t)·p₁x
+ *	6. py = t·p₂y + (1-t)·p₁y
+ *
+ * To solve, first substitute 1-4 into 5 and 6:
+ *
+ * px = t·(c₂x + r₂·cos θ) + (1-t)·(c₁x + r₁·cos θ)
+ * py = t·(c₂y + r₂·sin θ) + (1-t)·(c₁y + r₁·sin θ)
+ *
+ * Then solve each for cos θ and sin θ expressed as a function of t:
+ *
+ * cos θ = (-(c₂x - c₁x)·t + (px - c₁x)) / ((r₂-r₁)·t + r₁)
+ * sin θ = (-(c₂y - c₁y)·t + (py - c₁y)) / ((r₂-r₁)·t + r₁)
+ *
+ * To simplify this a bit, we define new variables for several of the
+ * common terms as shown below:
+ *
+ *              p₂
+ *           p  •
+ *           •   ╲
+ *        ·  ┆    ╲r₂
+ *  p₁ ·     ┆     ╲
+ *  •     pdy┆      ╲
+ *   ╲       ┆       •c₂
+ *    ╲r₁    ┆   ·   ┆
+ *     ╲    ·┆       ┆cdy
+ *      •╌╌╌╌┴╌╌╌╌╌╌╌┘
+ *    c₁  pdx   cdx
+ *
+ * cdx = (c₂x - c₁x)
+ * cdy = (c₂y - c₁y)
+ *  dr =  r₂-r₁
+ * pdx =  px - c₁x
+ * pdy =  py - c₁y
+ *
+ * Note that cdx, cdy, and dr do not depend on point p at all, so can
+ * be pre-computed for the entire gradient. The simplified equations
+ * are now:
+ *
+ * cos θ = (-cdx·t + pdx) / (dr·t + r₁)
+ * sin θ = (-cdy·t + pdy) / (dr·t + r₁)
+ *
+ * Finally, to get a single function of t and eliminate the last
+ * unknown θ, we use the identity sin²θ + cos²θ = 1. First, square
+ * each equation, (we knew a quadratic was coming since it must be
+ * possible to obtain two solutions in some cases):
+ *
+ * cos²θ = (cdx²t² - 2·cdx·pdx·t + pdx²) / (dr²·t² + 2·r₁·dr·t + r₁²)
+ * sin²θ = (cdy²t² - 2·cdy·pdy·t + pdy²) / (dr²·t² + 2·r₁·dr·t + r₁²)
+ *
+ * Then add both together, set the result equal to 1, and express as a
+ * standard quadratic equation in t of the form At² + Bt + C = 0
+ *
+ * (cdx² + cdy² - dr²)·t² - 2·(cdx·pdx + cdy·pdy + r₁·dr)·t + (pdx² + pdy² - r₁²) = 0
+ *
+ * In other words:
+ *
+ * A = cdx² + cdy² - dr²
+ * B = -2·(pdx·cdx + pdy·cdy + r₁·dr)
+ * C = pdx² + pdy² - r₁²
+ *
+ * And again, notice that A does not depend on p, so can be
+ * precomputed. From here we just use the quadratic formula to solve
+ * for t:
+ *
+ * t = (-B ± ⎷(B² - 4·A·C)) / (2·A)
+ */
+        /* radial or conical */
+        pixman_bool_t affine = TRUE;
+        double cx = 1.;
+        double cy = 0.;
+        double cz = 0.;
+	double rx = x + 0.5;
+	double ry = y + 0.5;
+        double rz = 1.;
+
+        if (pict->common.transform) {
+            pixman_vector_t v;
+            /* reference point is the center of the pixel */
+            v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
+            v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
+            v.vector[2] = pixman_fixed_1;
+            if (!pixman_transform_point_3d (pict->common.transform, &v))
+                return;
+
+            cx = pict->common.transform->matrix[0][0]/65536.;
+            cy = pict->common.transform->matrix[1][0]/65536.;
+            cz = pict->common.transform->matrix[2][0]/65536.;
+            rx = v.vector[0]/65536.;
+            ry = v.vector[1]/65536.;
+            rz = v.vector[2]/65536.;
+            affine = pict->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1;
+        }
+
+        if (pict->common.type == RADIAL) {
+	    radial_gradient_t *radial = (radial_gradient_t *)pict;
+            if (affine) {
+                while (buffer < end) {
+		    if (!mask || *mask++ & maskBits)
+		    {
+			double pdx, pdy;
+			double B, C;
+			double det;
+			double c1x = radial->c1.x / 65536.0;
+			double c1y = radial->c1.y / 65536.0;
+			double r1  = radial->c1.radius / 65536.0;
+                        pixman_fixed_48_16_t t;
+
+			pdx = rx - c1x;
+			pdy = ry - c1y;
+
+			B = -2 * (  pdx * radial->cdx
+				    + pdy * radial->cdy
+				    + r1 * radial->dr);
+			C = (pdx * pdx + pdy * pdy - r1 * r1);
+
+                        det = (B * B) - (4 * radial->A * C);
+			if (det < 0.0)
+			    det = 0.0;
+
+			if (radial->A < 0)
+			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
+			else
+			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
+
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+		    }
+		    ++buffer;
+
+                    rx += cx;
+                    ry += cy;
+                }
+            } else {
+		/* projective */
+                while (buffer < end) {
+		    if (!mask || *mask++ & maskBits)
+		    {
+			double pdx, pdy;
+			double B, C;
+			double det;
+			double c1x = radial->c1.x / 65536.0;
+			double c1y = radial->c1.y / 65536.0;
+			double r1  = radial->c1.radius / 65536.0;
+                        pixman_fixed_48_16_t t;
+			double x, y;
+
+			if (rz != 0) {
+			    x = rx/rz;
+			    y = ry/rz;
+			} else {
+			    x = y = 0.;
+			}
+
+			pdx = x - c1x;
+			pdy = y - c1y;
+
+			B = -2 * (  pdx * radial->cdx
+				    + pdy * radial->cdy
+				    + r1 * radial->dr);
+			C = (pdx * pdx + pdy * pdy - r1 * r1);
+
+                        det = (B * B) - (4 * radial->A * C);
+			if (det < 0.0)
+			    det = 0.0;
+
+			if (radial->A < 0)
+			    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
+			else
+			    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
+
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+		    }
+		    ++buffer;
+
+                    rx += cx;
+                    ry += cy;
+		    rz += cz;
+                }
+            }
+        } else /* SourcePictTypeConical */ {
+	    conical_gradient_t *conical = (conical_gradient_t *)pict;
+            double a = conical->angle/(180.*65536);
+            if (affine) {
+                rx -= conical->center.x/65536.;
+                ry -= conical->center.y/65536.;
+
+                while (buffer < end) {
+		    double angle;
+
+                    if (!mask || *mask++ & maskBits)
+		    {
+                        pixman_fixed_48_16_t   t;
+
+                        angle = atan2(ry, rx) + a;
+			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
+
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+		    }
+
+                    ++buffer;
+                    rx += cx;
+                    ry += cy;
+                }
+            } else {
+                while (buffer < end) {
+                    double x, y;
+                    double angle;
+
+                    if (!mask || *mask++ & maskBits)
+                    {
+			pixman_fixed_48_16_t  t;
+
+			if (rz != 0) {
+			    x = rx/rz;
+			    y = ry/rz;
+			} else {
+			    x = y = 0.;
+			}
+			x -= conical->center.x/65536.;
+			y -= conical->center.y/65536.;
+			angle = atan2(y, x) + a;
+			t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
+
+			*(buffer) = _gradient_walker_pixel (&walker, t);
+		    }
+
+                    ++buffer;
+                    rx += cx;
+                    ry += cy;
+                    rz += cz;
+                }
+            }
+        }
+    }
+}
commit 76febfcd20c962a467d08c2ee9be612216ffa030
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Fri Mar 21 18:06:54 2008 -0700

    Move fetch, fetchPixel, and store routines into pixman-access.c.

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 40ff20f..259c2a5 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -3,6 +3,8 @@ libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO)
 libpixman_1_la_LIBADD = @DEP_LIBS@ -lm
 libpixman_1_la_SOURCES =		\
 	pixman.h		\
+	pixman-access.c		\
+	pixman-access-accessors.c	\
 	pixman-region.c		\
 	pixman-private.h	\
 	pixman-image.c		\
diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
index eb3795d..4a772ee 100644
--- a/pixman/Makefile.win32
+++ b/pixman/Makefile.win32
@@ -24,6 +24,8 @@ endif
 SOURCES = \
 	pixman-region.c				\
 	pixman-image.c					\
+	pixman-access.c					\
+	pixman-access-accessors.c			\
 	pixman-combine.c				\
 	pixman-compose.c				\
 	pixman-compose-accessors.c	\
diff --git a/pixman/pixman-access-accessors.c b/pixman/pixman-access-accessors.c
new file mode 100644
index 0000000..3263582
--- /dev/null
+++ b/pixman/pixman-access-accessors.c
@@ -0,0 +1,3 @@
+#define PIXMAN_FB_ACCESSORS /* makes pixman-access.c pick its *_accessors entry-point names */
+
+#include "pixman-access.c"
diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
new file mode 100644
index 0000000..94af206
--- /dev/null
+++ b/pixman/pixman-access.c
@@ -0,0 +1,1686 @@
+/*
+ *
+ * Copyright © 2000 Keith Packard, member of The XFree86 Project, Inc.
+ *             2005 Lars Knoll & Zack Rusin, Trolltech
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Keith Packard not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Keith Packard makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include "pixman-private.h"
+
+#ifdef PIXMAN_FB_ACCESSORS /* choose the exported names of the three lookup functions for this build of the file */
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture_accessors
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture_accessors
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture_accessors
+#else
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture
+#endif
+
+/*
+ * YV12 setup and access macros
+ */
+
+#define YV12_SETUP(pict) \
+	uint32_t *bits = pict->bits; \
+	int stride = pict->rowstride; \
+	int offset0 = stride < 0 ? \
+		((-stride) >> 1) * ((pict->height - 1) >> 1) - stride : \
+		stride * pict->height; \
+	int offset1 = stride < 0 ? \
+		offset0 + ((-stride) >> 1) * ((pict->height) >> 1) : \
+		offset0 + (offset0 >> 2)
+/* Note: no trailing semicolon on the above macro; if it were there, then
+ * the typical usage of YV12_SETUP(pict); would have an extra trailing ;
+ * that some compilers will interpret as a statement -- and then any further
+ * variable declarations will cause an error.  The macro declares the locals
+ * bits, stride, offset0 and offset1 that YV12_Y/U/V below rely on. */
+
+#define YV12_Y(line)		\
+    ((uint8_t *) ((bits) + (stride) * (line))) /* byte pointer to row `line`, stepping by the full stride */
+
+#define YV12_U(line)	      \
+    ((uint8_t *) ((bits) + offset1 + \
+		((stride) >> 1) * ((line) >> 1))) /* starts at offset1; half stride, one row per two lines */
+
+#define YV12_V(line)	      \
+    ((uint8_t *) ((bits) + offset0 + \
+		((stride) >> 1) * ((line) >> 1))) /* starts at offset0; half stride, one row per two lines */
+
+/*********************************** Fetch ************************************/
+
+static FASTCALL void
+fbFetch_a8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Copy `width` 32-bit pixels, starting at pixel x of row y, into buffer unchanged via MEMCPY_WRAPPED. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    MEMCPY_WRAPPED(pict,
+                   buffer, (const uint32_t *)bits + x,
+		   width*sizeof(uint32_t));
+}
+
+static FASTCALL void
+fbFetch_x8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Read `width` 32-bit pixels, starting at pixel x of row y, and store each with its top byte forced to 0xff. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint32_t *pixel = (const uint32_t *)bits + x;
+    const uint32_t *end = pixel + width;
+    while (pixel < end) {
+	*buffer++ = READ(pict, pixel++) | 0xff000000;
+    }
+}
+
+static FASTCALL void
+fbFetch_a8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Read `width` 32-bit pixels, starting at pixel x of row y, swapping bits 0-7 with bits 16-23 of each. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint32_t *pixel = (uint32_t *)bits + x;
+    const uint32_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t p = READ(pict, pixel++);
+	*buffer++ = (p & 0xff00ff00) | /* bits 8-15 and 24-31 stay in place */
+	            ((p >> 16) & 0xff) |
+	    ((p & 0xff) << 16);
+    }
+}
+
+static FASTCALL void
+fbFetch_x8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Like fbFetch_a8b8g8r8 (swap bits 0-7 with 16-23) but the top byte of every result is forced to 0xff. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint32_t *pixel = (uint32_t *)bits + x;
+    const uint32_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t p = READ(pict, pixel++);
+	*buffer++ = 0xff000000 |
+	    (p & 0x0000ff00) |
+	    ((p >> 16) & 0xff) |
+	    ((p & 0xff) << 16);
+    }
+}
+
+static FASTCALL void
+fbFetch_r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Read `width` 3-byte pixels, starting at pixel x of row y, with Fetch24 and store each with the top byte set to 0xff. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
+    const uint8_t *end = pixel + 3*width;
+    while (pixel < end) {
+	uint32_t b = Fetch24(pict, pixel) | 0xff000000;
+	pixel += 3;
+	*buffer++ = b;
+    }
+}
+
+static FASTCALL void
+fbFetch_b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Read `width` 3-byte pixels, starting at pixel x of row y, one byte at a time; byte placement depends on IMAGE_BYTE_ORDER. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
+    const uint8_t *end = pixel + 3*width;
+    while (pixel < end) {
+	uint32_t b = 0xff000000; /* top byte is always 0xff */
+#if IMAGE_BYTE_ORDER == MSBFirst
+	b |= (READ(pict, pixel++));
+	b |= (READ(pict, pixel++) << 8);
+	b |= (READ(pict, pixel++) << 16);
+#else
+	b |= (READ(pict, pixel++) << 16);
+	b |= (READ(pict, pixel++) << 8);
+	b |= (READ(pict, pixel++));
+#endif
+	*buffer++ = b;
+    }
+}
+
+static FASTCALL void
+fbFetch_r5g6b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 16-bit 5-6-5 pixels, starting at pixel x of row y, to 32 bits with the top byte set to 0xff. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t p = READ(pict, pixel++);
+	uint32_t r = (((p) << 3) & 0xf8) | /* move each field to the top of its own byte */
+	    (((p) << 5) & 0xfc00) |
+	    (((p) << 8) & 0xf80000);
+	r |= (r >> 5) & 0x70007; /* refill the low 3 bits of the 5-bit channels from their high bits */
+	r |= (r >> 6) & 0x300; /* refill the low 2 bits of the 6-bit channel from its high bits */
+	*buffer++ = 0xff000000 | r;
+    }
+}
+
+static FASTCALL void
+fbFetch_b5g6r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 16-bit pixels (b in bits 11-15, g in 5-10, r in 0-4), starting at pixel x of row y, to 32 bits; top byte 0xff. */
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+	b = ((p & 0xf800) | ((p & 0xe000) >> 5)) >> 8; /* each channel: field plus a copy of its high bits below it */
+	g = ((p & 0x07e0) | ((p & 0x0600) >> 6)) << 5;
+	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 16-bit 1-5-5-5 pixels, starting at pixel x of row y, to 32 bits; bit 15 becomes a 0x00 or 0xff top byte. */
+    uint32_t  r,g,b, a;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24; /* 0 - bit, truncated to 8 bits: 0x00 or 0xff */
+	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
+	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
+	b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
+	*buffer++ = a | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_x1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 16-bit x-5-5-5 pixels, starting at pixel x of row y, to 32 bits; bit 15 is ignored and the top byte is 0xff. */
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
+	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
+	b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* As fbFetch_a1r5g5b5 but with b in bits 10-14 and r in bits 0-4 of the source pixel. */
+    uint32_t  r,g,b, a;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24; /* bit 15 -> 0x00 or 0xff */
+	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
+	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
+	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
+	*buffer++ = a | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_x1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* As fbFetch_a1b5g5r5 but bit 15 is ignored and the top byte of every result is 0xff. */
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
+	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
+	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 16-bit 4-4-4-4 pixels, starting at pixel x of row y, to 32 bits by duplicating each nibble into a full byte. */
+    uint32_t  r,g,b, a;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
+	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
+	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
+	b = ((p & 0x000f) | ((p & 0x000f) << 4));
+	*buffer++ = a | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_x4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* As fbFetch_a4r4g4b4 but the top nibble of the source is ignored and the top byte of every result is 0xff. */
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
+	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
+	b = ((p & 0x000f) | ((p & 0x000f) << 4));
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* As fbFetch_a4r4g4b4 but with b in bits 8-11 and r in bits 0-3 of the source pixel. */
+    uint32_t  r,g,b, a;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
+	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
+	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
+	r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
+	*buffer++ = a | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_x4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* As fbFetch_a4b4g4r4 but the top nibble of the source is ignored and the top byte of every result is 0xff. */
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint16_t *pixel = (const uint16_t *)bits + x;
+    const uint16_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
+	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
+	r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Read `width` 8-bit pixels, starting at pixel x of row y, and store each in the top byte of a 32-bit value (low 24 bits zero). */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	*buffer++ = (uint32_t) READ(pict, pixel++) << 24; /* cast so the shift is unsigned whatever type READ() yields */
+    }
+}
+
+static FASTCALL void
+fbFetch_r3g3b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 8-bit pixels (r in bits 5-7, g in 2-4, b in 0-1), starting at pixel x of row y, to 32 bits; top byte 0xff. */
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	r = ((p & 0xe0) | ((p & 0xe0) >> 3) | ((p & 0xc0) >> 6)) << 16; /* 3 bits repeated to fill 8 */
+	g = ((p & 0x1c) | ((p & 0x18) >> 3) | ((p & 0x1c) << 3)) << 8;
+	b = (((p & 0x03)     ) | /* 2 bits repeated four times */
+	     ((p & 0x03) << 2) |
+	     ((p & 0x03) << 4) |
+	     ((p & 0x03) << 6));
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_b2g3r3 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 8-bit pixels (b in bits 6-7, g in 3-5, r in 0-2), starting at pixel x of row y, to 32 bits; top byte 0xff. */
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	b = (((p & 0xc0)     ) | /* 2 bits repeated four times */
+	     ((p & 0xc0) >> 2) |
+	     ((p & 0xc0) >> 4) |
+	     ((p & 0xc0) >> 6));
+	g = ((p & 0x38) | ((p & 0x38) >> 3) | ((p & 0x30) << 2)) << 8;
+	r = (((p & 0x07)     ) |
+	     ((p & 0x07) << 3) |
+	     ((p & 0x06) << 5)) << 16; /* top two red bits fill bits 6-7; << 6 left bit 6 clear and spilled into bit 8 */
+	*buffer++ = 0xff000000 | r | g | b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a2r2g2b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 8-bit 2-2-2-2 pixels, starting at pixel x of row y, to 32 bits; multiplying a 2-bit field by 0x55 repeats it four times. */
+    uint32_t   a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = ((p & 0xc0) * 0x55) << 18;
+	r = ((p & 0x30) * 0x55) << 12;
+	g = ((p & 0x0c) * 0x55) << 6;
+	b = ((p & 0x03) * 0x55);
+	*buffer++ = a|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a2b2g2r2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 8-bit pixels (a in bits 6-7, b in 4-5, g in 2-3, r in 0-1), starting at pixel x of row y, to 32 bits. */
+    uint32_t   a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+
+	a = ((p & 0xc0) * 0x55) << 18;
+	b = ((p & 0x30) * 0x55) >> 4; /* 0x30 * 0x55 = 0xff0, so >> 4 gives 0x00..0xff; >> 6 capped blue at 0x3f */
+	g = ((p & 0x0c) * 0x55) << 6;
+	r = ((p & 0x03) * 0x55) << 16;
+	*buffer++ = a|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_c8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Read `width` 8-bit pixels, starting at pixel x of row y, and store the pict->indexed->rgba[] entry each one selects. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const pixman_indexed_t * indexed = pict->indexed;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t  p = READ(pict, pixel++);
+	*buffer++ = indexed->rgba[p];
+    }
+}
+
+static FASTCALL void
+fbFetch_x4a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Read `width` 8-bit pixels, starting at pixel x of row y; the low nibble is duplicated into the top byte, the high nibble is ignored. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + x;
+    const uint8_t *end = pixel + width;
+    while (pixel < end) {
+	uint32_t p = READ(pict, pixel++) & 0xf; /* uint32_t (as in fbFetch_a4): a uint8_t would promote to int and overflow on << 24 */
+	*buffer++ = (p | (p << 4)) << 24;
+    }
+}
+
+#define Fetch8(img,l,o)    (READ(img, (uint8_t *)(l) + ((o) >> 2)))
+#if IMAGE_BYTE_ORDER == MSBFirst /* Fetch4: o is a 4-bit pixel index, so the byte is o >> 1 and the nibble is chosen by o & 1 */
+#define Fetch4(img,l,o)    ((o) & 1 ? READ(img, (uint8_t *)(l) + ((o) >> 1)) & 0xf : READ(img, (uint8_t *)(l) + ((o) >> 1)) >> 4)
+#else /* the previous o >> 2 and o & 2 returned every nibble for two consecutive pixel indices */
+#define Fetch4(img,l,o)    ((o) & 1 ? READ(img, (uint8_t *)(l) + ((o) >> 1)) >> 4 : READ(img, (uint8_t *)(l) + ((o) >> 1)) & 0xf)
+#endif
+
+static FASTCALL void
+fbFetch_a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* For `width` pixels starting at index x of row y: get a 4-bit value with Fetch4, duplicate it to 8 bits, store it in the top byte. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	p |= p << 4;
+	*buffer++ = p << 24;
+    }
+}
+
+static FASTCALL void
+fbFetch_r1g2b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 4-bit pixels (r in bit 3, g in bits 1-2, b in bit 0), fetched with Fetch4, to 32 bits; top byte 0xff. */
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	r = ((p & 0x8) * 0xff) << 13; /* 8 * 0xff = 0x7f8; << 13 puts 0xff in bits 16-23 */
+	g = ((p & 0x6) * 0x55) << 7;
+	b = ((p & 0x1) * 0xff);
+	*buffer++ = 0xff000000|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_b1g2r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 4-bit pixels (b in bit 3, g in bits 1-2, r in bit 0), fetched with Fetch4, to 32 bits; top byte 0xff. */
+    uint32_t  r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	b = ((p & 0x8) * 0xff) >> 3; /* 8 * 0xff = 0x7f8; >> 3 gives 0xff in the low byte */
+	g = ((p & 0x6) * 0x55) << 7;
+	r = ((p & 0x1) * 0xff) << 16;
+	*buffer++ = 0xff000000|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a1r1g1b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 4-bit pixels (a, r, g, b in bits 3, 2, 1, 0), fetched with Fetch4, so each set bit becomes a 0xff byte. */
+    uint32_t  a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	a = ((p & 0x8) * 0xff) << 21;
+	r = ((p & 0x4) * 0xff) << 14;
+	g = ((p & 0x2) * 0xff) << 7;
+	b = ((p & 0x1) * 0xff);
+	*buffer++ = a|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_a1b1g1r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* Expand `width` 4-bit pixels (a, b, g, r in bits 3, 2, 1, 0), fetched with Fetch4, so each set bit becomes a 0xff byte. */
+    uint32_t  a,r,g,b;
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	a = ((p & 0x8) * 0xff) << 21;
+	b = ((p & 0x4) * 0xff) >> 2; /* 4 * 0xff = 0x3fc; >> 2 gives 0xff (the old >> 3 gave 0x7f) */
+	g = ((p & 0x2) * 0xff) << 7;
+	r = ((p & 0x1) * 0xff) << 16;
+	*buffer++ = a|r|g|b;
+    }
+}
+
+static FASTCALL void
+fbFetch_c4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* For `width` pixels starting at index x of row y: get a 4-bit value with Fetch4 and store the pict->indexed->rgba[] entry it selects. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const pixman_indexed_t * indexed = pict->indexed;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = Fetch4(pict, bits, i + x);
+
+	*buffer++ = indexed->rgba[p];
+    }
+}
+
+
+static FASTCALL void
+fbFetch_a1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* For `width` 1-bit pixels starting at index x of row y: store 0xff000000 when the bit is set, 0 otherwise. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t  p = READ(pict, bits + ((i + x) >> 5)); /* the 32-bit word holding pixel i + x */
+	uint32_t  a;
+#if BITMAP_BIT_ORDER == MSBFirst
+	a = p >> (0x1f - ((i+x) & 0x1f));
+#else
+	a = p >> ((i+x) & 0x1f);
+#endif
+	a = a & 1;
+	a |= a << 1; /* spread the single bit across 8 bits */
+	a |= a << 2;
+	a |= a << 4;
+	*buffer++ = a << 24;
+    }
+}
+
+static FASTCALL void
+fbFetch_g1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
+{ /* For `width` 1-bit pixels starting at index x of row y: store pict->indexed->rgba[0] or rgba[1] according to the bit. */
+    const uint32_t *bits = pict->bits + y*pict->rowstride;
+    const pixman_indexed_t * indexed = pict->indexed;
+    int i;
+    for (i = 0; i < width; ++i) {
+	uint32_t p = READ(pict, bits + ((i+x) >> 5)); /* the 32-bit word holding pixel i + x */
+	uint32_t a;
+#if BITMAP_BIT_ORDER == MSBFirst
+	a = p >> (0x1f - ((i+x) & 0x1f));
+#else
+	a = p >> ((i+x) & 0x1f);
+#endif
+	a = a & 1;
+	*buffer++ = indexed->rgba[a];
+    }
+}
+
+static FASTCALL void
+fbFetch_yuy2 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
+{ /* Convert `width` packed YUY2 pixels, starting at pixel x of row `line`, to 32-bit values with the top byte set to 0xff. */
+    int16_t y, u, v;
+    int32_t r, g, b;
+    int   i;
+
+    const uint32_t *bits = pict->bits + pict->rowstride * line;
+
+    for (i = 0; i < width; i++)
+    {
+	y = ((uint8_t *) bits)[(x + i) << 1] - 16; /* two bytes per pixel; Y is the first of each pair */
+	u = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 1] - 128; /* & -4: U and V are shared by each 4-byte pixel pair */
+	v = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 3] - 128;
+
+	/* R = 1.164(Y - 16) + 1.596(V - 128) */
+	r = 0x012b27 * y + 0x019a2e * v;
+	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+	/* B = 1.164(Y - 16) + 2.018(U - 128) */
+	b = 0x012b27 * y + 0x0206a2 * u;
+
+	*buffer++ = 0xff000000 | /* plain store, as in the other fetchers here, not WRITE(); channels clamp to 0..0xff */
+	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
+	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
+	    (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
+    }
+}
+
+static FASTCALL void
+fbFetch_yv12 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
+{ /* Convert `width` planar YV12 pixels, starting at pixel x of row `line`, to 32-bit values with the top byte set to 0xff. */
+    YV12_SETUP(pict);
+    uint8_t *pY = YV12_Y (line);
+    uint8_t *pU = YV12_U (line);
+    uint8_t *pV = YV12_V (line);
+    int16_t y, u, v;
+    int32_t r, g, b;
+    int   i;
+
+    for (i = 0; i < width; i++)
+    {
+	y = pY[x + i] - 16;
+	u = pU[(x + i) >> 1] - 128; /* one U and one V sample per two horizontal pixels */
+	v = pV[(x + i) >> 1] - 128;
+
+	/* R = 1.164(Y - 16) + 1.596(V - 128) */
+	r = 0x012b27 * y + 0x019a2e * v;
+	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+	/* B = 1.164(Y - 16) + 2.018(U - 128) */
+	b = 0x012b27 * y + 0x0206a2 * u;
+
+	*buffer++ = 0xff000000 | /* plain store, as in the other fetchers here, not WRITE(); channels clamp to 0..0xff */
+	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
+	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
+	    (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
+    }
+}
+
+fetchProc FETCH_PROC_FOR_PICTURE (bits_image_t * pict)
+{ /* Return the scanline fetcher for pict->format, or NULL when the format has no case below. */
+    switch(pict->format) {
+    case PIXMAN_a8r8g8b8: return fbFetch_a8r8g8b8;
+    case PIXMAN_x8r8g8b8: return fbFetch_x8r8g8b8;
+    case PIXMAN_a8b8g8r8: return fbFetch_a8b8g8r8;
+    case PIXMAN_x8b8g8r8: return fbFetch_x8b8g8r8;
+
+        /* 24bpp formats */
+    case PIXMAN_r8g8b8: return fbFetch_r8g8b8;
+    case PIXMAN_b8g8r8: return fbFetch_b8g8r8;
+
+        /* 16bpp formats */
+    case PIXMAN_r5g6b5: return fbFetch_r5g6b5;
+    case PIXMAN_b5g6r5: return fbFetch_b5g6r5;
+
+    case PIXMAN_a1r5g5b5: return fbFetch_a1r5g5b5;
+    case PIXMAN_x1r5g5b5: return fbFetch_x1r5g5b5;
+    case PIXMAN_a1b5g5r5: return fbFetch_a1b5g5r5;
+    case PIXMAN_x1b5g5r5: return fbFetch_x1b5g5r5;
+    case PIXMAN_a4r4g4b4: return fbFetch_a4r4g4b4;
+    case PIXMAN_x4r4g4b4: return fbFetch_x4r4g4b4;
+    case PIXMAN_a4b4g4r4: return fbFetch_a4b4g4r4;
+    case PIXMAN_x4b4g4r4: return fbFetch_x4b4g4r4;
+
+        /* 8bpp formats */
+    case PIXMAN_a8: return  fbFetch_a8;
+    case PIXMAN_r3g3b2: return fbFetch_r3g3b2;
+    case PIXMAN_b2g3r3: return fbFetch_b2g3r3;
+    case PIXMAN_a2r2g2b2: return fbFetch_a2r2g2b2;
+    case PIXMAN_a2b2g2r2: return fbFetch_a2b2g2r2;
+    case PIXMAN_c8: return  fbFetch_c8;
+    case PIXMAN_g8: return  fbFetch_c8; /* g8 shares the indexed c8 fetcher */
+    case PIXMAN_x4a4: return fbFetch_x4a4;
+
+        /* 4bpp formats */
+    case PIXMAN_a4: return  fbFetch_a4;
+    case PIXMAN_r1g2b1: return fbFetch_r1g2b1;
+    case PIXMAN_b1g2r1: return fbFetch_b1g2r1;
+    case PIXMAN_a1r1g1b1: return fbFetch_a1r1g1b1;
+    case PIXMAN_a1b1g1r1: return fbFetch_a1b1g1r1;
+    case PIXMAN_c4: return  fbFetch_c4;
+    case PIXMAN_g4: return  fbFetch_c4; /* g4 shares the indexed c4 fetcher */
+
+        /* 1bpp formats */
+    case PIXMAN_a1: return  fbFetch_a1;
+    case PIXMAN_g1: return  fbFetch_g1;
+
+        /* YUV formats */
+    case PIXMAN_yuy2: return fbFetch_yuy2;
+    case PIXMAN_yv12: return fbFetch_yv12;
+    }
+
+    return NULL;
+}
+
+/**************************** Pixel wise fetching *****************************/
+
+static FASTCALL uint32_t
+fbFetchPixel_a8r8g8b8 (bits_image_t *pict, int offset, int line)
+{ /* Return the 32-bit pixel at index `offset` of row `line`, unchanged. */
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    return READ(pict, (uint32_t *)bits + offset);
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_x8r8g8b8 (bits_image_t *pict, int offset, int line)
+{ /* Return the 32-bit pixel at index `offset` of row `line` with its top byte forced to 0xff. */
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    return READ(pict, (uint32_t *)bits + offset) | 0xff000000;
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_a8b8g8r8 (bits_image_t *pict, int offset, int line)
+{ /* Return the 32-bit pixel at index `offset` of row `line` with bits 0-7 and bits 16-23 swapped. */
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
+
+    return ((pixel & 0xff000000) |
+	    ((pixel >> 16) & 0xff) |
+	    (pixel & 0x0000ff00) |
+	    ((pixel & 0xff) << 16));
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_x8b8g8r8 (bits_image_t *pict, int offset, int line)
+{ /* As fbFetchPixel_a8b8g8r8 (swap bits 0-7 with 16-23) but the top byte of the result is forced to 0xff. */
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
+
+    return ((0xff000000) |
+	    ((pixel >> 16) & 0xff) |
+	    (pixel & 0x0000ff00) |
+	    ((pixel & 0xff) << 16));
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_r8g8b8 (bits_image_t *pict, int offset, int line)
+{ /* Assemble the 3-byte pixel at index `offset` of row `line` into 32 bits, top byte 0xff; byte order follows IMAGE_BYTE_ORDER. */
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
+#if IMAGE_BYTE_ORDER == MSBFirst
+    return (0xff000000 |
+	    (READ(pict, pixel + 0) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 2)));
+#else
+    return (0xff000000 |
+	    (READ(pict, pixel + 2) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 0)));
+#endif
+}
+
+static FASTCALL uint32_t
+fbFetchPixel_b8g8r8 (bits_image_t *pict, int offset, int line)
+{ /* As fbFetchPixel_r8g8b8 but with the first and third source bytes exchanged in the result. */
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
+#if IMAGE_BYTE_ORDER == MSBFirst
+    return (0xff000000 |
+	    (READ(pict, pixel + 2) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 0)));
+#else
+    return (0xff000000 |
+	    (READ(pict, pixel + 0) << 16) |
+	    (READ(pict, pixel + 1) << 8) |
+	    (READ(pict, pixel + 2)));
+#endif
+}
+
+/* Read the 16-bit pixel at (offset, line) and widen its 5/6/5-bit fields to
+ * 8 bits each; the top byte of the result is 0xff. */
+static FASTCALL uint32_t
+fbFetchPixel_r5g6b5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint16_t *) row + offset);
+    uint32_t  red, green, blue;
+
+    /* Each field is OR-ed with a copy of its own top bits placed just
+     * below it, then moved to its byte in the result. */
+    red   = (p & 0xf800) | ((p & 0xe000) >> 5);
+    green = (p & 0x07e0) | ((p & 0x0600) >> 6);
+    blue  = (p & 0x001c) | ((p & 0x001f) << 5);
+
+    return 0xff000000 | (red << 8) | (green << 5) | (blue >> 2);
+}
+
+/* Read the 16-bit pixel at (offset, line) and widen its fields to 8 bits:
+ * the top 5 bits go to the low byte, the low 5 bits to bits 16-23, the
+ * middle 6 bits to bits 8-15; the top byte of the result is 0xff. */
+static FASTCALL uint32_t
+fbFetchPixel_b5g6r5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint16_t *) row + offset);
+    uint32_t  top, middle, bottom;
+
+    top    = (p & 0xf800) | ((p & 0xe000) >> 5);
+    middle = (p & 0x07e0) | ((p & 0x0600) >> 6);
+    bottom = (p & 0x001c) | ((p & 0x001f) << 5);
+
+    return 0xff000000 | (bottom << 14) | (middle << 5) | (top >> 8);
+}
+
+/* Read the 16-bit pixel at (offset, line): bit 15 becomes a top byte of
+ * 0xff or 0x00, and the three 5-bit fields are widened to 8 bits each. */
+static FASTCALL uint32_t
+fbFetchPixel_a1r5g5b5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint16_t *) row + offset);
+    uint32_t  alpha, red, green, blue;
+
+    alpha = (p & 0x8000) ? 0xff000000 : 0;
+    red   = (p & 0x7c00) | ((p & 0x7000) >> 5);
+    green = (p & 0x03e0) | ((p & 0x0380) >> 5);
+    blue  = (p & 0x001c) | ((p & 0x001f) << 5);
+
+    return alpha | (red << 9) | (green << 6) | (blue >> 2);
+}
+
+/* Read the 16-bit pixel at (offset, line), ignore bit 15, widen the three
+ * 5-bit fields to 8 bits each and set the top byte of the result to 0xff. */
+static FASTCALL uint32_t
+fbFetchPixel_x1r5g5b5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint16_t *) row + offset);
+    uint32_t  red, green, blue;
+
+    red   = (p & 0x7c00) | ((p & 0x7000) >> 5);
+    green = (p & 0x03e0) | ((p & 0x0380) >> 5);
+    blue  = (p & 0x001c) | ((p & 0x001f) << 5);
+
+    return 0xff000000 | (red << 9) | (green << 6) | (blue >> 2);
+}
+
+/* Read the 16-bit pixel at (offset, line): bit 15 becomes a top byte of
+ * 0xff or 0x00; bits 10-14 are widened into the low byte, bits 5-9 into
+ * bits 8-15 and bits 0-4 into bits 16-23. */
+static FASTCALL uint32_t
+fbFetchPixel_a1b5g5r5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint16_t *) row + offset);
+    uint32_t  alpha, top, middle, bottom;
+
+    alpha  = (p & 0x8000) ? 0xff000000 : 0;
+    top    = (p & 0x7c00) | ((p & 0x7000) >> 5);
+    middle = (p & 0x03e0) | ((p & 0x0380) >> 5);
+    bottom = (p & 0x001c) | ((p & 0x001f) << 5);
+
+    return alpha | (bottom << 14) | (middle << 6) | (top >> 7);
+}
+
+/* Read the 16-bit pixel at (offset, line), ignore bit 15; bits 10-14 are
+ * widened into the low byte, bits 5-9 into bits 8-15 and bits 0-4 into
+ * bits 16-23.  The top byte of the result is 0xff. */
+static FASTCALL uint32_t
+fbFetchPixel_x1b5g5r5 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint16_t *) row + offset);
+    uint32_t  top, middle, bottom;
+
+    top    = (p & 0x7c00) | ((p & 0x7000) >> 5);
+    middle = (p & 0x03e0) | ((p & 0x0380) >> 5);
+    bottom = (p & 0x001c) | ((p & 0x001f) << 5);
+
+    return 0xff000000 | (bottom << 14) | (middle << 6) | (top >> 7);
+}
+
+/* Read the 16-bit pixel at (offset, line) and widen each of its four
+ * nibbles to a byte by duplicating the nibble. */
+static FASTCALL uint32_t
+fbFetchPixel_a4r4g4b4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint16_t *) row + offset);
+    uint32_t  n3 = p & 0xf000;
+    uint32_t  n2 = p & 0x0f00;
+    uint32_t  n1 = p & 0x00f0;
+    uint32_t  n0 = p & 0x000f;
+
+    n3 |= n3 >> 4;
+    n2 |= n2 >> 4;
+    n1 |= n1 >> 4;
+    n0 |= n0 << 4;
+
+    return (n3 << 16) | (n2 << 12) | (n1 << 8) | n0;
+}
+
+/* Read the 16-bit pixel at (offset, line), ignore its top nibble, widen the
+ * other three nibbles to bytes and set the top byte of the result to 0xff. */
+static FASTCALL uint32_t
+fbFetchPixel_x4r4g4b4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint16_t *) row + offset);
+    uint32_t  n2 = p & 0x0f00;
+    uint32_t  n1 = p & 0x00f0;
+    uint32_t  n0 = p & 0x000f;
+
+    n2 |= n2 >> 4;
+    n1 |= n1 >> 4;
+    n0 |= n0 << 4;
+
+    return 0xff000000 | (n2 << 12) | (n1 << 8) | n0;
+}
+
+/* Read the 16-bit pixel at (offset, line) and widen each nibble to a byte.
+ * Nibble 3 goes to the top byte, nibble 2 to the low byte, nibble 1 to
+ * bits 8-15 and nibble 0 to bits 16-23. */
+static FASTCALL uint32_t
+fbFetchPixel_a4b4g4r4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint16_t *) row + offset);
+    uint32_t  n3 = p & 0xf000;
+    uint32_t  n2 = p & 0x0f00;
+    uint32_t  n1 = p & 0x00f0;
+    uint32_t  n0 = p & 0x000f;
+
+    n3 |= n3 >> 4;
+    n2 |= n2 >> 4;
+    n1 |= n1 >> 4;
+    n0 |= n0 << 4;
+
+    return (n3 << 16) | (n0 << 16) | (n1 << 8) | (n2 >> 4);
+}
+
+/* Read the 16-bit pixel at (offset, line), ignore its top nibble and widen
+ * the others to bytes: nibble 2 to the low byte, nibble 1 to bits 8-15 and
+ * nibble 0 to bits 16-23.  The top byte of the result is 0xff. */
+static FASTCALL uint32_t
+fbFetchPixel_x4b4g4r4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint16_t *) row + offset);
+    uint32_t  n2 = p & 0x0f00;
+    uint32_t  n1 = p & 0x00f0;
+    uint32_t  n0 = p & 0x000f;
+
+    n2 |= n2 >> 4;
+    n1 |= n1 >> 4;
+    n0 |= n0 << 4;
+
+    return 0xff000000 | (n0 << 16) | (n1 << 8) | (n2 >> 4);
+}
+
+/* Read the byte at (offset, line) and return it in the top byte of the
+ * result; the lower 24 bits are zero. */
+static FASTCALL uint32_t
+fbFetchPixel_a8 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  alpha = READ(pict, (uint8_t *) row + offset);
+
+    return alpha << 24;
+}
+
+/* Read the byte at (offset, line) and widen its 3/3/2-bit fields to 8 bits
+ * each by repeating the field's bits; the top byte of the result is 0xff. */
+static FASTCALL uint32_t
+fbFetchPixel_r3g3b2 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint8_t *) row + offset);
+    uint32_t  red, green, blue;
+
+    red   = (p & 0xe0) | ((p & 0xe0) >> 3) | ((p & 0xc0) >> 6);
+    green = ((p & 0x1c) << 3) | (p & 0x1c) | ((p & 0x18) >> 3);
+
+    /* Two bits repeated four times. */
+    blue  = p & 0x03;
+    blue |= blue << 2;
+    blue |= blue << 4;
+
+    return 0xff000000 | (red << 16) | (green << 8) | blue;
+}
+
+/*
+ * Read the byte at (offset, line) and widen its fields to 8 bits each:
+ * bits 6-7 -> low byte, bits 3-5 -> bits 8-15, bits 0-2 -> bits 16-23.
+ * The top byte of the result is 0xff.
+ *
+ * The 3-bit field in bits 0-2 used to be left at the bottom of its byte and
+ * OR-ed with a copy shifted past bit 7, so a full-scale field produced 0xbf
+ * instead of 0xff.  It is now replicated from the top of the byte down,
+ * the same way the other fields are.
+ */
+static FASTCALL uint32_t
+fbFetchPixel_b2g3r3 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
+    b = (((pixel & 0xc0)     ) |
+	 ((pixel & 0xc0) >> 2) |
+	 ((pixel & 0xc0) >> 4) |
+	 ((pixel & 0xc0) >> 6));
+    g = ((pixel & 0x38) | ((pixel & 0x38) >> 3) | ((pixel & 0x30) << 2)) << 8;
+    /* rrr -> rrrrrrrr: field at bits 5-7, again at bits 2-4, top two bits
+     * of the field at bits 0-1. */
+    r = (((pixel & 0x07) << 5) |
+	 ((pixel & 0x07) << 2) |
+	 ((pixel & 0x06) >> 1)) << 16;
+    return (0xff000000 | r | g | b);
+}
+
+/* Read the byte at (offset, line) and widen each of its four 2-bit fields
+ * to a byte by multiplying the field value by 0x55. */
+static FASTCALL uint32_t
+fbFetchPixel_a2r2g2b2 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint8_t *) row + offset);
+    uint32_t  f3 = ((p >> 6) & 0x03) * 0x55;
+    uint32_t  f2 = ((p >> 4) & 0x03) * 0x55;
+    uint32_t  f1 = ((p >> 2) & 0x03) * 0x55;
+    uint32_t  f0 = (p & 0x03) * 0x55;
+
+    return (f3 << 24) | (f2 << 16) | (f1 << 8) | f0;
+}
+
+/*
+ * Read the byte at (offset, line) and widen each of its four 2-bit fields
+ * to a byte (field value * 0x55): bits 6-7 -> top byte, bits 4-5 -> low
+ * byte, bits 2-3 -> bits 8-15, bits 0-1 -> bits 16-23.
+ *
+ * The field in bits 4-5 used to be shifted right by 6 after the multiply,
+ * which discards two bits too many (a full-scale field gave 0x3f rather
+ * than 0xff).  The field sits 4 bits up, so the shift is 4.
+ */
+static FASTCALL uint32_t
+fbFetchPixel_a2b2g2r2 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t   a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+
+    a = ((pixel & 0xc0) * 0x55) << 18;
+    b = ((pixel & 0x30) * 0x55) >> 4;
+    g = ((pixel & 0x0c) * 0x55) << 6;
+    r = ((pixel & 0x03) * 0x55) << 16;
+    return a|r|g|b;
+}
+
+/* Read the byte at (offset, line) and use it as an index into the rgba
+ * table of the image's `indexed` structure. */
+static FASTCALL uint32_t
+fbFetchPixel_c8 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  index = READ(pict, (uint8_t *) row + offset);
+
+    return pict->indexed->rgba[index];
+}
+
+/* Read the byte at (offset, line), duplicate its low nibble into a full
+ * byte and return that byte in bits 24-31; the high nibble is ignored. */
+static FASTCALL uint32_t
+fbFetchPixel_x4a4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = READ(pict, (uint8_t *) row + offset);
+    uint32_t  alpha = p & 0xf;
+
+    alpha |= alpha << 4;
+    return alpha << 24;
+}
+
+/* Fetch the 4-bit value at (offset, line) with Fetch4, duplicate it into a
+ * full byte and return that byte in bits 24-31. */
+static FASTCALL uint32_t
+fbFetchPixel_a4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  nibble = Fetch4(pict, row, offset);
+
+    return (nibble | (nibble << 4)) << 24;
+}
+
+/* Fetch the 4-bit value at (offset, line) with Fetch4 and widen it:
+ * bit 3 -> bits 16-23, bits 1-2 -> bits 8-15, bit 0 -> low byte.
+ * The top byte of the result is 0xff. */
+static FASTCALL uint32_t
+fbFetchPixel_r1g2b1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = Fetch4(pict, row, offset);
+    uint32_t  red = (p & 0x8) ? 0x00ff0000 : 0;
+    uint32_t  green = (((p >> 1) & 0x3) * 0x55) << 8;
+    uint32_t  blue = (p & 0x1) ? 0x000000ff : 0;
+
+    return 0xff000000 | red | green | blue;
+}
+
+/* Fetch the 4-bit value at (offset, line) with Fetch4 and widen it:
+ * bit 3 -> low byte, bits 1-2 -> bits 8-15, bit 0 -> bits 16-23.
+ * The top byte of the result is 0xff. */
+static FASTCALL uint32_t
+fbFetchPixel_b1g2r1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = Fetch4(pict, row, offset);
+    uint32_t  blue = (p & 0x8) ? 0x000000ff : 0;
+    uint32_t  green = (((p >> 1) & 0x3) * 0x55) << 8;
+    uint32_t  red = (p & 0x1) ? 0x00ff0000 : 0;
+
+    return 0xff000000 | red | green | blue;
+}
+
+/* Fetch the 4-bit value at (offset, line) with Fetch4 and turn each of its
+ * bits into a byte of 0xff or 0x00: bit 3 -> top byte, bit 2 -> bits 16-23,
+ * bit 1 -> bits 8-15, bit 0 -> low byte. */
+static FASTCALL uint32_t
+fbFetchPixel_a1r1g1b1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  p = Fetch4(pict, row, offset);
+    uint32_t  out = 0;
+
+    if (p & 0x8)
+	out |= 0xff000000;
+    if (p & 0x4)
+	out |= 0x00ff0000;
+    if (p & 0x2)
+	out |= 0x0000ff00;
+    if (p & 0x1)
+	out |= 0x000000ff;
+    return out;
+}
+
+/*
+ * Fetch the 4-bit value at (offset, line) with Fetch4 and turn each of its
+ * bits into a byte of 0xff or 0x00: bit 3 -> top byte, bit 2 -> low byte,
+ * bit 1 -> bits 8-15, bit 0 -> bits 16-23.
+ *
+ * The bit-2 channel used to be shifted right by 3 after the multiply, which
+ * yields 0x7f instead of 0xff for a set bit; the bit sits 2 positions up, so
+ * the shift is 2.  The locals are also named after the channel they
+ * actually hold (bit 2 lands in the low byte, bit 0 in bits 16-23).
+ */
+static FASTCALL uint32_t
+fbFetchPixel_a1b1g1r1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t  a,r,g,b;
+    uint32_t *bits = pict->bits + line*pict->rowstride;
+    uint32_t  pixel = Fetch4(pict, bits, offset);
+
+    a = ((pixel & 0x8) * 0xff) << 21;
+    b = ((pixel & 0x4) * 0xff) >> 2;
+    g = ((pixel & 0x2) * 0xff) << 7;
+    r = ((pixel & 0x1) * 0xff) << 16;
+    return a|r|g|b;
+}
+
+/* Fetch the 4-bit value at (offset, line) with Fetch4 and use it as an
+ * index into the rgba table of the image's `indexed` structure. */
+static FASTCALL uint32_t
+fbFetchPixel_c4 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  index = Fetch4(pict, row, offset);
+
+    return pict->indexed->rgba[index];
+}
+
+
+/* Read the 32-bit word that holds bit `offset` of scanline `line`, pick
+ * that bit (its position inside the word depends on BITMAP_BIT_ORDER) and
+ * return 0xff000000 when it is set, 0 otherwise. */
+static FASTCALL uint32_t
+fbFetchPixel_a1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  word = READ(pict, row + (offset >> 5));
+    int       shift;
+
+#if BITMAP_BIT_ORDER == MSBFirst
+    shift = 0x1f - (offset & 0x1f);
+#else
+    shift = offset & 0x1f;
+#endif
+    return ((word >> shift) & 1) ? 0xff000000 : 0;
+}
+
+/* Read the 32-bit word that holds bit `offset` of scanline `line`, pick
+ * that bit (its position inside the word depends on BITMAP_BIT_ORDER) and
+ * use it as an index (0 or 1) into the image's indexed rgba table. */
+static FASTCALL uint32_t
+fbFetchPixel_g1 (bits_image_t *pict, int offset, int line)
+{
+    uint32_t *row = pict->bits + line*pict->rowstride;
+    uint32_t  word = READ(pict, row + (offset >> 5));
+    int       shift;
+
+#if BITMAP_BIT_ORDER == MSBFirst
+    shift = 0x1f - (offset & 0x1f);
+#else
+    shift = offset & 0x1f;
+#endif
+    return pict->indexed->rgba[(word >> shift) & 1];
+}
+
+/* Fetch one pixel from a packed scanline in which every pixel has its own
+ * Y byte (at 2*offset) and each 4-byte group shares a U byte (+1) and a V
+ * byte (+3), then convert it to RGB using the fixed-point coefficients
+ * below.  Each channel is clamped to 0..0xff and the top byte of the result
+ * is 0xff.  The bytes are read directly, not through READ. */
+static FASTCALL uint32_t
+fbFetchPixel_yuy2 (bits_image_t *pict, int offset, int line)
+{
+    const uint32_t *row = pict->bits + pict->rowstride * line;
+    const uint8_t  *bytes = (const uint8_t *) row;
+    int16_t  luma, cb, cr;
+    int32_t  r, g, b;
+    uint32_t out = 0xff000000;
+
+    luma = bytes[offset << 1] - 16;
+    cb = bytes[((offset << 1) & -4) + 1] - 128;
+    cr = bytes[((offset << 1) & -4) + 3] - 128;
+
+    /* R = 1.164(Y - 16) + 1.596(V - 128) */
+    r = 0x012b27 * luma + 0x019a2e * cr;
+    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+    g = 0x012b27 * luma - 0x00d0f2 * cr - 0x00647e * cb;
+    /* B = 1.164(Y - 16) + 2.018(U - 128) */
+    b = 0x012b27 * luma + 0x0206a2 * cb;
+
+    /* Negative results contribute nothing; results of 0x1000000 or more
+     * saturate the channel. */
+    if (r >= 0)
+	out |= (r < 0x1000000) ? (r & 0xff0000) : 0xff0000;
+    if (g >= 0)
+	out |= (g < 0x1000000) ? ((g >> 8) & 0x00ff00) : 0x00ff00;
+    if (b >= 0)
+	out |= (b < 0x1000000) ? ((b >> 16) & 0x0000ff) : 0x0000ff;
+    return out;
+}
+
+/* Fetch one pixel through the YV12_Y / YV12_U / YV12_V accessors (the
+ * chroma samples are indexed with offset >> 1) and convert it to RGB using
+ * the fixed-point coefficients below.  Each channel is clamped to 0..0xff
+ * and the top byte of the result is 0xff. */
+static FASTCALL uint32_t
+fbFetchPixel_yv12 (bits_image_t *pict, int offset, int line)
+{
+    YV12_SETUP(pict);
+    int16_t  luma, cb, cr;
+    int32_t  r, g, b;
+    uint32_t out = 0xff000000;
+
+    luma = YV12_Y (line)[offset] - 16;
+    cb = YV12_U (line)[offset >> 1] - 128;
+    cr = YV12_V (line)[offset >> 1] - 128;
+
+    /* R = 1.164(Y - 16) + 1.596(V - 128) */
+    r = 0x012b27 * luma + 0x019a2e * cr;
+    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
+    g = 0x012b27 * luma - 0x00d0f2 * cr - 0x00647e * cb;
+    /* B = 1.164(Y - 16) + 2.018(U - 128) */
+    b = 0x012b27 * luma + 0x0206a2 * cb;
+
+    /* Negative results contribute nothing; results of 0x1000000 or more
+     * saturate the channel. */
+    if (r >= 0)
+	out |= (r < 0x1000000) ? (r & 0xff0000) : 0xff0000;
+    if (g >= 0)
+	out |= (g < 0x1000000) ? ((g >> 8) & 0x00ff00) : 0x00ff00;
+    if (b >= 0)
+	out |= (b < 0x1000000) ? ((b >> 16) & 0x0000ff) : 0x0000ff;
+    return out;
+}
+
+/* Return the single-pixel fetch routine that matches pict->format, or NULL
+ * when the format has no entry below.  g8 and g4 share the c8 and c4
+ * routines. */
+fetchPixelProc FETCH_PIXEL_PROC_FOR_PICTURE (bits_image_t * pict)
+{
+    fetchPixelProc proc = NULL;
+
+    switch(pict->format) {
+        /* 32bpp formats */
+    case PIXMAN_a8r8g8b8: proc = fbFetchPixel_a8r8g8b8; break;
+    case PIXMAN_x8r8g8b8: proc = fbFetchPixel_x8r8g8b8; break;
+    case PIXMAN_a8b8g8r8: proc = fbFetchPixel_a8b8g8r8; break;
+    case PIXMAN_x8b8g8r8: proc = fbFetchPixel_x8b8g8r8; break;
+
+        /* 24bpp formats */
+    case PIXMAN_r8g8b8: proc = fbFetchPixel_r8g8b8; break;
+    case PIXMAN_b8g8r8: proc = fbFetchPixel_b8g8r8; break;
+
+        /* 16bpp formats */
+    case PIXMAN_r5g6b5: proc = fbFetchPixel_r5g6b5; break;
+    case PIXMAN_b5g6r5: proc = fbFetchPixel_b5g6r5; break;
+
+    case PIXMAN_a1r5g5b5: proc = fbFetchPixel_a1r5g5b5; break;
+    case PIXMAN_x1r5g5b5: proc = fbFetchPixel_x1r5g5b5; break;
+    case PIXMAN_a1b5g5r5: proc = fbFetchPixel_a1b5g5r5; break;
+    case PIXMAN_x1b5g5r5: proc = fbFetchPixel_x1b5g5r5; break;
+    case PIXMAN_a4r4g4b4: proc = fbFetchPixel_a4r4g4b4; break;
+    case PIXMAN_x4r4g4b4: proc = fbFetchPixel_x4r4g4b4; break;
+    case PIXMAN_a4b4g4r4: proc = fbFetchPixel_a4b4g4r4; break;
+    case PIXMAN_x4b4g4r4: proc = fbFetchPixel_x4b4g4r4; break;
+
+        /* 8bpp formats */
+    case PIXMAN_a8: proc = fbFetchPixel_a8; break;
+    case PIXMAN_r3g3b2: proc = fbFetchPixel_r3g3b2; break;
+    case PIXMAN_b2g3r3: proc = fbFetchPixel_b2g3r3; break;
+    case PIXMAN_a2r2g2b2: proc = fbFetchPixel_a2r2g2b2; break;
+    case PIXMAN_a2b2g2r2: proc = fbFetchPixel_a2b2g2r2; break;
+    case PIXMAN_c8: proc = fbFetchPixel_c8; break;
+    case PIXMAN_g8: proc = fbFetchPixel_c8; break;
+    case PIXMAN_x4a4: proc = fbFetchPixel_x4a4; break;
+
+        /* 4bpp formats */
+    case PIXMAN_a4: proc = fbFetchPixel_a4; break;
+    case PIXMAN_r1g2b1: proc = fbFetchPixel_r1g2b1; break;
+    case PIXMAN_b1g2r1: proc = fbFetchPixel_b1g2r1; break;
+    case PIXMAN_a1r1g1b1: proc = fbFetchPixel_a1r1g1b1; break;
+    case PIXMAN_a1b1g1r1: proc = fbFetchPixel_a1b1g1r1; break;
+    case PIXMAN_c4: proc = fbFetchPixel_c4; break;
+    case PIXMAN_g4: proc = fbFetchPixel_c4; break;
+
+        /* 1bpp formats */
+    case PIXMAN_a1: proc = fbFetchPixel_a1; break;
+    case PIXMAN_g1: proc = fbFetchPixel_g1; break;
+
+        /* YUV formats */
+    case PIXMAN_yuy2: proc = fbFetchPixel_yuy2; break;
+    case PIXMAN_yv12: proc = fbFetchPixel_yv12; break;
+
+    default:
+	break;
+    }
+
+    return proc;
+}
+
+/*********************************** Store ************************************/
+
+/* Declare uint32_t locals holding the 8-bit channels of the 32-bit value v:
+ * Splita declares a, r, g, b (bits 24-31, 16-23, 8-15, 0-7); Split declares
+ * only r, g, b.  v is expanded once per channel. */
+#define Splita(v)						\
+    uint32_t	a = ((v) >> 24),				\
+		r = ((v) >> 16) & 0xff,				\
+		g = ((v) >> 8) & 0xff,				\
+		b = (v) & 0xff
+#define Split(v)						\
+    uint32_t	r = ((v) >> 16) & 0xff,				\
+		g = ((v) >> 8) & 0xff,				\
+		b = (v) & 0xff
+
+/* Copy `width` 32-bit values unchanged to the scanline at `bits`, starting
+ * at pixel x.  `indexed` is not used. */
+static FASTCALL void
+fbStore_a8r8g8b8 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint32_t *first = ((uint32_t *)bits) + x;
+
+    MEMCPY_WRAPPED(image, first, values, width*sizeof(uint32_t));
+}
+
+/* Write `width` values to the scanline at `bits`, starting at pixel x, with
+ * the top byte of every value cleared.  `indexed` is not used. */
+static FASTCALL void
+fbStore_x8r8g8b8 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint32_t *dst = (uint32_t *)bits + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	uint32_t rgb = values[n] & 0xffffff;
+
+	WRITE(image, dst + n, rgb);
+    }
+}
+
+/* Write `width` values to the scanline at `bits`, starting at pixel x, with
+ * byte 0 and byte 2 of every value exchanged; bytes 1 and 3 are kept.
+ * `indexed` is not used. */
+static FASTCALL void
+fbStore_a8b8g8r8 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint32_t *dst = (uint32_t *)bits + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	uint32_t v = values[n];
+	uint32_t swapped = (v & 0xff00ff00) | ((v >> 16) & 0xff) | ((v & 0xff) << 16);
+
+	WRITE(image, dst + n, swapped);
+    }
+}
+
+/* Write `width` values to the scanline at `bits`, starting at pixel x, with
+ * byte 0 and byte 2 of every value exchanged, byte 1 kept and the top byte
+ * cleared.  `indexed` is not used. */
+static FASTCALL void
+fbStore_x8b8g8r8 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint32_t *dst = (uint32_t *)bits + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	uint32_t v = values[n];
+	uint32_t swapped = (v & 0x0000ff00) | ((v >> 16) & 0xff) | ((v & 0xff) << 16);
+
+	WRITE(image, dst + n, swapped);
+    }
+}
+
+/* Write `width` values as 3-byte pixels through Store24, starting at pixel
+ * x of the scanline at `bits`.  `indexed` is not used. */
+static FASTCALL void
+fbStore_r8g8b8 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width,
+		const pixman_indexed_t * indexed)
+{
+    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
+    int n;
+
+    for (n = 0; n < width; n++, pixel += 3)
+	Store24(image, pixel, values[n]);
+}
+
+/* Write `width` values as 3-byte pixels, starting at pixel x of the
+ * scanline at `bits`.  The three bytes are produced by the Red/Green/Blue
+ * macros; their order in memory depends on IMAGE_BYTE_ORDER.  `indexed` is
+ * not used. */
+static FASTCALL void
+fbStore_b8g8r8 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint8_t *px = ((uint8_t *) bits) + 3*x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	uint32_t val = values[n];
+
+#if IMAGE_BYTE_ORDER == MSBFirst
+	WRITE(image, px + 0, Blue(val));
+	WRITE(image, px + 1, Green(val));
+	WRITE(image, px + 2, Red(val));
+#else
+	WRITE(image, px + 0, Red(val));
+	WRITE(image, px + 1, Green(val));
+	WRITE(image, px + 2, Blue(val));
+#endif
+	px += 3;
+    }
+}
+
+/* Pack `width` 32-bit values into 16-bit pixels, starting at pixel x of the
+ * scanline at `bits`: the top 5 bits of byte 2, top 6 bits of byte 1 and
+ * top 5 bits of byte 0 end up in bits 11-15, 5-10 and 0-4.  `indexed` is
+ * not used. */
+static FASTCALL void
+fbStore_r5g6b5 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint16_t *dst = ((uint16_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	uint32_t s = values[n];
+	uint32_t hi = (s >> 8) & 0xf800;
+	uint32_t mid = (s >> 5) & 0x07e0;
+	uint32_t lo = (s >> 3) & 0x001f;
+
+	WRITE(image, dst + n, lo | mid | hi);
+    }
+}
+
+/* Pack `width` 32-bit values into 16-bit pixels, starting at pixel x of the
+ * scanline at `bits`: b goes to bits 11-15, g to bits 5-10 and r to
+ * bits 0-4 (channels as split by Split).  `indexed` is not used. */
+static FASTCALL void
+fbStore_b5g6r5 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint16_t *dst = ((uint16_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Split(values[n]);
+	uint32_t packed = ((b << 8) & 0xf800) | ((g << 3) & 0x07e0) | (r >> 3);
+
+	WRITE(image, dst + n, packed);
+    }
+}
+
+/* Pack `width` 32-bit values into 16-bit pixels, starting at pixel x of the
+ * scanline at `bits`: the top bit of a goes to bit 15, r to bits 10-14,
+ * g to bits 5-9 and b to bits 0-4 (channels as split by Splita).
+ * `indexed` is not used. */
+static FASTCALL void
+fbStore_a1r5g5b5 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint16_t *dst = ((uint16_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Splita(values[n]);
+	uint32_t packed = ((a << 8) & 0x8000) | ((r << 7) & 0x7c00) |
+			  ((g << 2) & 0x03e0) | (b >> 3);
+
+	WRITE(image, dst + n, packed);
+    }
+}
+
+/* Pack `width` 32-bit values into 16-bit pixels, starting at pixel x of the
+ * scanline at `bits`: r goes to bits 10-14, g to bits 5-9 and b to
+ * bits 0-4 (channels as split by Split); bit 15 is written as 0.
+ * `indexed` is not used. */
+static FASTCALL void
+fbStore_x1r5g5b5 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint16_t *dst = ((uint16_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Split(values[n]);
+	uint32_t packed = ((r << 7) & 0x7c00) | ((g << 2) & 0x03e0) | (b >> 3);
+
+	WRITE(image, dst + n, packed);
+    }
+}
+
+/* Pack `width` 32-bit values into 16-bit pixels, starting at pixel x of the
+ * scanline at `bits`: the top bit of a goes to bit 15, b to bits 10-14,
+ * g to bits 5-9 and r to bits 0-4 (channels as split by Splita).
+ * `indexed` is not used. */
+static FASTCALL void
+fbStore_a1b5g5r5 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint16_t *dst = ((uint16_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Splita(values[n]);
+	uint32_t packed = ((a << 8) & 0x8000) | ((b << 7) & 0x7c00) |
+			  ((g << 2) & 0x03e0) | (r >> 3);
+
+	WRITE(image, dst + n, packed);
+    }
+}
+
+/* Pack `width` 32-bit values into 16-bit pixels, starting at pixel x of the
+ * scanline at `bits`: b goes to bits 10-14, g to bits 5-9 and r to
+ * bits 0-4 (channels as split by Split); bit 15 is written as 0.
+ * `indexed` is not used. */
+static FASTCALL void
+fbStore_x1b5g5r5 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint16_t *dst = ((uint16_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Split(values[n]);
+	uint32_t packed = ((b << 7) & 0x7c00) | ((g << 2) & 0x03e0) | (r >> 3);
+
+	WRITE(image, dst + n, packed);
+    }
+}
+
+/* Pack `width` 32-bit values into 16-bit pixels, starting at pixel x of the
+ * scanline at `bits`, keeping the top nibble of each channel: a, r, g, b
+ * (as split by Splita) go to nibbles 3, 2, 1, 0.  `indexed` is not used. */
+static FASTCALL void
+fbStore_a4r4g4b4 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint16_t *dst = ((uint16_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Splita(values[n]);
+	uint32_t packed = ((a << 8) & 0xf000) | ((r << 4) & 0x0f00) |
+			  ((g     ) & 0x00f0) | (b >> 4);
+
+	WRITE(image, dst + n, packed);
+    }
+}
+
+/* Pack `width` 32-bit values into 16-bit pixels, starting at pixel x of the
+ * scanline at `bits`, keeping the top nibble of each channel: r, g, b (as
+ * split by Split) go to nibbles 2, 1, 0; nibble 3 is written as 0.
+ * `indexed` is not used. */
+static FASTCALL void
+fbStore_x4r4g4b4 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint16_t *dst = ((uint16_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Split(values[n]);
+	uint32_t packed = ((r << 4) & 0x0f00) | ((g     ) & 0x00f0) | (b >> 4);
+
+	WRITE(image, dst + n, packed);
+    }
+}
+
+/* Pack `width` 32-bit values into 16-bit pixels, starting at pixel x of the
+ * scanline at `bits`, keeping the top nibble of each channel: a, b, g, r
+ * (as split by Splita) go to nibbles 3, 2, 1, 0.  `indexed` is not used. */
+static FASTCALL void
+fbStore_a4b4g4r4 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint16_t *dst = ((uint16_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Splita(values[n]);
+	uint32_t packed = ((a << 8) & 0xf000) | ((b << 4) & 0x0f00) |
+			  ((g     ) & 0x00f0) | (r >> 4);
+
+	WRITE(image, dst + n, packed);
+    }
+}
+
+/* Pack `width` 32-bit values into 16-bit pixels, starting at pixel x of the
+ * scanline at `bits`, keeping the top nibble of each channel: b, g, r (as
+ * split by Split) go to nibbles 2, 1, 0; nibble 3 is written as 0.
+ * `indexed` is not used. */
+static FASTCALL void
+fbStore_x4b4g4r4 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint16_t *dst = ((uint16_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Split(values[n]);
+	uint32_t packed = ((b << 4) & 0x0f00) | ((g     ) & 0x00f0) | (r >> 4);
+
+	WRITE(image, dst + n, packed);
+    }
+}
+
+/* Write the top byte of each of `width` values as one byte per pixel,
+ * starting at pixel x of the scanline at `bits`.  `indexed` is not used. */
+static FASTCALL void
+fbStore_a8 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint8_t *dst = ((uint8_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++)
+	WRITE(image, dst + n, values[n] >> 24);
+}
+
+/* Pack `width` 32-bit values into one byte per pixel, starting at pixel x
+ * of the scanline at `bits`: the top 3 bits of r go to bits 5-7, the top 3
+ * bits of g to bits 2-4 and the top 2 bits of b to bits 0-1 (channels as
+ * split by Split).  `indexed` is not used. */
+static FASTCALL void
+fbStore_r3g3b2 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint8_t *dst = ((uint8_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Split(values[n]);
+	uint32_t packed = ((r     ) & 0xe0) | ((g >> 3) & 0x1c) | (b >> 6);
+
+	WRITE(image, dst + n, packed);
+    }
+}
+
+/*
+ * Pack `width` 32-bit values into one byte per pixel, starting at pixel x
+ * of the scanline at `bits`: the top 2 bits of b go to bits 6-7, the top 3
+ * bits of g to bits 3-5 and the top 3 bits of r to bits 0-2 (channels as
+ * split by Split).  `indexed` is not used.
+ *
+ * The green mask used to be 0x1c (bits 2-4), which dropped the top green
+ * bit and let the lowest one overlap red's bit 2.  After `g >> 2` the three
+ * kept bits sit at bits 3-5, i.e. mask 0x38 -- the same field that
+ * fbFetchPixel_b2g3r3 reads green from.
+ */
+static FASTCALL void
+fbStore_b2g3r3 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int i;
+    uint8_t   *pixel = ((uint8_t *) bits) + x;
+    for (i = 0; i < width; ++i) {
+	Split(values[i]);
+	WRITE(image, pixel++,
+	      ((b     ) & 0xc0) |
+	      ((g >> 2) & 0x38) |
+	      ((r >> 5)       ));
+    }
+}
+
+/* Pack `width` 32-bit values into one byte per pixel, starting at pixel x
+ * of the scanline at `bits`, keeping the top 2 bits of each channel:
+ * a, r, g, b (as split by Splita) go to bits 6-7, 4-5, 2-3 and 0-1.
+ * `indexed` is not used. */
+static FASTCALL void
+fbStore_a2r2g2b2 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint8_t *dst = ((uint8_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Splita(values[n]);
+	uint32_t packed = ((a     ) & 0xc0) | ((r >> 2) & 0x30) |
+			  ((g >> 4) & 0x0c) | (b >> 6);
+
+	WRITE(image, dst + n, packed);
+    }
+}
+
+/* Map each of `width` values through miIndexToEnt24(indexed, value) and
+ * write the result as one byte per pixel, starting at pixel x of the
+ * scanline at `bits`. */
+static FASTCALL void
+fbStore_c8 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint8_t *dst = ((uint8_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++)
+	WRITE(image, dst + n, miIndexToEnt24(indexed,values[n]));
+}
+
+/* Write the top nibble of each of `width` values into the low nibble of
+ * one byte per pixel (the high nibble is written as 0), starting at pixel x
+ * of the scanline at `bits`.  `indexed` is not used. */
+static FASTCALL void
+fbStore_x4a4 (pixman_image_t *image,
+	      uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    uint8_t *dst = ((uint8_t *) bits) + x;
+    int n;
+
+    for (n = 0; n < width; n++)
+	WRITE(image, dst + n, values[n] >> 28);
+}
+
+/* Store8 writes v to the byte at l + (o >> 3).  Store4 replaces one nibble
+ * of that byte: it reads the current byte with Fetch8, keeps the other
+ * nibble and ORs in v; which nibble is replaced is chosen by (o & 4) and
+ * IMAGE_BYTE_ORDER.  v is not masked here, so callers must pass a value
+ * that fits in 4 bits.
+ * NOTE(review): the fbStore_* callers pass a pixel index (i + x) as `o`,
+ * while `>> 3` and `& 4` look like they expect a bit offset -- confirm
+ * against the definition of Fetch8/Fetch4, which is not visible here. */
+#define Store8(img,l,o,v)  (WRITE(img, (uint8_t *)(l) + ((o) >> 3), (v)))
+#if IMAGE_BYTE_ORDER == MSBFirst
+#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?				\
+				   (Fetch8(img,l,o) & 0xf0) | (v) :		\
+				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4)))
+#else
+#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?			       \
+				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4) : \
+				   (Fetch8(img,l,o) & 0xf0) | (v)))
+#endif
+
+/* Store the top nibble of each of `width` values through Store4, at
+ * positions x, x+1, ... of the scanline at `bits`.  `indexed` is not
+ * used. */
+static FASTCALL void
+fbStore_a4 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int n;
+
+    for (n = 0; n < width; n++) {
+	int pos = n + x;
+
+	Store4(image, bits, pos, values[n]>>28);
+    }
+}
+
+/* Reduce each of `width` values to 4 bits -- top bit of r in bit 3, top two
+ * bits of g in bits 1-2, top bit of b in bit 0 (channels as split by
+ * Split) -- and store it through Store4 at positions x, x+1, ... of the
+ * scanline at `bits`.  `indexed` is not used. */
+static FASTCALL void
+fbStore_r1g2b1 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Split(values[n]);
+	int      pos = n + x;
+	uint32_t nibble = ((r >> 4) & 0x8) | ((g >> 5) & 0x6) | (b >> 7);
+
+	Store4(image, bits, pos, nibble);
+    }
+}
+
+/* Reduce each of `width` values to 4 bits -- top bit of b in bit 3, top two
+ * bits of g in bits 1-2, top bit of r in bit 0 (channels as split by
+ * Split) -- and store it through Store4 at positions x, x+1, ... of the
+ * scanline at `bits`.  `indexed` is not used. */
+static FASTCALL void
+fbStore_b1g2r1 (pixman_image_t *image,
+		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Split(values[n]);
+	int      pos = n + x;
+	uint32_t nibble = ((b >> 4) & 0x8) | ((g >> 5) & 0x6) | (r >> 7);
+
+	Store4(image, bits, pos, nibble);
+    }
+}
+
+/* Reduce each of `width` values to 4 bits -- the top bit of a, r, g, b (as
+ * split by Splita) in bits 3, 2, 1, 0 -- and store it through Store4 at
+ * positions x, x+1, ... of the scanline at `bits`.  `indexed` is not
+ * used. */
+static FASTCALL void
+fbStore_a1r1g1b1 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Splita(values[n]);
+	int      pos = n + x;
+	uint32_t nibble = ((a >> 4) & 0x8) | ((r >> 5) & 0x4) |
+			  ((g >> 6) & 0x2) | (b >> 7);
+
+	Store4(image, bits, pos, nibble);
+    }
+}
+
+/* Reduce each of `width` values to 4 bits -- the top bit of a, b, g, r (as
+ * split by Splita) in bits 3, 2, 1, 0 -- and store it through Store4 at
+ * positions x, x+1, ... of the scanline at `bits`.  `indexed` is not
+ * used. */
+static FASTCALL void
+fbStore_a1b1g1r1 (pixman_image_t *image,
+		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int n;
+
+    for (n = 0; n < width; n++) {
+	Splita(values[n]);
+	int      pos = n + x;
+	uint32_t nibble = ((a >> 4) & 0x8) | ((b >> 5) & 0x4) |
+			  ((g >> 6) & 0x2) | (r >> 7);
+
+	Store4(image, bits, pos, nibble);
+    }
+}
+
+/* Map each of `width` values through miIndexToEnt24(indexed, value) and
+ * store the result through Store4 at positions x, x+1, ... of the scanline
+ * at `bits`. */
+static FASTCALL void
+fbStore_c4 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int n;
+
+    for (n = 0; n < width; n++) {
+	int      pos = n + x;
+	uint32_t entry = miIndexToEnt24(indexed, values[n]);
+
+	Store4(image, bits, pos, entry);
+    }
+}
+
+/* Store one bit per pixel for `width` values, starting at bit x of the
+ * scanline at `bits`: the bit selected by FbStipMask is set when bit 31 of
+ * the value is set and cleared otherwise.  Each pixel is a read-modify-
+ * write of its 32-bit word.  `indexed` is not used. */
+static FASTCALL void
+fbStore_a1 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int n;
+
+    for (n = 0; n < width; n++) {
+	int       pos = n + x;
+	uint32_t *word = ((uint32_t *) bits) + (pos >> 5);
+	uint32_t  mask = FbStipMask(pos & 0x1f, 1);
+	uint32_t  old = READ(image, word);
+
+	if (values[n] & 0x80000000)
+	    WRITE(image, word, old | mask);
+	else
+	    WRITE(image, word, old & ~mask);
+    }
+}
+
+/* Store one bit per pixel for `width` values, starting at bit x of the
+ * scanline at `bits`: the bit selected by FbStipMask is set when
+ * miIndexToEntY24(indexed, value) is non-zero and cleared otherwise.  Each
+ * pixel is a read-modify-write of its 32-bit word. */
+static FASTCALL void
+fbStore_g1 (pixman_image_t *image,
+	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
+{
+    int n;
+
+    for (n = 0; n < width; n++) {
+	int       pos = n + x;
+	uint32_t *word = ((uint32_t *) bits) + (pos >> 5);
+	uint32_t  mask = FbStipMask(pos & 0x1f, 1);
+	uint32_t  old = READ(image, word);
+
+	if (miIndexToEntY24(indexed,values[n]))
+	    WRITE(image, word, old | mask);
+	else
+	    WRITE(image, word, old & ~mask);
+    }
+}
+
+
+/* Return the scanline store routine that matches pict->format, or NULL when
+ * the format has no entry below.  g8 and g4 share the c8 and c4 routines. */
+storeProc STORE_PROC_FOR_PICTURE (bits_image_t * pict)
+{
+    storeProc proc = NULL;
+
+    switch(pict->format) {
+        /* 32bpp formats */
+    case PIXMAN_a8r8g8b8: proc = fbStore_a8r8g8b8; break;
+    case PIXMAN_x8r8g8b8: proc = fbStore_x8r8g8b8; break;
+    case PIXMAN_a8b8g8r8: proc = fbStore_a8b8g8r8; break;
+    case PIXMAN_x8b8g8r8: proc = fbStore_x8b8g8r8; break;
+
+        /* 24bpp formats */
+    case PIXMAN_r8g8b8: proc = fbStore_r8g8b8; break;
+    case PIXMAN_b8g8r8: proc = fbStore_b8g8r8; break;
+
+        /* 16bpp formats */
+    case PIXMAN_r5g6b5: proc = fbStore_r5g6b5; break;
+    case PIXMAN_b5g6r5: proc = fbStore_b5g6r5; break;
+
+    case PIXMAN_a1r5g5b5: proc = fbStore_a1r5g5b5; break;
+    case PIXMAN_x1r5g5b5: proc = fbStore_x1r5g5b5; break;
+    case PIXMAN_a1b5g5r5: proc = fbStore_a1b5g5r5; break;
+    case PIXMAN_x1b5g5r5: proc = fbStore_x1b5g5r5; break;
+    case PIXMAN_a4r4g4b4: proc = fbStore_a4r4g4b4; break;
+    case PIXMAN_x4r4g4b4: proc = fbStore_x4r4g4b4; break;
+    case PIXMAN_a4b4g4r4: proc = fbStore_a4b4g4r4; break;
+    case PIXMAN_x4b4g4r4: proc = fbStore_x4b4g4r4; break;
+
+        /* 8bpp formats */
+    case PIXMAN_a8: proc = fbStore_a8; break;
+    case PIXMAN_r3g3b2: proc = fbStore_r3g3b2; break;
+    case PIXMAN_b2g3r3: proc = fbStore_b2g3r3; break;
+    case PIXMAN_a2r2g2b2: proc = fbStore_a2r2g2b2; break;
+    case PIXMAN_c8: proc = fbStore_c8; break;
+    case PIXMAN_g8: proc = fbStore_c8; break;
+    case PIXMAN_x4a4: proc = fbStore_x4a4; break;
+
+        /* 4bpp formats */
+    case PIXMAN_a4: proc = fbStore_a4; break;
+    case PIXMAN_r1g2b1: proc = fbStore_r1g2b1; break;
+    case PIXMAN_b1g2r1: proc = fbStore_b1g2r1; break;
+    case PIXMAN_a1r1g1b1: proc = fbStore_a1r1g1b1; break;
+    case PIXMAN_a1b1g1r1: proc = fbStore_a1b1g1r1; break;
+    case PIXMAN_c4: proc = fbStore_c4; break;
+    case PIXMAN_g4: proc = fbStore_c4; break;
+
+        /* 1bpp formats */
+    case PIXMAN_a1: proc = fbStore_a1; break;
+    case PIXMAN_g1: proc = fbStore_g1; break;
+
+    default:
+        break;
+    }
+
+    return proc;
+}
diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index b75b461..f713c43 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -35,6 +35,20 @@
 
 #include "pixman-private.h"
 
+#ifdef PIXMAN_FB_ACCESSORS /* pick the *_accessors spelling of each entry point */
+#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_accessors
+#define PIXMAN_COMPOSE_FUNCTIONS pixman_composeFunctions_accessors
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture_accessors
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture_accessors
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture_accessors
+#else /* !PIXMAN_FB_ACCESSORS: plain names */
+#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
+#define PIXMAN_COMPOSE_FUNCTIONS pixman_composeFunctions
+#define FETCH_PROC_FOR_PICTURE pixman_fetchProcForPicture
+#define FETCH_PIXEL_PROC_FOR_PICTURE pixman_fetchPixelProcForPicture
+#define STORE_PROC_FOR_PICTURE pixman_storeProcForPicture
+#endif /* PIXMAN_FB_ACCESSORS */
+
 static unsigned int
 SourcePictureClassify (source_image_t *pict,
 		       int	       x,
@@ -97,1670 +111,11 @@ SourcePictureClassify (source_image_t *pict,
 
 #define SCANLINE_BUFFER_LENGTH 2048
 
-/*
- * YV12 setup and access macros
- */
-
-#define YV12_SETUP(pict) \
-	uint32_t *bits = pict->bits; \
-	int stride = pict->rowstride; \
-	int offset0 = stride < 0 ? \
-		((-stride) >> 1) * ((pict->height - 1) >> 1) - stride : \
-		stride * pict->height; \
-	int offset1 = stride < 0 ? \
-		offset0 + ((-stride) >> 1) * ((pict->height) >> 1) : \
-		offset0 + (offset0 >> 2)
-/* Note n trailing semicolon on the above macro; if it's there, then
- * the typical usage of YV12_SETUP(pict); will have an extra trailing ;
- * that some compilers will interpret as a statement -- and then any further
- * variable declarations will cause an error.
- */
-
-#define YV12_Y(line)		\
-    ((uint8_t *) ((bits) + (stride) * (line)))
-
-#define YV12_U(line)	      \
-    ((uint8_t *) ((bits) + offset1 + \
-		((stride) >> 1) * ((line) >> 1)))
-
-#define YV12_V(line)	      \
-    ((uint8_t *) ((bits) + offset0 + \
-		((stride) >> 1) * ((line) >> 1)))
-
-typedef FASTCALL void (*fetchProc)(bits_image_t *pict, int x, int y, int width, uint32_t *buffer);
-
-/*
- * All of the fetch functions
- */
-
-static FASTCALL void
-fbFetch_a8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    MEMCPY_WRAPPED(pict,
-                   buffer, (const uint32_t *)bits + x,
-		   width*sizeof(uint32_t));
-}
-
-static FASTCALL void
-fbFetch_x8r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint32_t *pixel = (const uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    while (pixel < end) {
-	*buffer++ = READ(pict, pixel++) | 0xff000000;
-    }
-}
-
-static FASTCALL void
-fbFetch_a8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint32_t *pixel = (uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t p = READ(pict, pixel++);
-	*buffer++ = (p & 0xff00ff00) |
-	            ((p >> 16) & 0xff) |
-	    ((p & 0xff) << 16);
-    }
-}
-
-static FASTCALL void
-fbFetch_x8b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint32_t *pixel = (uint32_t *)bits + x;
-    const uint32_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t p = READ(pict, pixel++);
-	*buffer++ = 0xff000000 |
-	    (p & 0x0000ff00) |
-	    ((p >> 16) & 0xff) |
-	    ((p & 0xff) << 16);
-    }
-}
-
-static FASTCALL void
-fbFetch_r8g8b8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
-    const uint8_t *end = pixel + 3*width;
-    while (pixel < end) {
-	uint32_t b = Fetch24(pict, pixel) | 0xff000000;
-	pixel += 3;
-	*buffer++ = b;
-    }
-}
-
-static FASTCALL void
-fbFetch_b8g8r8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
-    const uint8_t *end = pixel + 3*width;
-    while (pixel < end) {
-	uint32_t b = 0xff000000;
-#if IMAGE_BYTE_ORDER == MSBFirst
-	b |= (READ(pict, pixel++));
-	b |= (READ(pict, pixel++) << 8);
-	b |= (READ(pict, pixel++) << 16);
-#else
-	b |= (READ(pict, pixel++) << 16);
-	b |= (READ(pict, pixel++) << 8);
-	b |= (READ(pict, pixel++));
-#endif
-	*buffer++ = b;
-    }
-}
-
-static FASTCALL void
-fbFetch_r5g6b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t p = READ(pict, pixel++);
-	uint32_t r = (((p) << 3) & 0xf8) |
-	    (((p) << 5) & 0xfc00) |
-	    (((p) << 8) & 0xf80000);
-	r |= (r >> 5) & 0x70007;
-	r |= (r >> 6) & 0x300;
-	*buffer++ = 0xff000000 | r;
-    }
-}
-
-static FASTCALL void
-fbFetch_b5g6r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-	b = ((p & 0xf800) | ((p & 0xe000) >> 5)) >> 8;
-	g = ((p & 0x07e0) | ((p & 0x0600) >> 6)) << 5;
-	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
-	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
-	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
-	b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
-	*buffer++ = a | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_x1r5g5b5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
-	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
-	b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
-	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
-	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
-	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
-	*buffer++ = a | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_x1b5g5r5 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
-	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
-	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
-	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
-	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
-	b = ((p & 0x000f) | ((p & 0x000f) << 4));
-	*buffer++ = a | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_x4r4g4b4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
-	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
-	b = ((p & 0x000f) | ((p & 0x000f) << 4));
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
-	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
-	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
-	r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
-	*buffer++ = a | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_x4b4g4r4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint16_t *pixel = (const uint16_t *)bits + x;
-    const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
-	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
-	r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	*buffer++ = READ(pict, pixel++) << 24;
-    }
-}
-
-static FASTCALL void
-fbFetch_r3g3b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	r = ((p & 0xe0) | ((p & 0xe0) >> 3) | ((p & 0xc0) >> 6)) << 16;
-	g = ((p & 0x1c) | ((p & 0x18) >> 3) | ((p & 0x1c) << 3)) << 8;
-	b = (((p & 0x03)     ) |
-	     ((p & 0x03) << 2) |
-	     ((p & 0x03) << 4) |
-	     ((p & 0x03) << 6));
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_b2g3r3 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	b = (((p & 0xc0)     ) |
-	     ((p & 0xc0) >> 2) |
-	     ((p & 0xc0) >> 4) |
-	     ((p & 0xc0) >> 6));
-	g = ((p & 0x38) | ((p & 0x38) >> 3) | ((p & 0x30) << 2)) << 8;
-	r = (((p & 0x07)     ) |
-	     ((p & 0x07) << 3) |
-	     ((p & 0x06) << 6)) << 16;
-	*buffer++ = 0xff000000 | r | g | b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a2r2g2b2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t   a,r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = ((p & 0xc0) * 0x55) << 18;
-	r = ((p & 0x30) * 0x55) << 12;
-	g = ((p & 0x0c) * 0x55) << 6;
-	b = ((p & 0x03) * 0x55);
-	*buffer++ = a|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a2b2g2r2 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t   a,r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-
-	a = ((p & 0xc0) * 0x55) << 18;
-	b = ((p & 0x30) * 0x55) >> 6;
-	g = ((p & 0x0c) * 0x55) << 6;
-	r = ((p & 0x03) * 0x55) << 16;
-	*buffer++ = a|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_c8 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const pixman_indexed_t * indexed = pict->indexed;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(pict, pixel++);
-	*buffer++ = indexed->rgba[p];
-    }
-}
-
-static FASTCALL void
-fbFetch_x4a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + x;
-    const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint8_t p = READ(pict, pixel++) & 0xf;
-	*buffer++ = (p | (p << 4)) << 24;
-    }
-}
-
-#define Fetch8(img,l,o)    (READ(img, (uint8_t *)(l) + ((o) >> 2)))
-#if IMAGE_BYTE_ORDER == MSBFirst
-#define Fetch4(img,l,o)    ((o) & 2 ? Fetch8(img,l,o) & 0xf : Fetch8(img,l,o) >> 4)
-#else
-#define Fetch4(img,l,o)    ((o) & 2 ? Fetch8(img,l,o) >> 4 : Fetch8(img,l,o) & 0xf)
-#endif
-
-static FASTCALL void
-fbFetch_a4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	p |= p << 4;
-	*buffer++ = p << 24;
-    }
-}
-
-static FASTCALL void
-fbFetch_r1g2b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	r = ((p & 0x8) * 0xff) << 13;
-	g = ((p & 0x6) * 0x55) << 7;
-	b = ((p & 0x1) * 0xff);
-	*buffer++ = 0xff000000|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_b1g2r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	b = ((p & 0x8) * 0xff) >> 3;
-	g = ((p & 0x6) * 0x55) << 7;
-	r = ((p & 0x1) * 0xff) << 16;
-	*buffer++ = 0xff000000|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a1r1g1b1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  a,r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	a = ((p & 0x8) * 0xff) << 21;
-	r = ((p & 0x4) * 0xff) << 14;
-	g = ((p & 0x2) * 0xff) << 7;
-	b = ((p & 0x1) * 0xff);
-	*buffer++ = a|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_a1b1g1r1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    uint32_t  a,r,g,b;
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	a = ((p & 0x8) * 0xff) << 21;
-	r = ((p & 0x4) * 0xff) >> 3;
-	g = ((p & 0x2) * 0xff) << 7;
-	b = ((p & 0x1) * 0xff) << 16;
-	*buffer++ = a|r|g|b;
-    }
-}
-
-static FASTCALL void
-fbFetch_c4 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const pixman_indexed_t * indexed = pict->indexed;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = Fetch4(pict, bits, i + x);
-
-	*buffer++ = indexed->rgba[p];
-    }
-}
-
-
-static FASTCALL void
-fbFetch_a1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = READ(pict, bits + ((i + x) >> 5));
-	uint32_t  a;
-#if BITMAP_BIT_ORDER == MSBFirst
-	a = p >> (0x1f - ((i+x) & 0x1f));
-#else
-	a = p >> ((i+x) & 0x1f);
-#endif
-	a = a & 1;
-	a |= a << 1;
-	a |= a << 2;
-	a |= a << 4;
-	*buffer++ = a << 24;
-    }
-}
-
-static FASTCALL void
-fbFetch_g1 (bits_image_t *pict, int x, int y, int width, uint32_t *buffer)
-{
-    const uint32_t *bits = pict->bits + y*pict->rowstride;
-    const pixman_indexed_t * indexed = pict->indexed;
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t p = READ(pict, bits + ((i+x) >> 5));
-	uint32_t a;
-#if BITMAP_BIT_ORDER == MSBFirst
-	a = p >> (0x1f - ((i+x) & 0x1f));
-#else
-	a = p >> ((i+x) & 0x1f);
-#endif
-	a = a & 1;
-	*buffer++ = indexed->rgba[a];
-    }
-}
-
-static FASTCALL void
-fbFetch_yuy2 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
-{
-    int16_t y, u, v;
-    int32_t r, g, b;
-    int   i;
-
-    const uint32_t *bits = pict->bits + pict->rowstride * line;
-
-    for (i = 0; i < width; i++)
-    {
-	y = ((uint8_t *) bits)[(x + i) << 1] - 16;
-	u = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 1] - 128;
-	v = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 3] - 128;
-
-	/* R = 1.164(Y - 16) + 1.596(V - 128) */
-	r = 0x012b27 * y + 0x019a2e * v;
-	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-	/* B = 1.164(Y - 16) + 2.018(U - 128) */
-	b = 0x012b27 * y + 0x0206a2 * u;
-
-    WRITE(pict, buffer++, 0xff000000 |
-	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
-    }
-}
-
-static FASTCALL void
-fbFetch_yv12 (bits_image_t *pict, int x, int line, int width, uint32_t *buffer)
-{
-    YV12_SETUP(pict);
-    uint8_t *pY = YV12_Y (line);
-    uint8_t *pU = YV12_U (line);
-    uint8_t *pV = YV12_V (line);
-    int16_t y, u, v;
-    int32_t r, g, b;
-    int   i;
-
-    for (i = 0; i < width; i++)
-    {
-	y = pY[x + i] - 16;
-	u = pU[(x + i) >> 1] - 128;
-	v = pV[(x + i) >> 1] - 128;
-
-	/* R = 1.164(Y - 16) + 1.596(V - 128) */
-	r = 0x012b27 * y + 0x019a2e * v;
-	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-	/* B = 1.164(Y - 16) + 2.018(U - 128) */
-	b = 0x012b27 * y + 0x0206a2 * u;
-
-	WRITE(pict, buffer++, 0xff000000 |
-	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	    (b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0));
-    }
-}
-
-static fetchProc fetchProcForPicture (bits_image_t * pict)
-{
-    switch(pict->format) {
-    case PIXMAN_a8r8g8b8: return fbFetch_a8r8g8b8;
-    case PIXMAN_x8r8g8b8: return fbFetch_x8r8g8b8;
-    case PIXMAN_a8b8g8r8: return fbFetch_a8b8g8r8;
-    case PIXMAN_x8b8g8r8: return fbFetch_x8b8g8r8;
-
-        /* 24bpp formats */
-    case PIXMAN_r8g8b8: return fbFetch_r8g8b8;
-    case PIXMAN_b8g8r8: return fbFetch_b8g8r8;
-
-        /* 16bpp formats */
-    case PIXMAN_r5g6b5: return fbFetch_r5g6b5;
-    case PIXMAN_b5g6r5: return fbFetch_b5g6r5;
-
-    case PIXMAN_a1r5g5b5: return fbFetch_a1r5g5b5;
-    case PIXMAN_x1r5g5b5: return fbFetch_x1r5g5b5;
-    case PIXMAN_a1b5g5r5: return fbFetch_a1b5g5r5;
-    case PIXMAN_x1b5g5r5: return fbFetch_x1b5g5r5;
-    case PIXMAN_a4r4g4b4: return fbFetch_a4r4g4b4;
-    case PIXMAN_x4r4g4b4: return fbFetch_x4r4g4b4;
-    case PIXMAN_a4b4g4r4: return fbFetch_a4b4g4r4;
-    case PIXMAN_x4b4g4r4: return fbFetch_x4b4g4r4;
-
-        /* 8bpp formats */
-    case PIXMAN_a8: return  fbFetch_a8;
-    case PIXMAN_r3g3b2: return fbFetch_r3g3b2;
-    case PIXMAN_b2g3r3: return fbFetch_b2g3r3;
-    case PIXMAN_a2r2g2b2: return fbFetch_a2r2g2b2;
-    case PIXMAN_a2b2g2r2: return fbFetch_a2b2g2r2;
-    case PIXMAN_c8: return  fbFetch_c8;
-    case PIXMAN_g8: return  fbFetch_c8;
-    case PIXMAN_x4a4: return fbFetch_x4a4;
-
-        /* 4bpp formats */
-    case PIXMAN_a4: return  fbFetch_a4;
-    case PIXMAN_r1g2b1: return fbFetch_r1g2b1;
-    case PIXMAN_b1g2r1: return fbFetch_b1g2r1;
-    case PIXMAN_a1r1g1b1: return fbFetch_a1r1g1b1;
-    case PIXMAN_a1b1g1r1: return fbFetch_a1b1g1r1;
-    case PIXMAN_c4: return  fbFetch_c4;
-    case PIXMAN_g4: return  fbFetch_c4;
-
-        /* 1bpp formats */
-    case PIXMAN_a1: return  fbFetch_a1;
-    case PIXMAN_g1: return  fbFetch_g1;
-
-        /* YUV formats */
-    case PIXMAN_yuy2: return fbFetch_yuy2;
-    case PIXMAN_yv12: return fbFetch_yv12;
-    }
-
-    return NULL;
-}
-
-/*
- * Pixel wise fetching
- */
-
-typedef FASTCALL uint32_t (*fetchPixelProc)(bits_image_t *pict, int offset, int line);
-
-static FASTCALL uint32_t
-fbFetchPixel_a8r8g8b8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    return READ(pict, (uint32_t *)bits + offset);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x8r8g8b8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    return READ(pict, (uint32_t *)bits + offset) | 0xff000000;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a8b8g8r8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
-
-    return ((pixel & 0xff000000) |
-	    ((pixel >> 16) & 0xff) |
-	    (pixel & 0x0000ff00) |
-	    ((pixel & 0xff) << 16));
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x8b8g8r8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
-
-    return ((0xff000000) |
-	    ((pixel >> 16) & 0xff) |
-	    (pixel & 0x0000ff00) |
-	    ((pixel & 0xff) << 16));
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_r8g8b8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
-#if IMAGE_BYTE_ORDER == MSBFirst
-    return (0xff000000 |
-	    (READ(pict, pixel + 0) << 16) |
-	    (READ(pict, pixel + 1) << 8) |
-	    (READ(pict, pixel + 2)));
-#else
-    return (0xff000000 |
-	    (READ(pict, pixel + 2) << 16) |
-	    (READ(pict, pixel + 1) << 8) |
-	    (READ(pict, pixel + 0)));
-#endif
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_b8g8r8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
-#if IMAGE_BYTE_ORDER == MSBFirst
-    return (0xff000000 |
-	    (READ(pict, pixel + 2) << 16) |
-	    (READ(pict, pixel + 1) << 8) |
-	    (READ(pict, pixel + 0)));
-#else
-    return (0xff000000 |
-	    (READ(pict, pixel + 0) << 16) |
-	    (READ(pict, pixel + 1) << 8) |
-	    (READ(pict, pixel + 2)));
-#endif
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_r5g6b5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    r = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) << 8;
-    g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
-    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_b5g6r5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    b = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) >> 8;
-    g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
-    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a1r5g5b5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
-    r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
-    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
-    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
-    return (a | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x1r5g5b5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
-    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
-    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a1b5g5r5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
-    b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
-    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
-    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
-    return (a | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x1b5g5r5 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
-    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
-    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a4r4g4b4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
-    r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
-    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
-    b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4));
-    return (a | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x4r4g4b4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
-    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
-    b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4));
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a4b4g4r4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
-    b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
-    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
-    r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16;
-    return (a | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x4b4g4r4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
-
-    b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
-    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
-    r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    return pixel << 24;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_r3g3b2 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    r = ((pixel & 0xe0) | ((pixel & 0xe0) >> 3) | ((pixel & 0xc0) >> 6)) << 16;
-    g = ((pixel & 0x1c) | ((pixel & 0x18) >> 3) | ((pixel & 0x1c) << 3)) << 8;
-    b = (((pixel & 0x03)     ) |
-	 ((pixel & 0x03) << 2) |
-	 ((pixel & 0x03) << 4) |
-	 ((pixel & 0x03) << 6));
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_b2g3r3 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    b = (((pixel & 0xc0)     ) |
-	 ((pixel & 0xc0) >> 2) |
-	 ((pixel & 0xc0) >> 4) |
-	 ((pixel & 0xc0) >> 6));
-    g = ((pixel & 0x38) | ((pixel & 0x38) >> 3) | ((pixel & 0x30) << 2)) << 8;
-    r = (((pixel & 0x07)     ) |
-	 ((pixel & 0x07) << 3) |
-	 ((pixel & 0x06) << 6)) << 16;
-    return (0xff000000 | r | g | b);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a2r2g2b2 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t   a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    a = ((pixel & 0xc0) * 0x55) << 18;
-    r = ((pixel & 0x30) * 0x55) << 12;
-    g = ((pixel & 0x0c) * 0x55) << 6;
-    b = ((pixel & 0x03) * 0x55);
-    return a|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a2b2g2r2 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t   a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    a = ((pixel & 0xc0) * 0x55) << 18;
-    b = ((pixel & 0x30) * 0x55) >> 6;
-    g = ((pixel & 0x0c) * 0x55) << 6;
-    r = ((pixel & 0x03) * 0x55) << 16;
-    return a|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_c8 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-    const pixman_indexed_t * indexed = pict->indexed;
-    return indexed->rgba[pixel];
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_x4a4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-
-    return ((pixel & 0xf) | ((pixel & 0xf) << 4)) << 24;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-
-    pixel |= pixel << 4;
-    return pixel << 24;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_r1g2b1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-
-    r = ((pixel & 0x8) * 0xff) << 13;
-    g = ((pixel & 0x6) * 0x55) << 7;
-    b = ((pixel & 0x1) * 0xff);
-    return 0xff000000|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_b1g2r1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-
-    b = ((pixel & 0x8) * 0xff) >> 3;
-    g = ((pixel & 0x6) * 0x55) << 7;
-    r = ((pixel & 0x1) * 0xff) << 16;
-    return 0xff000000|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a1r1g1b1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-
-    a = ((pixel & 0x8) * 0xff) << 21;
-    r = ((pixel & 0x4) * 0xff) << 14;
-    g = ((pixel & 0x2) * 0xff) << 7;
-    b = ((pixel & 0x1) * 0xff);
-    return a|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_a1b1g1r1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t  a,r,g,b;
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-
-    a = ((pixel & 0x8) * 0xff) << 21;
-    r = ((pixel & 0x4) * 0xff) >> 3;
-    g = ((pixel & 0x2) * 0xff) << 7;
-    b = ((pixel & 0x1) * 0xff) << 16;
-    return a|r|g|b;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_c4 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = Fetch4(pict, bits, offset);
-    const pixman_indexed_t * indexed = pict->indexed;
-
-    return indexed->rgba[pixel];
-}
-
-
-static FASTCALL uint32_t
-fbFetchPixel_a1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t  pixel = READ(pict, bits + (offset >> 5));
-    uint32_t  a;
-#if BITMAP_BIT_ORDER == MSBFirst
-    a = pixel >> (0x1f - (offset & 0x1f));
-#else
-    a = pixel >> (offset & 0x1f);
-#endif
-    a = a & 1;
-    a |= a << 1;
-    a |= a << 2;
-    a |= a << 4;
-    return a << 24;
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_g1 (bits_image_t *pict, int offset, int line)
-{
-    uint32_t *bits = pict->bits + line*pict->rowstride;
-    uint32_t pixel = READ(pict, bits + (offset >> 5));
-    const pixman_indexed_t * indexed = pict->indexed;
-    uint32_t a;
-#if BITMAP_BIT_ORDER == MSBFirst
-    a = pixel >> (0x1f - (offset & 0x1f));
-#else
-    a = pixel >> (offset & 0x1f);
-#endif
-    a = a & 1;
-    return indexed->rgba[a];
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_yuy2 (bits_image_t *pict, int offset, int line)
-{
-    int16_t y, u, v;
-    int32_t r, g, b;
-
-    const uint32_t *bits = pict->bits + pict->rowstride * line;
-
-    y = ((uint8_t *) bits)[offset << 1] - 16;
-    u = ((uint8_t *) bits)[((offset << 1) & -4) + 1] - 128;
-    v = ((uint8_t *) bits)[((offset << 1) & -4) + 3] - 128;
-
-    /* R = 1.164(Y - 16) + 1.596(V - 128) */
-    r = 0x012b27 * y + 0x019a2e * v;
-    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-    /* B = 1.164(Y - 16) + 2.018(U - 128) */
-    b = 0x012b27 * y + 0x0206a2 * u;
-
-    return 0xff000000 |
-	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
-}
-
-static FASTCALL uint32_t
-fbFetchPixel_yv12 (bits_image_t *pict, int offset, int line)
-{
-    YV12_SETUP(pict);
-    int16_t y = YV12_Y (line)[offset] - 16;
-    int16_t u = YV12_U (line)[offset >> 1] - 128;
-    int16_t v = YV12_V (line)[offset >> 1] - 128;
-    int32_t r, g, b;
-
-    /* R = 1.164(Y - 16) + 1.596(V - 128) */
-    r = 0x012b27 * y + 0x019a2e * v;
-    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
-    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
-    /* B = 1.164(Y - 16) + 2.018(U - 128) */
-    b = 0x012b27 * y + 0x0206a2 * u;
-
-    return 0xff000000 |
-	(r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
-	(g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
-	(b >= 0 ? b < 0x1000000 ? (b >> 16) & 0x0000ff : 0x0000ff : 0);
-}
-
-static fetchPixelProc fetchPixelProcForPicture (bits_image_t * pict)
-{
-    switch(pict->format) {
-    case PIXMAN_a8r8g8b8: return fbFetchPixel_a8r8g8b8;
-    case PIXMAN_x8r8g8b8: return fbFetchPixel_x8r8g8b8;
-    case PIXMAN_a8b8g8r8: return fbFetchPixel_a8b8g8r8;
-    case PIXMAN_x8b8g8r8: return fbFetchPixel_x8b8g8r8;
-
-        /* 24bpp formats */
-    case PIXMAN_r8g8b8: return fbFetchPixel_r8g8b8;
-    case PIXMAN_b8g8r8: return fbFetchPixel_b8g8r8;
-
-        /* 16bpp formats */
-    case PIXMAN_r5g6b5: return fbFetchPixel_r5g6b5;
-    case PIXMAN_b5g6r5: return fbFetchPixel_b5g6r5;
-
-    case PIXMAN_a1r5g5b5: return fbFetchPixel_a1r5g5b5;
-    case PIXMAN_x1r5g5b5: return fbFetchPixel_x1r5g5b5;
-    case PIXMAN_a1b5g5r5: return fbFetchPixel_a1b5g5r5;
-    case PIXMAN_x1b5g5r5: return fbFetchPixel_x1b5g5r5;
-    case PIXMAN_a4r4g4b4: return fbFetchPixel_a4r4g4b4;
-    case PIXMAN_x4r4g4b4: return fbFetchPixel_x4r4g4b4;
-    case PIXMAN_a4b4g4r4: return fbFetchPixel_a4b4g4r4;
-    case PIXMAN_x4b4g4r4: return fbFetchPixel_x4b4g4r4;
-
-        /* 8bpp formats */
-    case PIXMAN_a8: return  fbFetchPixel_a8;
-    case PIXMAN_r3g3b2: return fbFetchPixel_r3g3b2;
-    case PIXMAN_b2g3r3: return fbFetchPixel_b2g3r3;
-    case PIXMAN_a2r2g2b2: return fbFetchPixel_a2r2g2b2;
-    case PIXMAN_a2b2g2r2: return fbFetchPixel_a2b2g2r2;
-    case PIXMAN_c8: return  fbFetchPixel_c8;
-    case PIXMAN_g8: return  fbFetchPixel_c8;
-    case PIXMAN_x4a4: return fbFetchPixel_x4a4;
-
-        /* 4bpp formats */
-    case PIXMAN_a4: return  fbFetchPixel_a4;
-    case PIXMAN_r1g2b1: return fbFetchPixel_r1g2b1;
-    case PIXMAN_b1g2r1: return fbFetchPixel_b1g2r1;
-    case PIXMAN_a1r1g1b1: return fbFetchPixel_a1r1g1b1;
-    case PIXMAN_a1b1g1r1: return fbFetchPixel_a1b1g1r1;
-    case PIXMAN_c4: return  fbFetchPixel_c4;
-    case PIXMAN_g4: return  fbFetchPixel_c4;
-
-        /* 1bpp formats */
-    case PIXMAN_a1: return  fbFetchPixel_a1;
-    case PIXMAN_g1: return  fbFetchPixel_g1;
-
-        /* YUV formats */
-    case PIXMAN_yuy2: return fbFetchPixel_yuy2;
-    case PIXMAN_yv12: return fbFetchPixel_yv12;
-    }
-
-    return NULL;
-}
-
-
-/*
- * All the store functions
- */
-
-typedef FASTCALL void (*storeProc) (pixman_image_t *image,
-				    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed);
-
-#define Splita(v)	uint32_t	a = ((v) >> 24), r = ((v) >> 16) & 0xff, g = ((v) >> 8) & 0xff, b = (v) & 0xff
-#define Split(v)	uint32_t	r = ((v) >> 16) & 0xff, g = ((v) >> 8) & 0xff, b = (v) & 0xff
-
-static FASTCALL void
-fbStore_a8r8g8b8 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    MEMCPY_WRAPPED(image, ((uint32_t *)bits) + x, values, width*sizeof(uint32_t));
-}
-
-static FASTCALL void
-fbStore_x8r8g8b8 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint32_t *pixel = (uint32_t *)bits + x;
-    for (i = 0; i < width; ++i)
-	WRITE(image, pixel++, values[i] & 0xffffff);
-}
-
-static FASTCALL void
-fbStore_a8b8g8r8 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint32_t *pixel = (uint32_t *)bits + x;
-    for (i = 0; i < width; ++i)
-	WRITE(image, pixel++, (values[i] & 0xff00ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
-}
-
-static FASTCALL void
-fbStore_x8b8g8r8 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint32_t *pixel = (uint32_t *)bits + x;
-    for (i = 0; i < width; ++i)
-	WRITE(image, pixel++, (values[i] & 0x0000ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
-}
-
-static FASTCALL void
-fbStore_r8g8b8 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width,
-		const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
-    for (i = 0; i < width; ++i) {
-	Store24(image, pixel, values[i]);
-	pixel += 3;
-    }
-}
-
-static FASTCALL void
-fbStore_b8g8r8 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
-    for (i = 0; i < width; ++i) {
-	uint32_t val = values[i];
-#if IMAGE_BYTE_ORDER == MSBFirst
-	WRITE(image, pixel++, Blue(val));
-	WRITE(image, pixel++, Green(val));
-	WRITE(image, pixel++, Red(val));
-#else
-	WRITE(image, pixel++, Red(val));
-	WRITE(image, pixel++, Green(val));
-	WRITE(image, pixel++, Blue(val));
-#endif
-    }
-}
-
-static FASTCALL void
-fbStore_r5g6b5 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	uint32_t s = values[i];
-	WRITE(image, pixel++, ((s >> 3) & 0x001f) |
-	      ((s >> 5) & 0x07e0) |
-	      ((s >> 8) & 0xf800));
-    }
-}
-
-static FASTCALL void
-fbStore_b5g6r5 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++, ((b << 8) & 0xf800) |
-	      ((g << 3) & 0x07e0) |
-	      ((r >> 3)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_a1r5g5b5 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Splita(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0x8000) |
-	      ((r << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((b >> 3)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_x1r5g5b5 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++, ((r << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((b >> 3)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_a1b5g5r5 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Splita(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0x8000) |
-	      ((b << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((r >> 3)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_x1b5g5r5 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++, ((b << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((r >> 3)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_a4r4g4b4 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Splita(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0xf000) |
-	      ((r << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((b >> 4)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_x4r4g4b4 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++, ((r << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((b >> 4)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_a4b4g4r4 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Splita(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0xf000) |
-	      ((b << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((r >> 4)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_x4b4g4r4 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint16_t  *pixel = ((uint16_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++, ((b << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((r >> 4)         ));
-    }
-}
-
-static FASTCALL void
-fbStore_a8 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	WRITE(image, pixel++, values[i] >> 24);
-    }
-}
-
-static FASTCALL void
-fbStore_r3g3b2 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++,
-	      ((r     ) & 0xe0) |
-	      ((g >> 3) & 0x1c) |
-	      ((b >> 6)       ));
-    }
-}
-
-static FASTCALL void
-fbStore_b2g3r3 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Split(values[i]);
-	WRITE(image, pixel++,
-	      ((b     ) & 0xc0) |
-	      ((g >> 2) & 0x1c) |
-	      ((r >> 5)       ));
-    }
-}
-
-static FASTCALL void
-fbStore_a2r2g2b2 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	Splita(values[i]);
-	WRITE(image, pixel++, ((a     ) & 0xc0) |
-	      ((r >> 2) & 0x30) |
-	      ((g >> 4) & 0x0c) |
-	      ((b >> 6)       ));
-    }
-}
-
-static FASTCALL void
-fbStore_c8 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	WRITE(image, pixel++, miIndexToEnt24(indexed,values[i]));
-    }
-}
-
-static FASTCALL void
-fbStore_x4a4 (pixman_image_t *image,
-	      uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    uint8_t   *pixel = ((uint8_t *) bits) + x;
-    for (i = 0; i < width; ++i) {
-	WRITE(image, pixel++, values[i] >> 28);
-    }
-}
-
-#define Store8(img,l,o,v)  (WRITE(img, (uint8_t *)(l) + ((o) >> 3), (v)))
-#if IMAGE_BYTE_ORDER == MSBFirst
-#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?				\
-				   (Fetch8(img,l,o) & 0xf0) | (v) :		\
-				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4)))
-#else
-#define Store4(img,l,o,v)  Store8(img,l,o,((o) & 4 ?			       \
-				   (Fetch8(img,l,o) & 0x0f) | ((v) << 4) : \
-				   (Fetch8(img,l,o) & 0xf0) | (v)))
-#endif
-
-static FASTCALL void
-fbStore_a4 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	Store4(image, bits, i + x, values[i]>>28);
-    }
-}
-
-static FASTCALL void
-fbStore_r1g2b1 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-
-	Split(values[i]);
-	pixel = (((r >> 4) & 0x8) |
-		 ((g >> 5) & 0x6) |
-		 ((b >> 7)      ));
-	Store4(image, bits, i + x, pixel);
-    }
-}
-
-static FASTCALL void
-fbStore_b1g2r1 (pixman_image_t *image,
-		uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-
-	Split(values[i]);
-	pixel = (((b >> 4) & 0x8) |
-		 ((g >> 5) & 0x6) |
-		 ((r >> 7)      ));
-	Store4(image, bits, i + x, pixel);
-    }
-}
-
-static FASTCALL void
-fbStore_a1r1g1b1 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-	Splita(values[i]);
-	pixel = (((a >> 4) & 0x8) |
-		 ((r >> 5) & 0x4) |
-		 ((g >> 6) & 0x2) |
-		 ((b >> 7)      ));
-	Store4(image, bits, i + x, pixel);
-    }
-}
-
-static FASTCALL void
-fbStore_a1b1g1r1 (pixman_image_t *image,
-		  uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-	Splita(values[i]);
-	pixel = (((a >> 4) & 0x8) |
-		 ((b >> 5) & 0x4) |
-		 ((g >> 6) & 0x2) |
-		 ((r >> 7)      ));
-	Store4(image, bits, i + x, pixel);
-    }
-}
-
-static FASTCALL void
-fbStore_c4 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-
-	pixel = miIndexToEnt24(indexed, values[i]);
-	Store4(image, bits, i + x, pixel);
-    }
-}
-
-static FASTCALL void
-fbStore_a1 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
-	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
-
-	uint32_t v = values[i] & 0x80000000 ? mask : 0;
-	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
-    }
-}
-
-static FASTCALL void
-fbStore_g1 (pixman_image_t *image,
-	    uint32_t *bits, const uint32_t *values, int x, int width, const pixman_indexed_t * indexed)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
-	uint32_t  mask = FbStipMask((i+x) & 0x1f, 1);
-
-	uint32_t v = miIndexToEntY24(indexed,values[i]) ? mask : 0;
-	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
-    }
-}
-
-
-static storeProc storeProcForPicture (bits_image_t * pict)
-{
-    switch(pict->format) {
-    case PIXMAN_a8r8g8b8: return fbStore_a8r8g8b8;
-    case PIXMAN_x8r8g8b8: return fbStore_x8r8g8b8;
-    case PIXMAN_a8b8g8r8: return fbStore_a8b8g8r8;
-    case PIXMAN_x8b8g8r8: return fbStore_x8b8g8r8;
-
-        /* 24bpp formats */
-    case PIXMAN_r8g8b8: return fbStore_r8g8b8;
-    case PIXMAN_b8g8r8: return fbStore_b8g8r8;
-
-        /* 16bpp formats */
-    case PIXMAN_r5g6b5: return fbStore_r5g6b5;
-    case PIXMAN_b5g6r5: return fbStore_b5g6r5;
-
-    case PIXMAN_a1r5g5b5: return fbStore_a1r5g5b5;
-    case PIXMAN_x1r5g5b5: return fbStore_x1r5g5b5;
-    case PIXMAN_a1b5g5r5: return fbStore_a1b5g5r5;
-    case PIXMAN_x1b5g5r5: return fbStore_x1b5g5r5;
-    case PIXMAN_a4r4g4b4: return fbStore_a4r4g4b4;
-    case PIXMAN_x4r4g4b4: return fbStore_x4r4g4b4;
-    case PIXMAN_a4b4g4r4: return fbStore_a4b4g4r4;
-    case PIXMAN_x4b4g4r4: return fbStore_x4b4g4r4;
-
-        /* 8bpp formats */
-    case PIXMAN_a8: return  fbStore_a8;
-    case PIXMAN_r3g3b2: return fbStore_r3g3b2;
-    case PIXMAN_b2g3r3: return fbStore_b2g3r3;
-    case PIXMAN_a2r2g2b2: return fbStore_a2r2g2b2;
-    case PIXMAN_c8: return  fbStore_c8;
-    case PIXMAN_g8: return  fbStore_c8;
-    case PIXMAN_x4a4: return fbStore_x4a4;
-
-        /* 4bpp formats */
-    case PIXMAN_a4: return  fbStore_a4;
-    case PIXMAN_r1g2b1: return fbStore_r1g2b1;
-    case PIXMAN_b1g2r1: return fbStore_b1g2r1;
-    case PIXMAN_a1r1g1b1: return fbStore_a1r1g1b1;
-    case PIXMAN_a1b1g1r1: return fbStore_a1b1g1r1;
-    case PIXMAN_c4: return  fbStore_c4;
-    case PIXMAN_g4: return  fbStore_c4;
-
-        /* 1bpp formats */
-    case PIXMAN_a1: return  fbStore_a1;
-    case PIXMAN_g1: return  fbStore_g1;
-    default:
-        return NULL;
-    }
-}
-
-
 static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
     uint32_t color;
     uint32_t *end;
-    fetchPixelProc fetch = fetchPixelProcForPicture(pict);
+    fetchPixelProc fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
 
     color = fetch(pict, 0, 0);
 
@@ -1771,19 +126,11 @@ static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t
 
 static void fbFetch(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
-    fetchProc fetch = fetchProcForPicture(pict);
+    fetchProc fetch = FETCH_PROC_FOR_PICTURE(pict);
 
     fetch(pict, x, y, width, buffer);
 }
 
-#ifdef PIXMAN_FB_ACCESSORS
-#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_accessors
-#define PIXMAN_COMPOSE_FUNCTIONS pixman_composeFunctions_accessors
-#else
-#define PIXMAN_COMPOSITE_RECT_GENERAL pixman_composite_rect_general_no_accessors
-#define PIXMAN_COMPOSE_FUNCTIONS pixman_composeFunctions
-#endif
-
 #ifdef PIXMAN_FB_ACCESSORS	/* The accessor version can't be parameterized from outside */
 static const
 #endif
@@ -2482,7 +829,7 @@ fbFetchTransformed_Nearest_Normal(bits_image_t * pict, int width, uint32_t *buff
     int x, y, i;
 
     /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
@@ -2528,7 +875,7 @@ fbFetchTransformed_Nearest_Pad(bits_image_t * pict, int width, uint32_t *buffer,
     int x, y, i;
 
     /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
@@ -2575,7 +922,7 @@ fbFetchTransformed_Nearest_General(bits_image_t * pict, int width, uint32_t *buf
     int x, y, i;
 
     /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
     {
@@ -2618,7 +965,7 @@ fbFetchTransformed_Bilinear_Normal(bits_image_t * pict, int width, uint32_t *buf
     int i;
 
     /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
@@ -2695,7 +1042,7 @@ fbFetchTransformed_Bilinear_Pad(bits_image_t * pict, int width, uint32_t *buffer
     int i;
 
     /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
@@ -2772,7 +1119,7 @@ fbFetchTransformed_Bilinear_General(bits_image_t * pict, int width, uint32_t *bu
     int i;
 
     /* initialize the two function pointers */
-    fetch = fetchPixelProcForPicture(pict);
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
 
     if(pixman_region_n_rects (pict->common.src_clip) == 1)
     {
@@ -2854,7 +1201,7 @@ fbFetchTransformed_Convolution(bits_image_t * pict, int width, uint32_t *buffer,
     int32_t cheight = pixman_fixed_to_int(params[1]);
     int xoff = (params[0] - pixman_fixed_1) >> 1;
     int yoff = (params[1] - pixman_fixed_1) >> 1;
-    fetch = fetchPixelProcForPicture(pict);
+    fetch = FETCH_PIXEL_PROC_FOR_PICTURE(pict);
 
     params += 2;
     for (i = 0; i < width; ++i) {
@@ -3075,7 +1422,7 @@ fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
 {
     uint32_t *bits;
     int32_t stride;
-    storeProc store = storeProcForPicture(pict);
+    storeProc store = STORE_PROC_FOR_PICTURE(pict);
     const pixman_indexed_t * indexed = pict->indexed;
 
     bits = pict->bits;
@@ -3100,8 +1447,8 @@ fbStoreExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buf
 	return;
     }
 
-    store = storeProcForPicture(pict);
-    astore = storeProcForPicture(pict->common.alpha_map);
+    store = STORE_PROC_FOR_PICTURE(pict);
+    astore = STORE_PROC_FOR_PICTURE(pict->common.alpha_map);
     aindexed = pict->common.alpha_map->indexed;
 
     ax = x;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index a833b01..b0aa375 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -146,6 +146,12 @@ typedef struct point point_t;
 typedef FASTCALL void (*CombineMaskU) (uint32_t *src, const uint32_t *mask, int width);
 typedef FASTCALL void (*CombineFuncU) (uint32_t *dest, const uint32_t *src, int width);
 typedef FASTCALL void (*CombineFuncC) (uint32_t *dest, uint32_t *src, uint32_t *mask, int width);
+typedef FASTCALL void (*fetchProc)(bits_image_t *pict, int x, int y, int width,
+                                   uint32_t *buffer);
+typedef FASTCALL uint32_t (*fetchPixelProc)(bits_image_t *pict, int offset, int line);
+typedef FASTCALL void (*storeProc)(pixman_image_t *, uint32_t *bits,
+                                   const uint32_t *values, int x, int width,
+                                   const pixman_indexed_t *);
 
 typedef struct _FbComposeData {
     uint8_t	 op;
@@ -175,6 +181,13 @@ void pixman_composite_rect_general_accessors (const FbComposeData *data,
 void pixman_composite_rect_general (const FbComposeData *data,
 				    uint32_t *scanline_buffer);
 
+fetchProc pixman_fetchProcForPicture (bits_image_t *);
+fetchPixelProc pixman_fetchPixelProcForPicture (bits_image_t *);
+storeProc pixman_storeProcForPicture (bits_image_t *);
+fetchProc pixman_fetchProcForPicture_accessors (bits_image_t *);
+fetchPixelProc pixman_fetchPixelProcForPicture_accessors (bits_image_t *);
+storeProc pixman_storeProcForPicture_accessors (bits_image_t *);
+
 /* end */
 
 typedef enum
commit d6143b8634e2d923456b3198e692ccb463d59952
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Fri Mar 21 17:13:32 2008 -0700

    Get rid of fbPrepareAccess and fbFinishAccess, since the former was unused and neither one did anything anyway.

diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 8c74d77..b75b461 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -1767,7 +1767,6 @@ static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t
     end = buffer + width;
     while (buffer < end)
 	*(buffer++) = color;
-    fbFinishAccess (pict->pDrawable);
 }
 
 static void fbFetch(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
@@ -2977,10 +2976,7 @@ fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uint32_t *buffe
     if (pict->common.transform)
     {
         if (!pixman_transform_point_3d (pict->common.transform, &v))
-        {
-            fbFinishAccess (pict->pDrawable);
             return;
-        }
         unit.vector[0] = pict->common.transform->matrix[0][0];
         unit.vector[1] = pict->common.transform->matrix[1][0];
         unit.vector[2] = pict->common.transform->matrix[2][0];
@@ -3038,8 +3034,6 @@ fbFetchTransformed(bits_image_t * pict, int x, int y, int width, uint32_t *buffe
 	
         fbFetchTransformed_Convolution(pict, width, buffer, mask, maskBits, affine, v, unit);
     }
-
-    fbFinishAccess (pict->pDrawable);
 }
 
 
@@ -3088,7 +3082,6 @@ fbStore(bits_image_t * pict, int x, int y, int width, uint32_t *buffer)
     stride = pict->rowstride;
     bits += y*stride;
     store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
-    fbFinishAccess (pict->pDrawable);
 }
 
 static void
@@ -3127,9 +3120,6 @@ fbStoreExternalAlpha(bits_image_t * pict, int x, int y, int width, uint32_t *buf
     store((pixman_image_t *)pict, bits, buffer, x, width, indexed);
     astore((pixman_image_t *)pict->common.alpha_map,
 	   alpha_bits, buffer, ax - pict->common.alpha_origin.x, width, aindexed);
-
-    fbFinishAccess (pict->alpha_map->pDrawable);
-    fbFinishAccess (pict->pDrawable);
 }
 
 typedef void (*scanStoreProc)(pixman_image_t *, int, int, int, uint32_t *);
@@ -3428,9 +3418,6 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 	    }
 	}
     }
-
-    if (!store)
-	fbFinishAccess (data->dest->pDrawable);
 }
 
 #ifndef PIXMAN_FB_ACCESSORS
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index e4430d1..366c444 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -149,9 +149,6 @@ fbCompositeOver_x888x8x8888 (pixman_op_t      op,
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 static void
@@ -340,9 +337,6 @@ fbCompositeSolidMask_nx8x8888 (pixman_op_t      op,
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
@@ -416,9 +410,6 @@ fbCompositeSolidMask_nx8888x8888C (pixman_op_t op,
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
@@ -481,9 +472,6 @@ fbCompositeSolidMask_nx8x0888 (pixman_op_t op,
 	    dst += 3;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
@@ -547,9 +535,6 @@ fbCompositeSolidMask_nx8x0565 (pixman_op_t op,
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
@@ -623,9 +608,6 @@ fbCompositeSolidMask_nx8888x0565C (pixman_op_t op,
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pMask->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
@@ -672,9 +654,6 @@ fbCompositeSrc_8888x8888 (pixman_op_t op,
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pSrc->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
@@ -724,9 +703,6 @@ fbCompositeSrc_8888x0888 (pixman_op_t op,
 	    dst += 3;
 	}
     }
-
-    fbFinishAccess (pSrc->pDrawable);
-    fbFinishAccess (pDst->pDrawable);
 }
 
 void
@@ -779,9 +755,6 @@ fbCompositeSrc_8888x0565 (pixman_op_t op,
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pDst->pDrawable);
-    fbFinishAccess (pSrc->pDrawable);
 }
 
 void
@@ -832,9 +805,6 @@ fbCompositeSrcAdd_8000x8000 (pixman_op_t	op,
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pDst->pDrawable);
-    fbFinishAccess (pSrc->pDrawable);
 }
 
 void
@@ -892,9 +862,6 @@ fbCompositeSrcAdd_8888x8888 (pixman_op_t	op,
 	    dst++;
 	}
     }
-
-    fbFinishAccess (pDst->pDrawable);
-    fbFinishAccess (pSrc->pDrawable);
 }
 
 static void
@@ -947,9 +914,6 @@ fbCompositeSrcAdd_8888x8x8 (pixman_op_t op,
 	    WRITE(pDst, dst++, r);
 	}
     }
-
-    fbFinishAccess(pDst->pDrawable);
-    fbFinishAccess(pMask->pDrawable);
 }
 
 void
@@ -997,8 +961,6 @@ fbCompositeSrcAdd_1000x1000 (pixman_op_t	op,
 	   FALSE,
 	   FALSE);
 
-    fbFinishAccess(pDst->pDrawable);
-    fbFinishAccess(pSrc->pDrawable);
 #endif
 }
 
@@ -1059,8 +1021,6 @@ fbCompositeSolidMask_nx1xn (pixman_op_t op,
 	      FB_ALLONES,
 	      0x0);
 
-    fbFinishAccess (pDst->pDrawable);
-    fbFinishAccess (pMask->pDrawable);
 #endif
 }
 
@@ -1131,9 +1091,6 @@ fbCompositeSrcSrc_nxn  (pixman_op_t	   op,
 
 	   reverse,
 	   upsidedown);
-
-    fbFinishAccess(pSrc->pDrawable);
-    fbFinishAccess(pDst->pDrawable);
 #endif
 }
 
@@ -1210,9 +1167,6 @@ fbCompositeSrc_8888xx888 (pixman_op_t op,
 	dst += dstStride;
 	src += srcStride;
     }
-
-    fbFinishAccess(pSrc->pDrawable);
-    fbFinishAccess(pDst->pDrawable);
 }
 
 static void
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 90ea4cb..a833b01 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -676,10 +676,6 @@ FASTCALL void pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int wi
 	}								\
     } while (0)
 
-/* FIXME */
-#define fbPrepareAccess(x)
-#define fbFinishAccess(x)
-
 #else
 
 #define READ(img, ptr)		(*(ptr))
@@ -688,8 +684,7 @@ FASTCALL void pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int wi
     memcpy(dst, src, size)
 #define MEMSET_WRAPPED(img, dst, val, size)					\
     memset(dst, val, size)
-#define fbPrepareAccess(x)
-#define fbFinishAccess(x)
+
 #endif
 
 #define fbComposeGetSolid(img, res, fmt)				\
diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c
index 468324e..0dca6cc 100644
--- a/pixman/pixman-trap.c
+++ b/pixman/pixman-trap.c
@@ -83,8 +83,6 @@ pixman_add_traps (pixman_image_t *	image,
 	}
 	traps++;
     }
-
-    fbFinishAccess (pPicture->pDrawable);
 }
 
 static void
commit 9fe5913c34a71d83a497e0d2de8db50522db75ee
Author: Aaron Plattner <aplattner at nvidia.com>
Date:   Fri Mar 21 14:15:34 2008 -0700

    Rename pixman-compose-operators.c to pixman-combine.c and fix the build.
    
    Add pixman-combine.c to the source list.  Make the functions in it static.  Call
    fbCombineInU through the PIXMAN_COMPOSE_FUNCTIONS table.  Declare the function
    tables as extern in pixman-private.h.

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 1f21f8c..40ff20f 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -6,6 +6,7 @@ libpixman_1_la_SOURCES =		\
 	pixman-region.c		\
 	pixman-private.h	\
 	pixman-image.c		\
+	pixman-combine.c	\
 	pixman-compose.c	\
 	pixman-compose-accessors.c	\
 	pixman-pict.c		\
diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
index 93ab3f6..eb3795d 100644
--- a/pixman/Makefile.win32
+++ b/pixman/Makefile.win32
@@ -24,7 +24,7 @@ endif
 SOURCES = \
 	pixman-region.c				\
 	pixman-image.c					\
-	pixman-compose-operators.c				\
+	pixman-combine.c				\
 	pixman-compose.c				\
 	pixman-compose-accessors.c	\
 	pixman-pict.c					\
diff --git a/pixman/pixman-combine.c b/pixman/pixman-combine.c
new file mode 100644
index 0000000..0d9c066
--- /dev/null
+++ b/pixman/pixman-combine.c
@@ -0,0 +1,1260 @@
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <string.h>
+
+#include "pixman-private.h"
+/*
+ * There are two ways of handling alpha -- either as a single unified value or
+ * a separate value for each component, hence each macro must have two
+ * versions.  The unified alpha version has a 'U' at the end of the name,
+ * the component version has a 'C'.  Similarly, functions which deal with
+ * this difference will have two versions using the same convention.
+ */
+
+
+/*
+ * Combine src and mask
+ */
+FASTCALL void
+pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t a = *(mask + i) >> 24;
+        uint32_t s = *(src + i);
+        FbByteMul(s, a);
+        *(src + i) = s;
+    }
+}
+
+/*
+ * All of the composing functions
+ */
+
+FASTCALL static void
+fbCombineClear (uint32_t *dest, const uint32_t *src, int width)
+{
+    memset(dest, 0, width*sizeof(uint32_t));
+}
+
+FASTCALL static void
+fbCombineSrcU (uint32_t *dest, const uint32_t *src, int width)
+{
+    memcpy(dest, src, width*sizeof(uint32_t));
+}
+
+/* if the Src is opaque, call fbCombineSrcU */
+FASTCALL static void
+fbCombineOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t ia = Alpha(~s);
+
+        FbByteMulAdd(d, ia, s);
+	*(dest + i) = d;
+    }
+}
+
+/* if the Dst is opaque, this is a noop */
+FASTCALL static void
+fbCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t ia = Alpha(~*(dest + i));
+        FbByteMulAdd(s, ia, d);
+	*(dest + i) = s;
+    }
+}
+
+/* if the Dst is opaque, call fbCombineSrcU */
+FASTCALL static void
+fbCombineInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t a = Alpha(*(dest + i));
+        FbByteMul(s, a);
+	*(dest + i) = s;
+    }
+}
+
+/* if the Src is opaque, this is a noop */
+FASTCALL static void
+fbCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t a = Alpha(*(src + i));
+        FbByteMul(d, a);
+	*(dest + i) = d;
+    }
+}
+
+/* if the Dst is opaque, call fbCombineClear */
+FASTCALL static void
+fbCombineOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t a = Alpha(~*(dest + i));
+        FbByteMul(s, a);
+	*(dest + i) = s;
+    }
+}
+
+/* if the Src is opaque, call fbCombineClear */
+FASTCALL static void
+fbCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t a = Alpha(~*(src + i));
+        FbByteMul(d, a);
+	*(dest + i) = d;
+    }
+}
+
+/* if the Src is opaque, call fbCombineInU */
+/* if the Dst is opaque, call fbCombineOverU */
+/* if both the Src and Dst are opaque, call fbCombineSrcU */
+FASTCALL static void
+fbCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t dest_a = Alpha(d);
+        uint32_t src_ia = Alpha(~s);
+
+        FbByteAddMul(s, dest_a, d, src_ia);
+	*(dest + i) = s;
+    }
+}
+
+/* if the Src is opaque, call fbCombineOverReverseU */
+/* if the Dst is opaque, call fbCombineInReverseU */
+/* if both the Src and Dst are opaque, call fbCombineDstU */
+FASTCALL static void
+fbCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t src_a = Alpha(s);
+        uint32_t dest_ia = Alpha(~d);
+
+        FbByteAddMul(s, dest_ia, d, src_a);
+	*(dest + i) = s;
+    }
+}
+
+/* if the Src is opaque, call fbCombineOverU */
+/* if the Dst is opaque, call fbCombineOverReverseU */
+/* if both the Src and Dst are opaque, call fbCombineClear */
+FASTCALL static void
+fbCombineXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t src_ia = Alpha(~s);
+        uint32_t dest_ia = Alpha(~d);
+
+        FbByteAddMul(s, dest_ia, d, src_ia);
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineAddU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        FbByteAdd(d, s);
+	*(dest + i) = d;
+    }
+}
+
+/* if the Src is opaque, call fbCombineAddU */
+/* if the Dst is opaque, call fbCombineAddU */
+/* if both the Src and Dst are opaque, call fbCombineAddU */
+FASTCALL static void
+fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t  s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint16_t  sa, da;
+
+        sa = s >> 24;
+        da = ~d >> 24;
+        if (sa > da)
+        {
+            sa = FbIntDiv(da, sa);
+            FbByteMul(s, sa);
+        };
+        FbByteAdd(d, s);
+	*(dest + i) = d;
+    }
+}
+
+
+/*
+ * All of the disjoint composing functions
+
+ The four entries in the first column indicate what source contributions
+ come from each of the four areas of the picture -- areas covered by neither
+ A nor B, areas covered only by A, areas covered only by B and finally
+ areas covered by both A and B.
+
+ Disjoint			Conjoint
+ Fa		Fb		Fa		Fb
+ (0,0,0,0)	0		0		0		0
+ (0,A,0,A)	1		0		1		0
+ (0,0,B,B)	0		1		0		1
+ (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
+ (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
+ (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
+ (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
+ (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
+ (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
+ (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
+ (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
+ (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
+
+*/
+
+#define CombineAOut 1
+#define CombineAIn  2
+#define CombineBOut 4
+#define CombineBIn  8
+
+#define CombineClear	0
+#define CombineA	(CombineAOut|CombineAIn)
+#define CombineB	(CombineBOut|CombineBIn)
+#define CombineAOver	(CombineAOut|CombineBOut|CombineAIn)
+#define CombineBOver	(CombineAOut|CombineBOut|CombineBIn)
+#define CombineAAtop	(CombineBOut|CombineAIn)
+#define CombineBAtop	(CombineAOut|CombineBIn)
+#define CombineXor	(CombineAOut|CombineBOut)
+
+/* portion covered by a but not b */
+FASTCALL static uint8_t
+fbCombineDisjointOutPart (uint8_t a, uint8_t b)
+{
+    /* min (1, (1-b) / a) */
+
+    b = ~b;		    /* 1 - b */
+    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
+	return 0xff;	    /* 1 */
+    return FbIntDiv(b,a);   /* (1-b) / a */
+}
+
+/* portion covered by both a and b */
+FASTCALL static uint8_t
+fbCombineDisjointInPart (uint8_t a, uint8_t b)
+{
+    /* max (1-(1-b)/a,0) */
+    /*  = - min ((1-b)/a - 1, 0) */
+    /*  = 1 - min (1, (1-b)/a) */
+
+    b = ~b;		    /* 1 - b */
+    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
+	return 0;	    /* 1 - 1 */
+    return ~FbIntDiv(b,a);  /* 1 - (1-b) / a */
+}
+
+/* portion covered by a but not b */
+FASTCALL static uint8_t
+fbCombineConjointOutPart (uint8_t a, uint8_t b)
+{
+    /* max (1-b/a,0) */
+    /* = 1-min(b/a,1) */
+
+    /* (conjoint form; the disjoint counterpart is min (1, (1-b) / a)) */
+
+    if (b >= a)		    /* b >= a -> b/a >= 1 */
+	return 0x00;	    /* 0 */
+    return ~FbIntDiv(b,a);   /* 1 - b/a */
+}
+
+/* portion covered by both a and b */
+FASTCALL static uint8_t
+fbCombineConjointInPart (uint8_t a, uint8_t b)
+{
+    /* min (1,b/a) */
+
+    if (b >= a)		    /* b >= a -> b/a >= 1 */
+	return 0xff;	    /* 1 */
+    return FbIntDiv(b,a);   /* b/a */
+}
+
+FASTCALL static void
+fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t m,n,o,p;
+        uint16_t Fa, Fb, t, u, v;
+        uint8_t sa = s >> 24;
+        uint8_t da = d >> 24;
+
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            Fa = fbCombineDisjointOutPart (sa, da);
+            break;
+        case CombineAIn:
+            Fa = fbCombineDisjointInPart (sa, da);
+            break;
+        case CombineA:
+            Fa = 0xff;
+            break;
+        }
+
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            Fb = fbCombineDisjointOutPart (da, sa);
+            break;
+        case CombineBIn:
+            Fb = fbCombineDisjointInPart (da, sa);
+            break;
+        case CombineB:
+            Fb = 0xff;
+            break;
+        }
+        m = FbGen (s,d,0,Fa,Fb,t, u, v);
+        n = FbGen (s,d,8,Fa,Fb,t, u, v);
+        o = FbGen (s,d,16,Fa,Fb,t, u, v);
+        p = FbGen (s,d,24,Fa,Fb,t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineDisjointOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t  s = *(src + i);
+        uint16_t  a = s >> 24;
+
+        if (a != 0x00)
+        {
+            if (a != 0xff)
+            {
+                uint32_t d = *(dest + i);
+                a = fbCombineDisjointOutPart (d >> 24, a);
+                FbByteMulAdd(d, a, s);
+                s = d;
+            }
+	    *(dest + i) = s;
+        }
+    }
+}
+
+FASTCALL static void
+fbCombineDisjointInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAIn);
+}
+
+FASTCALL static void
+fbCombineDisjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBIn);
+}
+
+FASTCALL static void
+fbCombineDisjointOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAOut);
+}
+
+FASTCALL static void
+fbCombineDisjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBOut);
+}
+
+FASTCALL static void
+fbCombineDisjointAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAAtop);
+}
+
+FASTCALL static void
+fbCombineDisjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBAtop);
+}
+
+FASTCALL static void
+fbCombineDisjointXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineXor);
+}
+
+FASTCALL static void
+fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t  s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t  m,n,o,p;
+        uint16_t  Fa, Fb, t, u, v;
+        uint8_t sa = s >> 24;
+        uint8_t da = d >> 24;
+
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            Fa = fbCombineConjointOutPart (sa, da);
+            break;
+        case CombineAIn:
+            Fa = fbCombineConjointInPart (sa, da);
+            break;
+        case CombineA:
+            Fa = 0xff;
+            break;
+        }
+
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            Fb = fbCombineConjointOutPart (da, sa);
+            break;
+        case CombineBIn:
+            Fb = fbCombineConjointInPart (da, sa);
+            break;
+        case CombineB:
+            Fb = 0xff;
+            break;
+        }
+        m = FbGen (s,d,0,Fa,Fb,t, u, v);
+        n = FbGen (s,d,8,Fa,Fb,t, u, v);
+        o = FbGen (s,d,16,Fa,Fb,t, u, v);
+        p = FbGen (s,d,24,Fa,Fb,t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineConjointOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAOver);
+}
+
+
+FASTCALL static void
+fbCombineConjointOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBOver);
+}
+
+
+FASTCALL static void
+fbCombineConjointInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAIn);
+}
+
+
+FASTCALL static void
+fbCombineConjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBIn);
+}
+
+FASTCALL static void
+fbCombineConjointOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAOut);
+}
+
+FASTCALL static void
+fbCombineConjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBOut);
+}
+
+FASTCALL static void
+fbCombineConjointAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAAtop);
+}
+
+FASTCALL static void
+fbCombineConjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBAtop);
+}
+
+FASTCALL static void
+fbCombineConjointXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineXor);
+}
+
+/********************************************************************************/
+/*************************** Per Channel functions ******************************/
+/********************************************************************************/
+
+FASTCALL static void
+fbCombineMaskC (uint32_t *src, uint32_t *mask)
+{
+    uint32_t a = *mask;
+
+    uint32_t	x;
+    uint16_t	xa;
+
+    if (!a)
+    {
+	*(src) = 0;
+	return;
+    }
+
+    x = *(src);
+    if (a == 0xffffffff)
+    {
+	x = x >> 24;
+	x |= x << 8;
+	x |= x << 16;
+	*(mask) = x;
+	return;
+    }
+
+    xa = x >> 24;
+    FbByteMulC(x, a);
+    *(src) = x;
+    FbByteMul(a, xa);
+    *(mask) = a;
+}
+
+FASTCALL static void
+fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
+{
+    uint32_t a = *mask;
+    uint32_t	x;
+
+    if (!a)
+    {
+	*(src) = 0;
+	return;
+    }
+
+    if (a == 0xffffffff)
+	return;
+
+    x = *(src);
+    FbByteMulC(x, a);
+    *(src) =x;
+}
+
+FASTCALL static void
+fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
+{
+    uint32_t a = *(mask);
+    uint32_t	x;
+    /* Combine *mask with the alpha of *src in place; a zero mask stays zero. */
+    if (!a)
+	return;
+    /* From here on x holds only the 8-bit source alpha (0..0xff). */
+    x = *(src) >> 24;
+    if (x == 0xff)
+	return;
+    if (a == 0xffffffff)
+    {
+	/* x is already the alpha byte: replicate it, do not shift it again */
+	x |= x << 8;
+	x |= x << 16;
+	*(mask) = x;
+	return;
+    }
+
+    FbByteMul(a, x);
+    *(mask) = a;
+}
+
+
+
+FASTCALL static void
+fbCombineClearC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    memset(dest, 0, width*sizeof(uint32_t));
+}
+
+FASTCALL static void
+fbCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+    /* Component-alpha SRC: dest[i] = src[i] after fbCombineMaskValueC with mask[i]. */
+    for (i = 0; i < width; ++i) {
+	uint32_t s = *(src + i);
+	uint32_t m = *(mask + i);
+
+	fbCombineMaskValueC (&s, &m);
+	/* Store at index i; writing through bare dest only ever updated pixel 0. */
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+	uint32_t s = *(src + i);
+	uint32_t m = *(mask + i);
+	uint32_t a;
+
+	fbCombineMaskC (&s, &m);
+
+	a = ~m;
+        if (a != 0xffffffff)
+        {
+            if (a)
+            {
+                uint32_t d = *(dest + i);
+                FbByteMulAddC(d, a, s);
+                s = d;
+            }
+	    *(dest + i) = s;
+        }
+    }
+}
+
+FASTCALL static void
+fbCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t a = ~d >> 24;
+
+        if (a)
+        {
+            uint32_t s = *(src + i);
+	    uint32_t m = *(mask + i);
+
+	    fbCombineMaskValueC (&s, &m);
+
+            if (a != 0xff)
+            {
+                FbByteMulAdd(s, a, d);
+            }
+	    *(dest + i) = s;
+        }
+    }
+}
+
+FASTCALL static void
+fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint16_t a = d >> 24;
+        uint32_t s = 0;
+        if (a)
+        {
+	    uint32_t m = *(mask + i);
+
+	    s = *(src + i);
+	    fbCombineMaskValueC (&s, &m);
+            if (a != 0xff)
+            {
+                FbByteMul(s, a);
+            }
+        }
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t a;
+
+	fbCombineMaskAlphaC (&s, &m);
+
+	a = m;
+        if (a != 0xffffffff)
+        {
+            uint32_t d = 0;
+            if (a)
+            {
+                d = *(dest + i);
+                FbByteMulC(d, a);
+            }
+	    *(dest + i) = d;
+        }
+    }
+}
+
+FASTCALL static void
+fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint16_t a = ~d >> 24;
+        uint32_t s = 0;
+        if (a)
+        {
+	    uint32_t m = *(mask + i);
+
+	    s = *(src + i);
+	    fbCombineMaskValueC (&s, &m);
+
+            if (a != 0xff)
+            {
+                FbByteMul(s, a);
+            }
+        }
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+	uint32_t s = *(src + i);
+	uint32_t m = *(mask + i);
+	uint32_t a;
+
+	fbCombineMaskAlphaC (&s, &m);
+
+        a = ~m;
+        if (a != 0xffffffff)
+        {
+            uint32_t d = 0;
+            if (a)
+            {
+                d = *(dest + i);
+                FbByteMulC(d, a);
+            }
+	    *(dest + i) = d;
+        }
+    }
+}
+
+FASTCALL static void
+fbCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t ad;
+        uint16_t as = d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+        ad = ~m;
+
+        FbByteAddMulC(d, ad, s, as);
+	*(dest + i) = d;
+    }
+}
+
+FASTCALL static void
+fbCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+
+        uint32_t d = *(dest + i);
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t ad;
+        uint16_t as = ~d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+	ad = m;
+
+        FbByteAddMulC(d, ad, s, as);
+	*(dest + i) = d;
+    }
+}
+
+FASTCALL static void
+fbCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t ad;
+        uint16_t as = ~d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+	ad = ~m;
+
+        FbByteAddMulC(d, ad, s, as);
+	*(dest + i) = d;
+    }
+}
+
+FASTCALL static void
+fbCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t d = *(dest + i);
+
+	fbCombineMaskValueC (&s, &m);
+
+        FbByteAdd(d, s);
+	*(dest + i) = d;
+    }
+}
+
+FASTCALL static void
+fbCombineSaturateC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  s, d;
+        uint16_t  sa, sr, sg, sb, da;
+        uint16_t  t, u, v;
+        uint32_t  m,n,o,p;
+
+        d = *(dest + i);
+        s = *(src + i);
+	m = *(mask + i);
+
+	fbCombineMaskC (&s, &m);
+
+        sa = (m >> 24);
+        sr = (m >> 16) & 0xff;
+        sg = (m >>  8) & 0xff;
+        sb = (m      ) & 0xff;
+        da = ~d >> 24;
+
+        if (sb <= da)
+            m = FbAdd(s,d,0,t);
+        else
+            m = FbGen (s, d, 0, (da << 8) / sb, 0xff, t, u, v);
+
+        if (sg <= da)
+            n = FbAdd(s,d,8,t);
+        else
+            n = FbGen (s, d, 8, (da << 8) / sg, 0xff, t, u, v);
+
+        if (sr <= da)
+            o = FbAdd(s,d,16,t);
+        else
+            o = FbGen (s, d, 16, (da << 8) / sr, 0xff, t, u, v);
+
+        if (sa <= da)
+            p = FbAdd(s,d,24,t);
+        else
+            p = FbGen (s, d, 24, (da << 8) / sa, 0xff, t, u, v);
+
+	*(dest + i) = m|n|o|p;
+    }
+}
+
+FASTCALL static void
+fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  s, d;
+        uint32_t  m,n,o,p;
+        uint32_t  Fa, Fb;
+        uint16_t  t, u, v;
+        uint32_t  sa;
+        uint8_t   da;
+
+        s = *(src + i);
+        m = *(mask + i);
+        d = *(dest + i);
+        da = d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+	sa = m;
+
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            m = fbCombineDisjointOutPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineDisjointOutPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineDisjointOutPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineDisjointOutPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineAIn:
+            m = fbCombineDisjointInPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineDisjointInPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineDisjointInPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineDisjointInPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineA:
+            Fa = 0xffffffff;
+            break;
+        }
+
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            m = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineBIn:
+            m = fbCombineDisjointInPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineDisjointInPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineDisjointInPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineDisjointInPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineB:
+            Fb = 0xffffffff;
+            break;
+        }
+        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
+        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
+        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
+        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineDisjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOver);
+}
+
+FASTCALL static void
+fbCombineDisjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAIn);
+}
+
+FASTCALL static void
+fbCombineDisjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBIn);
+}
+
+FASTCALL static void
+fbCombineDisjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOut);
+}
+
+FASTCALL static void
+fbCombineDisjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBOut);
+}
+
+FASTCALL static void
+fbCombineDisjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAAtop);
+}
+
+FASTCALL static void
+fbCombineDisjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBAtop);
+}
+
+FASTCALL static void
+fbCombineDisjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineXor);
+}
+
+FASTCALL static void
+fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  s, d;
+        uint32_t  m,n,o,p;
+        uint32_t  Fa, Fb;
+        uint16_t  t, u, v;
+        uint32_t  sa;
+        uint8_t   da;
+
+        s = *(src + i);
+        m = *(mask + i);
+        d = *(dest + i);
+        da = d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+        sa = m;
+
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            m = fbCombineConjointOutPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineConjointOutPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineConjointOutPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineConjointOutPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineAIn:
+            m = fbCombineConjointInPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineConjointInPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineConjointInPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineConjointInPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineA:
+            Fa = 0xffffffff;
+            break;
+        }
+
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            m = fbCombineConjointOutPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineConjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineConjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineConjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineBIn:
+            m = fbCombineConjointInPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineConjointInPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineConjointInPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineConjointInPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineB:
+            Fb = 0xffffffff;
+            break;
+        }
+        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
+        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
+        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
+        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+
+FASTCALL static void
+fbCombineConjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOver);
+}
+
+FASTCALL static void
+fbCombineConjointOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOver);
+}
+
+FASTCALL static void
+fbCombineConjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAIn);
+}
+
+FASTCALL static void
+fbCombineConjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBIn);
+}
+
+FASTCALL static void
+fbCombineConjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOut);
+}
+
+FASTCALL static void
+fbCombineConjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOut);
+}
+
+FASTCALL static void
+fbCombineConjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAAtop);
+}
+
+FASTCALL static void
+fbCombineConjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBAtop);
+}
+
+FASTCALL static void
+fbCombineConjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
+}
+
+CombineFuncU pixman_fbCombineFuncU[] = {
+    fbCombineClear,
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineOverU,
+    fbCombineOverReverseU,
+    fbCombineInU,
+    fbCombineInReverseU,
+    fbCombineOutU,
+    fbCombineOutReverseU,
+    fbCombineAtopU,
+    fbCombineAtopReverseU,
+    fbCombineXorU,
+    fbCombineAddU,
+    fbCombineSaturateU,
+    NULL,
+    NULL,
+    fbCombineClear,
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineDisjointOverU,
+    fbCombineSaturateU, /* DisjointOverReverse */
+    fbCombineDisjointInU,
+    fbCombineDisjointInReverseU,
+    fbCombineDisjointOutU,
+    fbCombineDisjointOutReverseU,
+    fbCombineDisjointAtopU,
+    fbCombineDisjointAtopReverseU,
+    fbCombineDisjointXorU,
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    fbCombineClear,
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineConjointOverU,
+    fbCombineConjointOverReverseU,
+    fbCombineConjointInU,
+    fbCombineConjointInReverseU,
+    fbCombineConjointOutU,
+    fbCombineConjointOutReverseU,
+    fbCombineConjointAtopU,
+    fbCombineConjointAtopReverseU,
+    fbCombineConjointXorU,
+};
+
+CombineFuncC pixman_fbCombineFuncC[] = {
+    fbCombineClearC,
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineOverC,
+    fbCombineOverReverseC,
+    fbCombineInC,
+    fbCombineInReverseC,
+    fbCombineOutC,
+    fbCombineOutReverseC,
+    fbCombineAtopC,
+    fbCombineAtopReverseC,
+    fbCombineXorC,
+    fbCombineAddC,
+    fbCombineSaturateC,
+    NULL,
+    NULL,
+    fbCombineClearC,	    /* 0x10 */
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineDisjointOverC,
+    fbCombineSaturateC, /* DisjointOverReverse */
+    fbCombineDisjointInC,
+    fbCombineDisjointInReverseC,
+    fbCombineDisjointOutC,
+    fbCombineDisjointOutReverseC,
+    fbCombineDisjointAtopC,
+    fbCombineDisjointAtopReverseC,
+    fbCombineDisjointXorC,  /* 0x1b */
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    fbCombineClearC,
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineConjointOverC,
+    fbCombineConjointOverReverseC,
+    fbCombineConjointInC,
+    fbCombineConjointInReverseC,
+    fbCombineConjointOutC,
+    fbCombineConjointOutReverseC,
+    fbCombineConjointAtopC,
+    fbCombineConjointAtopReverseC,
+    fbCombineConjointXorC,
+};
diff --git a/pixman/pixman-compose-operators.c b/pixman/pixman-compose-operators.c
deleted file mode 100644
index 68594fa..0000000
--- a/pixman/pixman-compose-operators.c
+++ /dev/null
@@ -1,1255 +0,0 @@
-
-#include "pixman-private.h"
-/*
- * There are two ways of handling alpha -- either as a single unified value or
- * a separate value for each component, hence each macro must have two
- * versions.  The unified alpha version has a 'U' at the end of the name,
- * the component version has a 'C'.  Similarly, functions which deal with
- * this difference will have two versions using the same convention.
- */
-
-
-/*
- * Combine src and mask
- */
-FASTCALL void
-pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t a = *(mask + i) >> 24;
-        uint32_t s = *(src + i);
-        FbByteMul(s, a);
-        *(src + i) = s;
-    }
-}
-
-/*
- * All of the composing functions
- */
-
-FASTCALL void
-fbCombineClear (uint32_t *dest, const uint32_t *src, int width)
-{
-    memset(dest, 0, width*sizeof(uint32_t));
-}
-
-FASTCALL void
-fbCombineSrcU (uint32_t *dest, const uint32_t *src, int width)
-{
-    memcpy(dest, src, width*sizeof(uint32_t));
-}
-
-/* if the Src is opaque, call fbCombineSrcU */
-FASTCALL void
-fbCombineOverU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t ia = Alpha(~s);
-
-        FbByteMulAdd(d, ia, s);
-	*(dest + i) = d;
-    }
-}
-
-/* if the Dst is opaque, this is a noop */
-FASTCALL void
-fbCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t ia = Alpha(~*(dest + i));
-        FbByteMulAdd(s, ia, d);
-	*(dest + i) = s;
-    }
-}
-
-/* if the Dst is opaque, call fbCombineSrcU */
-FASTCALL void
-fbCombineInU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t a = Alpha(*(dest + i));
-        FbByteMul(s, a);
-	*(dest + i) = s;
-    }
-}
-
-/* if the Src is opaque, this is a noop */
-FASTCALL void
-fbCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = Alpha(*(src + i));
-        FbByteMul(d, a);
-	*(dest + i) = d;
-    }
-}
-
-/* if the Dst is opaque, call fbCombineClear */
-FASTCALL void
-fbCombineOutU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t a = Alpha(~*(dest + i));
-        FbByteMul(s, a);
-	*(dest + i) = s;
-    }
-}
-
-/* if the Src is opaque, call fbCombineClear */
-FASTCALL void
-fbCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = Alpha(~*(src + i));
-        FbByteMul(d, a);
-	*(dest + i) = d;
-    }
-}
-
-/* if the Src is opaque, call fbCombineInU */
-/* if the Dst is opaque, call fbCombineOverU */
-/* if both the Src and Dst are opaque, call fbCombineSrcU */
-FASTCALL void
-fbCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t dest_a = Alpha(d);
-        uint32_t src_ia = Alpha(~s);
-
-        FbByteAddMul(s, dest_a, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-/* if the Src is opaque, call fbCombineOverReverseU */
-/* if the Dst is opaque, call fbCombineInReverseU */
-/* if both the Src and Dst are opaque, call fbCombineDstU */
-FASTCALL void
-fbCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t src_a = Alpha(s);
-        uint32_t dest_ia = Alpha(~d);
-
-        FbByteAddMul(s, dest_ia, d, src_a);
-	*(dest + i) = s;
-    }
-}
-
-/* if the Src is opaque, call fbCombineOverU */
-/* if the Dst is opaque, call fbCombineOverReverseU */
-/* if both the Src and Dst are opaque, call fbCombineClear */
-FASTCALL void
-fbCombineXorU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t src_ia = Alpha(~s);
-        uint32_t dest_ia = Alpha(~d);
-
-        FbByteAddMul(s, dest_ia, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-FASTCALL void
-fbCombineAddU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        FbByteAdd(d, s);
-	*(dest + i) = d;
-    }
-}
-
-/* if the Src is opaque, call fbCombineAddU */
-/* if the Dst is opaque, call fbCombineAddU */
-/* if both the Src and Dst are opaque, call fbCombineAddU */
-FASTCALL void
-fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint16_t  sa, da;
-
-        sa = s >> 24;
-        da = ~d >> 24;
-        if (sa > da)
-        {
-            sa = FbIntDiv(da, sa);
-            FbByteMul(s, sa);
-        };
-        FbByteAdd(d, s);
-	*(dest + i) = d;
-    }
-}
-
-
-/*
- * All of the disjoint composing functions
-
- The four entries in the first column indicate what source contributions
- come from each of the four areas of the picture -- areas covered by neither
- A nor B, areas covered only by A, areas covered only by B and finally
- areas covered by both A and B.
-
- Disjoint			Conjoint
- Fa		Fb		Fa		Fb
- (0,0,0,0)	0		0		0		0
- (0,A,0,A)	1		0		1		0
- (0,0,B,B)	0		1		0		1
- (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
- (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
- (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
- (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
- (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
- (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
- (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
- (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
- (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
-
-*/
-
-#define CombineAOut 1
-#define CombineAIn  2
-#define CombineBOut 4
-#define CombineBIn  8
-
-#define CombineClear	0
-#define CombineA	(CombineAOut|CombineAIn)
-#define CombineB	(CombineBOut|CombineBIn)
-#define CombineAOver	(CombineAOut|CombineBOut|CombineAIn)
-#define CombineBOver	(CombineAOut|CombineBOut|CombineBIn)
-#define CombineAAtop	(CombineBOut|CombineAIn)
-#define CombineBAtop	(CombineAOut|CombineBIn)
-#define CombineXor	(CombineAOut|CombineBOut)
-
-/* portion covered by a but not b */
-FASTCALL uint8_t
-fbCombineDisjointOutPart (uint8_t a, uint8_t b)
-{
-    /* min (1, (1-b) / a) */
-
-    b = ~b;		    /* 1 - b */
-    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
-	return 0xff;	    /* 1 */
-    return FbIntDiv(b,a);   /* (1-b) / a */
-}
-
-/* portion covered by both a and b */
-FASTCALL uint8_t
-fbCombineDisjointInPart (uint8_t a, uint8_t b)
-{
-    /* max (1-(1-b)/a,0) */
-    /*  = - min ((1-b)/a - 1, 0) */
-    /*  = 1 - min (1, (1-b)/a) */
-
-    b = ~b;		    /* 1 - b */
-    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
-	return 0;	    /* 1 - 1 */
-    return ~FbIntDiv(b,a);  /* 1 - (1-b) / a */
-}
-
-/* portion covered by a but not b */
-FASTCALL uint8_t
-fbCombineConjointOutPart (uint8_t a, uint8_t b)
-{
-    /* max (1-b/a,0) */
-    /* = 1-min(b/a,1) */
-
-    /* min (1, (1-b) / a) */
-
-    if (b >= a)		    /* b >= a -> b/a >= 1 */
-	return 0x00;	    /* 0 */
-    return ~FbIntDiv(b,a);   /* 1 - b/a */
-}
-
-/* portion covered by both a and b */
-FASTCALL uint8_t
-fbCombineConjointInPart (uint8_t a, uint8_t b)
-{
-    /* min (1,b/a) */
-
-    if (b >= a)		    /* b >= a -> b/a >= 1 */
-	return 0xff;	    /* 1 */
-    return FbIntDiv(b,a);   /* b/a */
-}
-
-FASTCALL void
-fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t m,n,o,p;
-        uint16_t Fa, Fb, t, u, v;
-        uint8_t sa = s >> 24;
-        uint8_t da = d >> 24;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            Fa = fbCombineDisjointOutPart (sa, da);
-            break;
-        case CombineAIn:
-            Fa = fbCombineDisjointInPart (sa, da);
-            break;
-        case CombineA:
-            Fa = 0xff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            Fb = fbCombineDisjointOutPart (da, sa);
-            break;
-        case CombineBIn:
-            Fb = fbCombineDisjointInPart (da, sa);
-            break;
-        case CombineB:
-            Fb = 0xff;
-            break;
-        }
-        m = FbGen (s,d,0,Fa,Fb,t, u, v);
-        n = FbGen (s,d,8,Fa,Fb,t, u, v);
-        o = FbGen (s,d,16,Fa,Fb,t, u, v);
-        p = FbGen (s,d,24,Fa,Fb,t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-FASTCALL void
-fbCombineDisjointOverU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint16_t  a = s >> 24;
-
-        if (a != 0x00)
-        {
-            if (a != 0xff)
-            {
-                uint32_t d = *(dest + i);
-                a = fbCombineDisjointOutPart (d >> 24, a);
-                FbByteMulAdd(d, a, s);
-                s = d;
-            }
-	    *(dest + i) = s;
-        }
-    }
-}
-
-FASTCALL void
-fbCombineDisjointInU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineAIn);
-}
-
-FASTCALL void
-fbCombineDisjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineBIn);
-}
-
-FASTCALL void
-fbCombineDisjointOutU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineAOut);
-}
-
-FASTCALL void
-fbCombineDisjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineBOut);
-}
-
-FASTCALL void
-fbCombineDisjointAtopU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineAAtop);
-}
-
-FASTCALL void
-fbCombineDisjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineBAtop);
-}
-
-FASTCALL void
-fbCombineDisjointXorU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineXor);
-}
-
-FASTCALL void
-fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t  m,n,o,p;
-        uint16_t  Fa, Fb, t, u, v;
-        uint8_t sa = s >> 24;
-        uint8_t da = d >> 24;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            Fa = fbCombineConjointOutPart (sa, da);
-            break;
-        case CombineAIn:
-            Fa = fbCombineConjointInPart (sa, da);
-            break;
-        case CombineA:
-            Fa = 0xff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            Fb = fbCombineConjointOutPart (da, sa);
-            break;
-        case CombineBIn:
-            Fb = fbCombineConjointInPart (da, sa);
-            break;
-        case CombineB:
-            Fb = 0xff;
-            break;
-        }
-        m = FbGen (s,d,0,Fa,Fb,t, u, v);
-        n = FbGen (s,d,8,Fa,Fb,t, u, v);
-        o = FbGen (s,d,16,Fa,Fb,t, u, v);
-        p = FbGen (s,d,24,Fa,Fb,t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-FASTCALL void
-fbCombineConjointOverU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAOver);
-}
-
-
-FASTCALL void
-fbCombineConjointOverReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBOver);
-}
-
-
-FASTCALL void
-fbCombineConjointInU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAIn);
-}
-
-
-FASTCALL void
-fbCombineConjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBIn);
-}
-
-FASTCALL void
-fbCombineConjointOutU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAOut);
-}
-
-FASTCALL void
-fbCombineConjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBOut);
-}
-
-FASTCALL void
-fbCombineConjointAtopU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAAtop);
-}
-
-FASTCALL void
-fbCombineConjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBAtop);
-}
-
-FASTCALL void
-fbCombineConjointXorU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineXor);
-}
-
-/********************************************************************************/
-/*************************** Per Channel functions ******************************/
-/********************************************************************************/
-
-FASTCALL void
-fbCombineMaskC (uint32_t *src, uint32_t *mask)
-{
-    uint32_t a = *mask;
-
-    uint32_t	x;
-    uint16_t	xa;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    x = *(src);
-    if (a == 0xffffffff)
-    {
-	x = x >> 24;
-	x |= x << 8;
-	x |= x << 16;
-	*(mask) = x;
-	return;
-    }
-
-    xa = x >> 24;
-    FbByteMulC(x, a);
-    *(src) = x;
-    FbByteMul(a, xa);
-    *(mask) = a;
-}
-
-FASTCALL void
-fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
-{
-    uint32_t a = *mask;
-    uint32_t	x;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    if (a == 0xffffffff)
-	return;
-
-    x = *(src);
-    FbByteMulC(x, a);
-    *(src) =x;
-}
-
-FASTCALL void
-fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
-{
-    uint32_t a = *(mask);
-    uint32_t	x;
-
-    if (!a)
-	return;
-
-    x = *(src) >> 24;
-    if (x == 0xff)
-	return;
-    if (a == 0xffffffff)
-    {
-	x = x >> 24;
-	x |= x << 8;
-	x |= x << 16;
-	*(mask) = x;
-	return;
-    }
-
-    FbByteMul(a, x);
-    *(mask) = a;
-}
-
-
-
-FASTCALL void
-fbCombineClearC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    memset(dest, 0, width*sizeof(uint32_t));
-}
-
-FASTCALL void
-fbCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-
-	fbCombineMaskValueC (&s, &m);
-
-	*(dest) = s;
-    }
-}
-
-FASTCALL void
-fbCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	fbCombineMaskC (&s, &m);
-
-	a = ~m;
-        if (a != 0xffffffff)
-        {
-            if (a)
-            {
-                uint32_t d = *(dest + i);
-                FbByteMulAddC(d, a, s);
-                s = d;
-            }
-	    *(dest + i) = s;
-        }
-    }
-}
-
-FASTCALL void
-fbCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = ~d >> 24;
-
-        if (a)
-        {
-            uint32_t s = *(src + i);
-	    uint32_t m = *(mask + i);
-
-	    fbCombineMaskValueC (&s, &m);
-
-            if (a != 0xff)
-            {
-                FbByteMulAdd(s, a, d);
-            }
-	    *(dest + i) = s;
-        }
-    }
-}
-
-FASTCALL void
-fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint16_t a = d >> 24;
-        uint32_t s = 0;
-        if (a)
-        {
-	    uint32_t m = *(mask + i);
-
-	    s = *(src + i);
-	    fbCombineMaskValueC (&s, &m);
-            if (a != 0xff)
-            {
-                FbByteMul(s, a);
-            }
-        }
-	*(dest + i) = s;
-    }
-}
-
-FASTCALL void
-fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t a;
-
-	fbCombineMaskAlphaC (&s, &m);
-
-	a = m;
-        if (a != 0xffffffff)
-        {
-            uint32_t d = 0;
-            if (a)
-            {
-                d = *(dest + i);
-                FbByteMulC(d, a);
-            }
-	    *(dest + i) = d;
-        }
-    }
-}
-
-FASTCALL void
-fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint16_t a = ~d >> 24;
-        uint32_t s = 0;
-        if (a)
-        {
-	    uint32_t m = *(mask + i);
-
-	    s = *(src + i);
-	    fbCombineMaskValueC (&s, &m);
-
-            if (a != 0xff)
-            {
-                FbByteMul(s, a);
-            }
-        }
-	*(dest + i) = s;
-    }
-}
-
-FASTCALL void
-fbCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	fbCombineMaskAlphaC (&s, &m);
-
-        a = ~m;
-        if (a != 0xffffffff)
-        {
-            uint32_t d = 0;
-            if (a)
-            {
-                d = *(dest + i);
-                FbByteMulC(d, a);
-            }
-	    *(dest + i) = d;
-        }
-    }
-}
-
-FASTCALL void
-fbCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-        ad = ~m;
-
-        FbByteAddMulC(d, ad, s, as);
-	*(dest + i) = d;
-    }
-}
-
-FASTCALL void
-fbCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = ~d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-	ad = m;
-
-        FbByteAddMulC(d, ad, s, as);
-	*(dest + i) = d;
-    }
-}
-
-FASTCALL void
-fbCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = ~d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-	ad = ~m;
-
-        FbByteAddMulC(d, ad, s, as);
-	*(dest + i) = d;
-    }
-}
-
-FASTCALL void
-fbCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t d = *(dest + i);
-
-	fbCombineMaskValueC (&s, &m);
-
-        FbByteAdd(d, s);
-	*(dest + i) = d;
-    }
-}
-
-FASTCALL void
-fbCombineSaturateC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint16_t  sa, sr, sg, sb, da;
-        uint16_t  t, u, v;
-        uint32_t  m,n,o,p;
-
-        d = *(dest + i);
-        s = *(src + i);
-	m = *(mask + i);
-
-	fbCombineMaskC (&s, &m);
-
-        sa = (m >> 24);
-        sr = (m >> 16) & 0xff;
-        sg = (m >>  8) & 0xff;
-        sb = (m      ) & 0xff;
-        da = ~d >> 24;
-
-        if (sb <= da)
-            m = FbAdd(s,d,0,t);
-        else
-            m = FbGen (s, d, 0, (da << 8) / sb, 0xff, t, u, v);
-
-        if (sg <= da)
-            n = FbAdd(s,d,8,t);
-        else
-            n = FbGen (s, d, 8, (da << 8) / sg, 0xff, t, u, v);
-
-        if (sr <= da)
-            o = FbAdd(s,d,16,t);
-        else
-            o = FbGen (s, d, 16, (da << 8) / sr, 0xff, t, u, v);
-
-        if (sa <= da)
-            p = FbAdd(s,d,24,t);
-        else
-            p = FbGen (s, d, 24, (da << 8) / sa, 0xff, t, u, v);
-
-	*(dest + i) = m|n|o|p;
-    }
-}
-
-FASTCALL void
-fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint32_t  m,n,o,p;
-        uint32_t  Fa, Fb;
-        uint16_t  t, u, v;
-        uint32_t  sa;
-        uint8_t   da;
-
-        s = *(src + i);
-        m = *(mask + i);
-        d = *(dest + i);
-        da = d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-	sa = m;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            m = fbCombineDisjointOutPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineDisjointOutPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineDisjointOutPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineDisjointOutPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineAIn:
-            m = fbCombineDisjointInPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineDisjointInPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineDisjointInPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineDisjointInPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineA:
-            Fa = 0xffffffff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            m = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineBIn:
-            m = fbCombineDisjointInPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineDisjointInPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineDisjointInPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineDisjointInPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineB:
-            Fb = 0xffffffff;
-            break;
-        }
-        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
-        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
-        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
-        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-FASTCALL void
-fbCombineDisjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOver);
-}
-
-FASTCALL void
-fbCombineDisjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAIn);
-}
-
-FASTCALL void
-fbCombineDisjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBIn);
-}
-
-FASTCALL void
-fbCombineDisjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOut);
-}
-
-FASTCALL void
-fbCombineDisjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBOut);
-}
-
-FASTCALL void
-fbCombineDisjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAAtop);
-}
-
-FASTCALL void
-fbCombineDisjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBAtop);
-}
-
-FASTCALL void
-fbCombineDisjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineXor);
-}
-
-FASTCALL void
-fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint32_t  m,n,o,p;
-        uint32_t  Fa, Fb;
-        uint16_t  t, u, v;
-        uint32_t  sa;
-        uint8_t   da;
-
-        s = *(src + i);
-        m = *(mask + i);
-        d = *(dest + i);
-        da = d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-        sa = m;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            m = fbCombineConjointOutPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineConjointOutPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineConjointOutPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineConjointOutPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineAIn:
-            m = fbCombineConjointInPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineConjointInPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineConjointInPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineConjointInPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineA:
-            Fa = 0xffffffff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            m = fbCombineConjointOutPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineConjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineConjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineConjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineBIn:
-            m = fbCombineConjointInPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineConjointInPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineConjointInPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineConjointInPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineB:
-            Fb = 0xffffffff;
-            break;
-        }
-        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
-        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
-        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
-        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-FASTCALL void
-fbCombineConjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOver);
-}
-
-FASTCALL void
-fbCombineConjointOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOver);
-}
-
-FASTCALL void
-fbCombineConjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAIn);
-}
-
-FASTCALL void
-fbCombineConjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBIn);
-}
-
-FASTCALL void
-fbCombineConjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOut);
-}
-
-FASTCALL void
-fbCombineConjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOut);
-}
-
-FASTCALL void
-fbCombineConjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAAtop);
-}
-
-FASTCALL void
-fbCombineConjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBAtop);
-}
-
-FASTCALL void
-fbCombineConjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
-}
-
-CombineFuncU pixman_fbCombineFuncU[] = {
-    fbCombineClear,
-    fbCombineSrcU,
-    NULL, /* CombineDst */
-    fbCombineOverU,
-    fbCombineOverReverseU,
-    fbCombineInU,
-    fbCombineInReverseU,
-    fbCombineOutU,
-    fbCombineOutReverseU,
-    fbCombineAtopU,
-    fbCombineAtopReverseU,
-    fbCombineXorU,
-    fbCombineAddU,
-    fbCombineSaturateU,
-    NULL,
-    NULL,
-    fbCombineClear,
-    fbCombineSrcU,
-    NULL, /* CombineDst */
-    fbCombineDisjointOverU,
-    fbCombineSaturateU, /* DisjointOverReverse */
-    fbCombineDisjointInU,
-    fbCombineDisjointInReverseU,
-    fbCombineDisjointOutU,
-    fbCombineDisjointOutReverseU,
-    fbCombineDisjointAtopU,
-    fbCombineDisjointAtopReverseU,
-    fbCombineDisjointXorU,
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    fbCombineClear,
-    fbCombineSrcU,
-    NULL, /* CombineDst */
-    fbCombineConjointOverU,
-    fbCombineConjointOverReverseU,
-    fbCombineConjointInU,
-    fbCombineConjointInReverseU,
-    fbCombineConjointOutU,
-    fbCombineConjointOutReverseU,
-    fbCombineConjointAtopU,
-    fbCombineConjointAtopReverseU,
-    fbCombineConjointXorU,
-};
-
-CombineFuncC pixman_fbCombineFuncC[] = {
-    fbCombineClearC,
-    fbCombineSrcC,
-    NULL, /* Dest */
-    fbCombineOverC,
-    fbCombineOverReverseC,
-    fbCombineInC,
-    fbCombineInReverseC,
-    fbCombineOutC,
-    fbCombineOutReverseC,
-    fbCombineAtopC,
-    fbCombineAtopReverseC,
-    fbCombineXorC,
-    fbCombineAddC,
-    fbCombineSaturateC,
-    NULL,
-    NULL,
-    fbCombineClearC,	    /* 0x10 */
-    fbCombineSrcC,
-    NULL, /* Dest */
-    fbCombineDisjointOverC,
-    fbCombineSaturateC, /* DisjointOverReverse */
-    fbCombineDisjointInC,
-    fbCombineDisjointInReverseC,
-    fbCombineDisjointOutC,
-    fbCombineDisjointOutReverseC,
-    fbCombineDisjointAtopC,
-    fbCombineDisjointAtopReverseC,
-    fbCombineDisjointXorC,  /* 0x1b */
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    fbCombineClearC,
-    fbCombineSrcC,
-    NULL, /* Dest */
-    fbCombineConjointOverC,
-    fbCombineConjointOverReverseC,
-    fbCombineConjointInC,
-    fbCombineConjointInReverseC,
-    fbCombineConjointOutC,
-    fbCombineConjointOutReverseC,
-    fbCombineConjointAtopC,
-    fbCombineConjointAtopReverseC,
-    fbCombineConjointXorC,
-};
\ No newline at end of file
diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index 4ec9f69..8c74d77 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -3373,7 +3373,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 
 		    if (mask_buffer)
 		    {
-			fbCombineInU (mask_buffer, src_buffer, data->width);
+			PIXMAN_COMPOSE_FUNCTIONS.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 			src_mask_buffer = mask_buffer;
 		    }
 		    else
@@ -3400,7 +3400,7 @@ PIXMAN_COMPOSITE_RECT_GENERAL (const FbComposeData *data,
 		fetchMask (data->mask, data->xMask, data->yMask + i,
 			   data->width, mask_buffer, 0, 0);
 
-		fbCombineInU (mask_buffer, src_buffer, data->width);
+		PIXMAN_COMPOSE_FUNCTIONS.combineU[PIXMAN_OP_IN] (mask_buffer, src_buffer, data->width);
 
 		src_mask_buffer = mask_buffer;
 	    }
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index bfc0dff..90ea4cb 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -300,8 +300,8 @@ union pixman_image
 };
 
 
-CombineFuncU pixman_fbCombineFuncU[];
-CombineFuncC pixman_fbCombineFuncC[];
+extern CombineFuncU pixman_fbCombineFuncU[];
+extern CombineFuncC pixman_fbCombineFuncC[];
 FASTCALL void pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int width);
 
 #define LOG2_BITMAP_PAD 5
commit d47a686d91d28d996fc7326ada0d2f0c3a305852
Author: Antoine Azar <cairo at antoineazar.com>
Date:   Thu Feb 21 03:38:46 2008 -0500

    [PATCH] Moved all composition operators from pixman-compose.c to their own c/h file
    
    Signed-off-by: Aaron Plattner <aplattner at nvidia.com>

diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
index 68aba4b..93ab3f6 100644
--- a/pixman/Makefile.win32
+++ b/pixman/Makefile.win32
@@ -24,6 +24,7 @@ endif
 SOURCES = \
 	pixman-region.c				\
 	pixman-image.c					\
+	pixman-compose-operators.c				\
 	pixman-compose.c				\
 	pixman-compose-accessors.c	\
 	pixman-pict.c					\
diff --git a/pixman/pixman-compose-operators.c b/pixman/pixman-compose-operators.c
new file mode 100644
index 0000000..68594fa
--- /dev/null
+++ b/pixman/pixman-compose-operators.c
@@ -0,0 +1,1255 @@
+
+#include "pixman-private.h"
+/*
+ * There are two ways of handling alpha -- either as a single unified value or
+ * a separate value for each component, hence each macro must have two
+ * versions.  The unified alpha version has a 'U' at the end of the name,
+ * the component version has a 'C'.  Similarly, functions which deal with
+ * this difference will have two versions using the same convention.
+ */
+
+
+/*
+ * Combine src and mask: FbByteMul each src pixel by its mask pixel's top byte, in place
+ */
+FASTCALL void
+pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t a = *(mask + i) >> 24;
+        uint32_t s = *(src + i);
+        FbByteMul(s, a);
+        *(src + i) = s;
+    }
+}
+
+/*
+ * All of the composing functions
+ */
+/* Clear: zero `width` 32-bit pixels of dest; src is not read. */
+FASTCALL void
+fbCombineClear (uint32_t *dest, const uint32_t *src, int width)
+{
+    memset(dest, 0, width*sizeof(uint32_t));
+}
+/* Src (unified alpha): copy `width` 32-bit pixels from src to dest. */
+FASTCALL void
+fbCombineSrcU (uint32_t *dest, const uint32_t *src, int width)
+{
+    memcpy(dest, src, width*sizeof(uint32_t));
+}
+/* Over (unified alpha): per pixel, FbByteMulAdd(d, Alpha(~s), s), then d is stored to dest. */
+/* if the Src is opaque, call fbCombineSrcU */
+FASTCALL void
+fbCombineOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t ia = Alpha(~s);
+
+        FbByteMulAdd(d, ia, s);
+	*(dest + i) = d;
+    }
+}
+/* OverReverse (unified alpha): per pixel, FbByteMulAdd(s, Alpha(~d), d), then s is stored to dest. */
+/* if the Dst is opaque, this is a noop */
+FASTCALL void
+fbCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t ia = Alpha(~*(dest + i));
+        FbByteMulAdd(s, ia, d);
+	*(dest + i) = s;
+    }
+}
+/* In (unified alpha): per pixel, FbByteMul(s, Alpha(d)), then s is stored to dest. */
+/* if the Dst is opaque, call fbCombineSrcU */
+FASTCALL void
+fbCombineInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t a = Alpha(*(dest + i));
+        FbByteMul(s, a);
+	*(dest + i) = s;
+    }
+}
+/* InReverse (unified alpha): per pixel, FbByteMul(d, Alpha(s)), then d is stored back to dest. */
+/* if the Src is opaque, this is a noop */
+FASTCALL void
+fbCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t a = Alpha(*(src + i));
+        FbByteMul(d, a);
+	*(dest + i) = d;
+    }
+}
+/* Out (unified alpha): per pixel, FbByteMul(s, Alpha(~d)), then s is stored to dest. */
+/* if the Dst is opaque, call fbCombineClear */
+FASTCALL void
+fbCombineOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t a = Alpha(~*(dest + i));
+        FbByteMul(s, a);
+	*(dest + i) = s;
+    }
+}
+/* OutReverse (unified alpha): per pixel, FbByteMul(d, Alpha(~s)), then d is stored back to dest. */
+/* if the Src is opaque, call fbCombineClear */
+FASTCALL void
+fbCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t a = Alpha(~*(src + i));
+        FbByteMul(d, a);
+	*(dest + i) = d;
+    }
+}
+/* Atop (unified alpha): per pixel, FbByteAddMul(s, Alpha(d), d, Alpha(~s)), then s is stored to dest. */
+/* if the Src is opaque, call fbCombineInU */
+/* if the Dst is opaque, call fbCombineOverU */
+/* if both the Src and Dst are opaque, call fbCombineSrcU */
+FASTCALL void
+fbCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t dest_a = Alpha(d);
+        uint32_t src_ia = Alpha(~s);
+
+        FbByteAddMul(s, dest_a, d, src_ia);
+	*(dest + i) = s;
+    }
+}
+/* AtopReverse (unified alpha): per pixel, FbByteAddMul(s, Alpha(~d), d, Alpha(s)), then s is stored to dest. */
+/* if the Src is opaque, call fbCombineOverReverseU */
+/* if the Dst is opaque, call fbCombineInReverseU */
+/* if both the Src and Dst are opaque, call fbCombineDstU */
+FASTCALL void
+fbCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t src_a = Alpha(s);
+        uint32_t dest_ia = Alpha(~d);
+
+        FbByteAddMul(s, dest_ia, d, src_a);
+	*(dest + i) = s;
+    }
+}
+/* Xor (unified alpha): per pixel, FbByteAddMul(s, Alpha(~d), d, Alpha(~s)), then s is stored to dest. */
+/* if the Src is opaque, call fbCombineOverU */
+/* if the Dst is opaque, call fbCombineOverReverseU */
+/* if both the Src and Dst are opaque, call fbCombineClear */
+FASTCALL void
+fbCombineXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t src_ia = Alpha(~s);
+        uint32_t dest_ia = Alpha(~d);
+
+        FbByteAddMul(s, dest_ia, d, src_ia);
+	*(dest + i) = s;
+    }
+}
+/* Add (unified alpha): per pixel, FbByteAdd(d, s), then d is stored back to dest. */
+FASTCALL void
+fbCombineAddU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        FbByteAdd(d, s);
+	*(dest + i) = d;
+    }
+}
+/* Saturate (unified alpha): s is rescaled when its alpha exceeds ~dest alpha, then FbByteAdd'ed to d. */
+/* if the Src is opaque, call fbCombineAddU */
+/* if the Dst is opaque, call fbCombineAddU */
+/* if both the Src and Dst are opaque, call fbCombineAddU */
+FASTCALL void
+fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t  s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint16_t  sa, da;
+
+        sa = s >> 24;		/* source alpha */
+        da = ~d >> 24;		/* inverse of the destination alpha */
+        if (sa > da)		/* sa > da >= 0, so the divisor below is never 0 */
+        {
+            sa = FbIntDiv(da, sa);
+            FbByteMul(s, sa);
+        }
+        FbByteAdd(d, s);
+	*(dest + i) = d;
+    }
+}
+
+
+/*
+ * All of the disjoint composing functions
+
+ The four entries in the first column indicate what source contributions
+ come from each of the four areas of the picture -- areas covered by neither
+ A nor B, areas covered only by A, areas covered only by B and finally
+ areas covered by both A and B.
+
+ Disjoint			Conjoint
+ Fa		Fb		Fa		Fb
+ (0,0,0,0)	0		0		0		0
+ (0,A,0,A)	1		0		1		0
+ (0,0,B,B)	0		1		0		1
+ (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
+ (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
+ (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
+ (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
+ (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
+ (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
+ (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
+ (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
+ (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
+
+*/
+/* Region bits: the A bits are tested by the Fa switch, the B bits by the Fb switch, of the *General* combiners. */
+#define CombineAOut 1
+#define CombineAIn  2
+#define CombineBOut 4
+#define CombineBIn  8
+/* Operator selectors built as unions of the region bits above. */
+#define CombineClear	0
+#define CombineA	(CombineAOut|CombineAIn)
+#define CombineB	(CombineBOut|CombineBIn)
+#define CombineAOver	(CombineAOut|CombineBOut|CombineAIn)
+#define CombineBOver	(CombineAOut|CombineBOut|CombineBIn)
+#define CombineAAtop	(CombineBOut|CombineAIn)
+#define CombineBAtop	(CombineAOut|CombineBIn)
+#define CombineXor	(CombineAOut|CombineBOut)
+/* Disjoint "out" factor as an 8-bit fraction (0xff stands for 1); a == 0 always takes the early return. */
+/* portion covered by a but not b */
+FASTCALL uint8_t
+fbCombineDisjointOutPart (uint8_t a, uint8_t b)
+{
+    /* min (1, (1-b) / a) */
+
+    b = ~b;		    /* 1 - b */
+    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
+	return 0xff;	    /* 1 */
+    return FbIntDiv(b,a);   /* (1-b) / a */
+}
+/* Disjoint "in" factor as an 8-bit fraction: the bitwise complement of the disjoint "out" factor. */
+/* portion covered by both a and b */
+FASTCALL uint8_t
+fbCombineDisjointInPart (uint8_t a, uint8_t b)
+{
+    /* max (1-(1-b)/a,0) */
+    /*  = - min ((1-b)/a - 1, 0) */
+    /*  = 1 - min (1, (1-b)/a) */
+
+    b = ~b;		    /* 1 - b */
+    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
+	return 0;	    /* 1 - 1 */
+    return ~FbIntDiv(b,a);  /* 1 - (1-b) / a */
+}
+/* Conjoint "out" factor as an 8-bit fraction: 0 when b >= a, otherwise ~FbIntDiv(b,a). */
+/* portion covered by a but not b */
+FASTCALL uint8_t
+fbCombineConjointOutPart (uint8_t a, uint8_t b)
+{
+    /* max (1-b/a,0) */
+    /* = 1-min(b/a,1) */
+
+    /* the division below is only reached when b < a, so a is never 0 there */
+
+    if (b >= a)		    /* b >= a -> b/a >= 1 */
+	return 0x00;	    /* 0 */
+    return ~FbIntDiv(b,a);   /* 1 - b/a */
+}
+/* Conjoint "in" factor as an 8-bit fraction: 0xff when b >= a, otherwise FbIntDiv(b,a). */
+/* portion covered by both a and b */
+FASTCALL uint8_t
+fbCombineConjointInPart (uint8_t a, uint8_t b)
+{
+    /* min (1,b/a) */
+
+    if (b >= a)		    /* b >= a -> b/a >= 1 */
+	return 0xff;	    /* 1 */
+    return FbIntDiv(b,a);   /* b/a */
+}
+/* Generic disjoint combiner (unified alpha): picks Fa and Fb from `combine`, then builds each byte with FbGen. */
+FASTCALL void
+fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t m,n,o,p;
+        uint16_t Fa, Fb, t, u, v;
+        uint8_t sa = s >> 24;
+        uint8_t da = d >> 24;
+        /* Fa: factor selected by the A bits, computed from (sa, da) */
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            Fa = fbCombineDisjointOutPart (sa, da);
+            break;
+        case CombineAIn:
+            Fa = fbCombineDisjointInPart (sa, da);
+            break;
+        case CombineA:
+            Fa = 0xff;
+            break;
+        }
+        /* Fb: factor selected by the B bits, computed from (da, sa) */
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            Fb = fbCombineDisjointOutPart (da, sa);
+            break;
+        case CombineBIn:
+            Fb = fbCombineDisjointInPart (da, sa);
+            break;
+        case CombineB:
+            Fb = 0xff;
+            break;
+        }
+        m = FbGen (s,d,0,Fa,Fb,t, u, v);
+        n = FbGen (s,d,8,Fa,Fb,t, u, v);
+        o = FbGen (s,d,16,Fa,Fb,t, u, v);
+        p = FbGen (s,d,24,Fa,Fb,t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+/* Disjoint Over: src alpha 0 leaves dest alone, 0xff stores s, otherwise FbByteMulAdd(d, DisjointOutPart(da, sa), s). */
+FASTCALL void
+fbCombineDisjointOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t  s = *(src + i);
+        uint16_t  a = s >> 24;
+
+        if (a != 0x00)
+        {
+            if (a != 0xff)
+            {
+                uint32_t d = *(dest + i);
+                a = fbCombineDisjointOutPart (d >> 24, a);
+                FbByteMulAdd(d, a, s);
+                s = d;
+            }
+	    *(dest + i) = s;
+        }
+    }
+}
+/* Disjoint In (unified alpha): fbCombineDisjointGeneralU with the CombineAIn selector. */
+FASTCALL void
+fbCombineDisjointInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAIn);
+}
+/* Disjoint InReverse (unified alpha): fbCombineDisjointGeneralU with the CombineBIn selector. */
+FASTCALL void
+fbCombineDisjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBIn);
+}
+/* Disjoint Out (unified alpha): fbCombineDisjointGeneralU with the CombineAOut selector. */
+FASTCALL void
+fbCombineDisjointOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAOut);
+}
+/* Disjoint OutReverse (unified alpha): fbCombineDisjointGeneralU with the CombineBOut selector. */
+FASTCALL void
+fbCombineDisjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBOut);
+}
+/* Disjoint Atop (unified alpha): fbCombineDisjointGeneralU with the CombineAAtop selector. */
+FASTCALL void
+fbCombineDisjointAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineAAtop);
+}
+/* Disjoint AtopReverse (unified alpha): fbCombineDisjointGeneralU with the CombineBAtop selector. */
+FASTCALL void
+fbCombineDisjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineBAtop);
+}
+/* Disjoint Xor (unified alpha): fbCombineDisjointGeneralU with the CombineXor selector. */
+FASTCALL void
+fbCombineDisjointXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineDisjointGeneralU (dest, src, width, CombineXor);
+}
+/* Generic conjoint combiner (unified alpha): picks Fa and Fb from `combine`, then builds each byte with FbGen. */
+FASTCALL void
+fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
+{
+    int i;
+    for (i = 0; i < width; ++i) {
+        uint32_t  s = *(src + i);
+        uint32_t d = *(dest + i);
+        uint32_t  m,n,o,p;
+        uint16_t  Fa, Fb, t, u, v;
+        uint8_t sa = s >> 24;
+        uint8_t da = d >> 24;
+        /* Fa: factor selected by the A bits, computed from (sa, da) */
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            Fa = fbCombineConjointOutPart (sa, da);
+            break;
+        case CombineAIn:
+            Fa = fbCombineConjointInPart (sa, da);
+            break;
+        case CombineA:
+            Fa = 0xff;
+            break;
+        }
+        /* Fb: factor selected by the B bits, computed from (da, sa) */
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            Fb = fbCombineConjointOutPart (da, sa);
+            break;
+        case CombineBIn:
+            Fb = fbCombineConjointInPart (da, sa);
+            break;
+        case CombineB:
+            Fb = 0xff;
+            break;
+        }
+        m = FbGen (s,d,0,Fa,Fb,t, u, v);
+        n = FbGen (s,d,8,Fa,Fb,t, u, v);
+        o = FbGen (s,d,16,Fa,Fb,t, u, v);
+        p = FbGen (s,d,24,Fa,Fb,t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+/* Conjoint Over (unified alpha): fbCombineConjointGeneralU with the CombineAOver selector. */
+FASTCALL void
+fbCombineConjointOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAOver);
+}
+
+/* Conjoint OverReverse (unified alpha): fbCombineConjointGeneralU with the CombineBOver selector. */
+FASTCALL void
+fbCombineConjointOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBOver);
+}
+
+/* Conjoint In (unified alpha): fbCombineConjointGeneralU with the CombineAIn selector. */
+FASTCALL void
+fbCombineConjointInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAIn);
+}
+
+/* Conjoint InReverse (unified alpha): fbCombineConjointGeneralU with the CombineBIn selector. */
+FASTCALL void
+fbCombineConjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBIn);
+}
+/* Conjoint Out (unified alpha): fbCombineConjointGeneralU with the CombineAOut selector. */
+FASTCALL void
+fbCombineConjointOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAOut);
+}
+/* Conjoint OutReverse (unified alpha): fbCombineConjointGeneralU with the CombineBOut selector. */
+FASTCALL void
+fbCombineConjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBOut);
+}
+/* Conjoint Atop (unified alpha): fbCombineConjointGeneralU with the CombineAAtop selector. */
+FASTCALL void
+fbCombineConjointAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineAAtop);
+}
+/* Conjoint AtopReverse (unified alpha): fbCombineConjointGeneralU with the CombineBAtop selector. */
+FASTCALL void
+fbCombineConjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineBAtop);
+}
+/* Conjoint Xor (unified alpha): fbCombineConjointGeneralU with the CombineXor selector. */
+FASTCALL void
+fbCombineConjointXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    fbCombineConjointGeneralU (dest, src, width, CombineXor);
+}
+
+/********************************************************************************/
+/*************************** Per Channel functions ******************************/
+/********************************************************************************/
+/* One-pixel component-alpha mask step: rewrites *src (FbByteMulC by the mask) and *mask (FbByteMul by src alpha). */
+FASTCALL void
+fbCombineMaskC (uint32_t *src, uint32_t *mask)
+{
+    uint32_t a = *mask;
+
+    uint32_t	x;
+    uint16_t	xa;
+    /* zero mask: src is cleared and the mask is left at 0 */
+    if (!a)
+    {
+	*(src) = 0;
+	return;
+    }
+    /* all-ones mask: src is kept, the mask becomes the src alpha replicated into all four bytes */
+    x = *(src);
+    if (a == 0xffffffff)
+    {
+	x = x >> 24;
+	x |= x << 8;
+	x |= x << 16;
+	*(mask) = x;
+	return;
+    }
+    /* general case: xa is the src alpha taken before src is multiplied by the mask */
+    xa = x >> 24;
+    FbByteMulC(x, a);
+    *(src) = x;
+    FbByteMul(a, xa);
+    *(mask) = a;
+}
+/* One-pixel mask step that only rewrites *src: 0 for a zero mask, unchanged for all ones, else FbByteMulC. */
+FASTCALL void
+fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
+{
+    uint32_t a = *mask;
+    uint32_t	x;
+
+    if (!a)
+    {
+	*(src) = 0;
+	return;
+    }
+
+    if (a == 0xffffffff)
+	return;
+
+    x = *(src);
+    FbByteMulC(x, a);
+    *(src) =x;
+}
+/* One-pixel mask step that only rewrites *mask (mask combined with the src alpha); *src is read-only. */
+FASTCALL void
+fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
+{
+    uint32_t a = *(mask);
+    uint32_t	x;
+    /* zero mask stays zero */
+    if (!a)
+	return;
+    /* x is the 8-bit src alpha; an opaque src leaves the mask unchanged */
+    x = *(src) >> 24;
+    if (x == 0xff)
+	return;
+    if (a == 0xffffffff)
+    {
+	/* x already holds the alpha byte: shifting it right by 24 again would zero the mask */
+	x |= x << 8;
+	x |= x << 16;
+	*(mask) = x;
+	return;
+    }
+
+    FbByteMul(a, x);
+    *(mask) = a;
+}
+
+
+/* Clear (component alpha): zero `width` 32-bit pixels of dest; src and mask are not read. */
+FASTCALL void
+fbCombineClearC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    memset(dest, 0, width*sizeof(uint32_t));
+}
+/* Src (component alpha): dest[i] receives src[i] after fbCombineMaskValueC has applied mask[i] to it. */
+FASTCALL void
+fbCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+	uint32_t s = *(src + i);
+	uint32_t m = *(mask + i);
+
+	fbCombineMaskValueC (&s, &m);
+	/* store at index i: writing through *(dest) would put every pixel into dest[0] */
+	*(dest + i) = s;
+    }
+}
+/* Over (component alpha): after fbCombineMaskC, FbByteMulAddC(d, ~m, s); dest is untouched when m is 0. */
+FASTCALL void
+fbCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+	uint32_t s = *(src + i);
+	uint32_t m = *(mask + i);
+	uint32_t a;
+
+	fbCombineMaskC (&s, &m);
+	/* a == 0xffffffff (m == 0): skip the pixel; a == 0 (m all ones): store s as is */
+	a = ~m;
+        if (a != 0xffffffff)
+        {
+            if (a)
+            {
+                uint32_t d = *(dest + i);
+                FbByteMulAddC(d, a, s);
+                s = d;
+            }
+	    *(dest + i) = s;
+        }
+    }
+}
+/* OverReverse (component alpha): skipped when ~dest alpha is 0; else masked s, FbByteMulAdd(s, a, d) unless a is 0xff. */
+FASTCALL void
+fbCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t a = ~d >> 24;
+
+        if (a)
+        {
+            uint32_t s = *(src + i);
+	    uint32_t m = *(mask + i);
+
+	    fbCombineMaskValueC (&s, &m);
+
+            if (a != 0xff)
+            {
+                FbByteMulAdd(s, a, d);
+            }
+	    *(dest + i) = s;
+        }
+    }
+}
+/* In (component alpha): stores 0 when dest alpha is 0; else masked s, FbByteMul(s, a) unless a is 0xff. */
+FASTCALL void
+fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint16_t a = d >> 24;
+        uint32_t s = 0;
+        if (a)
+        {
+	    uint32_t m = *(mask + i);
+
+	    s = *(src + i);
+	    fbCombineMaskValueC (&s, &m);
+            if (a != 0xff)
+            {
+                FbByteMul(s, a);
+            }
+        }
+	*(dest + i) = s;
+    }
+}
+/* InReverse (component alpha): m from fbCombineMaskAlphaC; all ones skips, 0 stores 0, else FbByteMulC(d, m). */
+FASTCALL void
+fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t a;
+
+	fbCombineMaskAlphaC (&s, &m);
+
+	a = m;
+        if (a != 0xffffffff)
+        {
+            uint32_t d = 0;
+            if (a)
+            {
+                d = *(dest + i);
+                FbByteMulC(d, a);
+            }
+	    *(dest + i) = d;
+        }
+    }
+}
+/* Out (component alpha): stores 0 when ~dest alpha is 0; else masked s, FbByteMul(s, a) unless a is 0xff. */
+FASTCALL void
+fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint16_t a = ~d >> 24;
+        uint32_t s = 0;
+        if (a)
+        {
+	    uint32_t m = *(mask + i);
+
+	    s = *(src + i);
+	    fbCombineMaskValueC (&s, &m);
+
+            if (a != 0xff)
+            {
+                FbByteMul(s, a);
+            }
+        }
+	*(dest + i) = s;
+    }
+}
+/* OutReverse (component alpha): a = ~m after fbCombineMaskAlphaC; all ones skips, 0 stores 0, else FbByteMulC(d, a). */
+FASTCALL void
+fbCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+	uint32_t s = *(src + i);
+	uint32_t m = *(mask + i);
+	uint32_t a;
+
+	fbCombineMaskAlphaC (&s, &m);
+
+        a = ~m;
+        if (a != 0xffffffff)
+        {
+            uint32_t d = 0;
+            if (a)
+            {
+                d = *(dest + i);
+                FbByteMulC(d, a);
+            }
+	    *(dest + i) = d;
+        }
+    }
+}
+/* Atop (component alpha): after fbCombineMaskC, FbByteAddMulC(d, ~m, s, dest alpha), then d is stored. */
+FASTCALL void
+fbCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t ad;
+        uint16_t as = d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+        ad = ~m;
+
+        FbByteAddMulC(d, ad, s, as);
+	*(dest + i) = d;
+    }
+}
+/* AtopReverse (component alpha): after fbCombineMaskC, FbByteAddMulC(d, m, s, ~dest alpha), then d is stored. */
+FASTCALL void
+fbCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+
+        uint32_t d = *(dest + i);
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t ad;
+        uint16_t as = ~d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+	ad = m;
+
+        FbByteAddMulC(d, ad, s, as);
+	*(dest + i) = d;
+    }
+}
+/* Xor (component alpha): after fbCombineMaskC, FbByteAddMulC(d, ~m, s, ~dest alpha), then d is stored. */
+FASTCALL void
+fbCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t d = *(dest + i);
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t ad;
+        uint16_t as = ~d >> 24;
+
+	fbCombineMaskC (&s, &m);
+
+	ad = ~m;
+
+        FbByteAddMulC(d, ad, s, as);
+	*(dest + i) = d;
+    }
+}
+/* Add (component alpha): s is masked by fbCombineMaskValueC, then FbByteAdd(d, s) and d is stored. */
+FASTCALL void
+fbCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t s = *(src + i);
+        uint32_t m = *(mask + i);
+        uint32_t d = *(dest + i);
+
+	fbCombineMaskValueC (&s, &m);
+
+        FbByteAdd(d, s);
+	*(dest + i) = d;
+    }
+}
+/* Saturate (component alpha): each byte uses FbAdd when its mask alpha <= ~dest alpha, else FbGen with a scaled factor. */
+FASTCALL void
+fbCombineSaturateC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  s, d;
+        uint16_t  sa, sr, sg, sb, da;
+        uint16_t  t, u, v;
+        uint32_t  m,n,o,p;
+
+        d = *(dest + i);
+        s = *(src + i);
+	m = *(mask + i);
+
+	fbCombineMaskC (&s, &m);
+	/* m is split into its four bytes here and then reused for the byte-0 result */
+        sa = (m >> 24);
+        sr = (m >> 16) & 0xff;
+        sg = (m >>  8) & 0xff;
+        sb = (m      ) & 0xff;
+        da = ~d >> 24;
+	/* every else-branch divisor is > da >= 0, so it is never 0 */
+        if (sb <= da)
+            m = FbAdd(s,d,0,t);
+        else
+            m = FbGen (s, d, 0, (da << 8) / sb, 0xff, t, u, v);
+
+        if (sg <= da)
+            n = FbAdd(s,d,8,t);
+        else
+            n = FbGen (s, d, 8, (da << 8) / sg, 0xff, t, u, v);
+
+        if (sr <= da)
+            o = FbAdd(s,d,16,t);
+        else
+            o = FbGen (s, d, 16, (da << 8) / sr, 0xff, t, u, v);
+
+        if (sa <= da)
+            p = FbAdd(s,d,24,t);
+        else
+            p = FbGen (s, d, 24, (da << 8) / sa, 0xff, t, u, v);
+
+	*(dest + i) = m|n|o|p;
+    }
+}
+/* Generic disjoint combiner (component alpha): per-byte Fa and Fb chosen by `combine`, then FbGen per byte. */
+FASTCALL void
+fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  s, d;
+        uint32_t  m,n,o,p;
+        uint32_t  Fa, Fb;
+        uint16_t  t, u, v;
+        uint32_t  sa;
+        uint8_t   da;
+
+        s = *(src + i);
+        m = *(mask + i);
+        d = *(dest + i);
+        da = d >> 24;
+
+	fbCombineMaskC (&s, &m);
+	/* sa keeps the adjusted mask (four alpha bytes); m..p are reused as scratch below */
+	sa = m;
+	/* Fa: one factor per byte, each from that byte of sa against da */
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            m = fbCombineDisjointOutPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineDisjointOutPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineDisjointOutPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineDisjointOutPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineAIn:
+            m = fbCombineDisjointInPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineDisjointInPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineDisjointInPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineDisjointInPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineA:
+            Fa = 0xffffffff;
+            break;
+        }
+	/* Fb: one factor per byte, each from da against that byte of sa */
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            m = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineBIn:
+            m = fbCombineDisjointInPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineDisjointInPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineDisjointInPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineDisjointInPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineB:
+            Fb = 0xffffffff;
+            break;
+        }
+        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
+        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
+        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
+        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+/* Disjoint Over (component alpha): fbCombineDisjointGeneralC with the CombineAOver selector. */
+FASTCALL void
+fbCombineDisjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOver);
+}
+/* Disjoint In (component alpha): fbCombineDisjointGeneralC with the CombineAIn selector. */
+FASTCALL void
+fbCombineDisjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAIn);
+}
+/* Disjoint InReverse (component alpha): fbCombineDisjointGeneralC with the CombineBIn selector. */
+FASTCALL void
+fbCombineDisjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBIn);
+}
+/* Disjoint Out (component alpha): fbCombineDisjointGeneralC with the CombineAOut selector. */
+FASTCALL void
+fbCombineDisjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOut);
+}
+/* Disjoint OutReverse (component alpha): fbCombineDisjointGeneralC with the CombineBOut selector. */
+FASTCALL void
+fbCombineDisjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBOut);
+}
+/* Disjoint Atop (component alpha): fbCombineDisjointGeneralC with the CombineAAtop selector. */
+FASTCALL void
+fbCombineDisjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAAtop);
+}
+/* Disjoint AtopReverse (component alpha): fbCombineDisjointGeneralC with the CombineBAtop selector. */
+FASTCALL void
+fbCombineDisjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBAtop);
+}
+/* Disjoint Xor (component alpha): fbCombineDisjointGeneralC with the CombineXor selector. */
+FASTCALL void
+fbCombineDisjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineDisjointGeneralC (dest, src, mask, width, CombineXor);
+}
+/* Generic conjoint combiner (component alpha): per-byte Fa and Fb chosen by `combine`, then FbGen per byte. */
+FASTCALL void
+fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
+{
+    int i;
+
+    for (i = 0; i < width; ++i) {
+        uint32_t  s, d;
+        uint32_t  m,n,o,p;
+        uint32_t  Fa, Fb;
+        uint16_t  t, u, v;
+        uint32_t  sa;
+        uint8_t   da;
+
+        s = *(src + i);
+        m = *(mask + i);
+        d = *(dest + i);
+        da = d >> 24;
+
+	fbCombineMaskC (&s, &m);
+	/* sa keeps the adjusted mask (four alpha bytes); m..p are reused as scratch below */
+        sa = m;
+	/* Fa: one factor per byte, each from that byte of sa against da */
+        switch (combine & CombineA) {
+        default:
+            Fa = 0;
+            break;
+        case CombineAOut:
+            m = fbCombineConjointOutPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineConjointOutPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineConjointOutPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineConjointOutPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineAIn:
+            m = fbCombineConjointInPart ((uint8_t) (sa >> 0), da);
+            n = fbCombineConjointInPart ((uint8_t) (sa >> 8), da) << 8;
+            o = fbCombineConjointInPart ((uint8_t) (sa >> 16), da) << 16;
+            p = fbCombineConjointInPart ((uint8_t) (sa >> 24), da) << 24;
+            Fa = m|n|o|p;
+            break;
+        case CombineA:
+            Fa = 0xffffffff;
+            break;
+        }
+	/* Fb: one factor per byte, each from da against that byte of sa */
+        switch (combine & CombineB) {
+        default:
+            Fb = 0;
+            break;
+        case CombineBOut:
+            m = fbCombineConjointOutPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineConjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineConjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineConjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineBIn:
+            m = fbCombineConjointInPart (da, (uint8_t) (sa >> 0));
+            n = fbCombineConjointInPart (da, (uint8_t) (sa >> 8)) << 8;
+            o = fbCombineConjointInPart (da, (uint8_t) (sa >> 16)) << 16;
+            p = fbCombineConjointInPart (da, (uint8_t) (sa >> 24)) << 24;
+            Fb = m|n|o|p;
+            break;
+        case CombineB:
+            Fb = 0xffffffff;
+            break;
+        }
+        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
+        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
+        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
+        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
+        s = m|n|o|p;
+	*(dest + i) = s;
+    }
+}
+/* Conjoint Over (component alpha): fbCombineConjointGeneralC with the CombineAOver selector. */
+FASTCALL void
+fbCombineConjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOver);
+}
+/* Conjoint OverReverse (component alpha): fbCombineConjointGeneralC with the CombineBOver selector. */
+FASTCALL void
+fbCombineConjointOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOver);
+}
+/* Conjoint In (component alpha): fbCombineConjointGeneralC with the CombineAIn selector. */
+FASTCALL void
+fbCombineConjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAIn);
+}
+/* Conjoint InReverse (component alpha): fbCombineConjointGeneralC with the CombineBIn selector. */
+FASTCALL void
+fbCombineConjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBIn);
+}
+/* Conjoint Out (component alpha): fbCombineConjointGeneralC with the CombineAOut selector. */
+FASTCALL void
+fbCombineConjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOut);
+}
+/* Conjoint OutReverse (component alpha): fbCombineConjointGeneralC with the CombineBOut selector. */
+FASTCALL void
+fbCombineConjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOut);
+}
+/* Conjoint Atop (component alpha): fbCombineConjointGeneralC with the CombineAAtop selector. */
+FASTCALL void
+fbCombineConjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineAAtop);
+}
+/* Conjoint AtopReverse (component alpha): fbCombineConjointGeneralC with the CombineBAtop selector. */
+FASTCALL void
+fbCombineConjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineBAtop);
+}
+/* Conjoint Xor (component alpha): fbCombineConjointGeneralC with the CombineXor selector. */
+FASTCALL void
+fbCombineConjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
+}
+/* Unified-alpha combiner table: plain group at 0x00, disjoint at 0x10, conjoint at 0x20; NULL slots have no function. */
+CombineFuncU pixman_fbCombineFuncU[] = {
+    fbCombineClear,
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineOverU,
+    fbCombineOverReverseU,
+    fbCombineInU,
+    fbCombineInReverseU,
+    fbCombineOutU,
+    fbCombineOutReverseU,
+    fbCombineAtopU,
+    fbCombineAtopReverseU,
+    fbCombineXorU,
+    fbCombineAddU,
+    fbCombineSaturateU,
+    NULL,
+    NULL,
+    fbCombineClear,	    /* 0x10 */
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineDisjointOverU,
+    fbCombineSaturateU, /* DisjointOverReverse */
+    fbCombineDisjointInU,
+    fbCombineDisjointInReverseU,
+    fbCombineDisjointOutU,
+    fbCombineDisjointOutReverseU,
+    fbCombineDisjointAtopU,
+    fbCombineDisjointAtopReverseU,
+    fbCombineDisjointXorU,  /* 0x1b */
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    fbCombineClear,	    /* 0x20 */
+    fbCombineSrcU,
+    NULL, /* CombineDst */
+    fbCombineConjointOverU,
+    fbCombineConjointOverReverseU,
+    fbCombineConjointInU,
+    fbCombineConjointInReverseU,
+    fbCombineConjointOutU,
+    fbCombineConjointOutReverseU,
+    fbCombineConjointAtopU,
+    fbCombineConjointAtopReverseU,
+    fbCombineConjointXorU,
+};
+/* Component-alpha combiner table: plain group at 0x00, disjoint at 0x10, conjoint at 0x20; NULL slots have no function. */
+CombineFuncC pixman_fbCombineFuncC[] = {
+    fbCombineClearC,
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineOverC,
+    fbCombineOverReverseC,
+    fbCombineInC,
+    fbCombineInReverseC,
+    fbCombineOutC,
+    fbCombineOutReverseC,
+    fbCombineAtopC,
+    fbCombineAtopReverseC,
+    fbCombineXorC,
+    fbCombineAddC,
+    fbCombineSaturateC,
+    NULL,
+    NULL,
+    fbCombineClearC,	    /* 0x10 */
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineDisjointOverC,
+    fbCombineSaturateC, /* DisjointOverReverse */
+    fbCombineDisjointInC,
+    fbCombineDisjointInReverseC,
+    fbCombineDisjointOutC,
+    fbCombineDisjointOutReverseC,
+    fbCombineDisjointAtopC,
+    fbCombineDisjointAtopReverseC,
+    fbCombineDisjointXorC,  /* 0x1b */
+    NULL,
+    NULL,
+    NULL,
+    NULL,
+    fbCombineClearC,	    /* 0x20 */
+    fbCombineSrcC,
+    NULL, /* Dest */
+    fbCombineConjointOverC,
+    fbCombineConjointOverReverseC,
+    fbCombineConjointInC,
+    fbCombineConjointInReverseC,
+    fbCombineConjointOutC,
+    fbCombineConjointOutReverseC,
+    fbCombineConjointAtopC,
+    fbCombineConjointAtopReverseC,
+    fbCombineConjointXorC,
+};
\ No newline at end of file
diff --git a/pixman/pixman-compose.c b/pixman/pixman-compose.c
index faf2523..4ec9f69 100644
--- a/pixman/pixman-compose.c
+++ b/pixman/pixman-compose.c
@@ -35,20 +35,6 @@
 
 #include "pixman-private.h"
 
-/*
- *    FIXME:
- *		The stuff here is added just to get it to compile. Something sensible needs to
- *              be done before this can be used.
- *
- *   we should go through this code and clean up some of the weird stuff that have
- *   resulted from unmacro-ifying it.
- *
- */
-#define INLINE inline
-
-/*   End of stuff added to get it to compile
- */
-
 static unsigned int
 SourcePictureClassify (source_image_t *pict,
 		       int	       x,
@@ -1770,1228 +1756,6 @@ static storeProc storeProcForPicture (bits_image_t * pict)
 }
 
 
-/*
- * Combine src and mask
- */
-static FASTCALL void
-pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t a = *(mask + i) >> 24;
-        uint32_t s = *(src + i);
-        FbByteMul(s, a);
-        *(src + i) = s;
-    }
-}
-
-/*
- * All of the composing functions
- */
-
-static FASTCALL void
-fbCombineClear (uint32_t *dest, const uint32_t *src, int width)
-{
-    memset(dest, 0, width*sizeof(uint32_t));
-}
-
-static FASTCALL void
-fbCombineSrcU (uint32_t *dest, const uint32_t *src, int width)
-{
-    memcpy(dest, src, width*sizeof(uint32_t));
-}
-
-
-static FASTCALL void
-fbCombineOverU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t ia = Alpha(~s);
-
-        FbByteMulAdd(d, ia, s);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t ia = Alpha(~*(dest + i));
-        FbByteMulAdd(s, ia, d);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineInU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t a = Alpha(*(dest + i));
-        FbByteMul(s, a);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = Alpha(*(src + i));
-        FbByteMul(d, a);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineOutU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t a = Alpha(~*(dest + i));
-        FbByteMul(s, a);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = Alpha(~*(src + i));
-        FbByteMul(d, a);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t dest_a = Alpha(d);
-        uint32_t src_ia = Alpha(~s);
-
-        FbByteAddMul(s, dest_a, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t src_a = Alpha(s);
-        uint32_t dest_ia = Alpha(~d);
-
-        FbByteAddMul(s, dest_ia, d, src_a);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineXorU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t src_ia = Alpha(~s);
-        uint32_t dest_ia = Alpha(~d);
-
-        FbByteAddMul(s, dest_ia, d, src_ia);
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineAddU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        FbByteAdd(d, s);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineSaturateU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint16_t  sa, da;
-
-        sa = s >> 24;
-        da = ~d >> 24;
-        if (sa > da)
-        {
-            sa = FbIntDiv(da, sa);
-            FbByteMul(s, sa);
-        };
-        FbByteAdd(d, s);
-	*(dest + i) = d;
-    }
-}
-
-/*
- * All of the disjoint composing functions
-
- The four entries in the first column indicate what source contributions
- come from each of the four areas of the picture -- areas covered by neither
- A nor B, areas covered only by A, areas covered only by B and finally
- areas covered by both A and B.
-
- Disjoint			Conjoint
- Fa		Fb		Fa		Fb
- (0,0,0,0)	0		0		0		0
- (0,A,0,A)	1		0		1		0
- (0,0,B,B)	0		1		0		1
- (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
- (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
- (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
- (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
- (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
- (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
- (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
- (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
- (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
-
-*/
-
-#define CombineAOut 1
-#define CombineAIn  2
-#define CombineBOut 4
-#define CombineBIn  8
-
-#define CombineClear	0
-#define CombineA	(CombineAOut|CombineAIn)
-#define CombineB	(CombineBOut|CombineBIn)
-#define CombineAOver	(CombineAOut|CombineBOut|CombineAIn)
-#define CombineBOver	(CombineAOut|CombineBOut|CombineBIn)
-#define CombineAAtop	(CombineBOut|CombineAIn)
-#define CombineBAtop	(CombineAOut|CombineBIn)
-#define CombineXor	(CombineAOut|CombineBOut)
-
-/* portion covered by a but not b */
-static INLINE uint8_t
-fbCombineDisjointOutPart (uint8_t a, uint8_t b)
-{
-    /* min (1, (1-b) / a) */
-
-    b = ~b;		    /* 1 - b */
-    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
-	return 0xff;	    /* 1 */
-    return FbIntDiv(b,a);   /* (1-b) / a */
-}
-
-/* portion covered by both a and b */
-static INLINE uint8_t
-fbCombineDisjointInPart (uint8_t a, uint8_t b)
-{
-    /* max (1-(1-b)/a,0) */
-    /*  = - min ((1-b)/a - 1, 0) */
-    /*  = 1 - min (1, (1-b)/a) */
-
-    b = ~b;		    /* 1 - b */
-    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
-	return 0;	    /* 1 - 1 */
-    return ~FbIntDiv(b,a);  /* 1 - (1-b) / a */
-}
-
-static FASTCALL void
-fbCombineDisjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t m,n,o,p;
-        uint16_t Fa, Fb, t, u, v;
-        uint8_t sa = s >> 24;
-        uint8_t da = d >> 24;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            Fa = fbCombineDisjointOutPart (sa, da);
-            break;
-        case CombineAIn:
-            Fa = fbCombineDisjointInPart (sa, da);
-            break;
-        case CombineA:
-            Fa = 0xff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            Fb = fbCombineDisjointOutPart (da, sa);
-            break;
-        case CombineBIn:
-            Fb = fbCombineDisjointInPart (da, sa);
-            break;
-        case CombineB:
-            Fb = 0xff;
-            break;
-        }
-        m = FbGen (s,d,0,Fa,Fb,t, u, v);
-        n = FbGen (s,d,8,Fa,Fb,t, u, v);
-        o = FbGen (s,d,16,Fa,Fb,t, u, v);
-        p = FbGen (s,d,24,Fa,Fb,t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineDisjointOverU (uint32_t *dest, const uint32_t *src, int width)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint16_t  a = s >> 24;
-
-        if (a != 0x00)
-        {
-            if (a != 0xff)
-            {
-                uint32_t d = *(dest + i);
-                a = fbCombineDisjointOutPart (d >> 24, a);
-                FbByteMulAdd(d, a, s);
-                s = d;
-            }
-	    *(dest + i) = s;
-        }
-    }
-}
-
-static FASTCALL void
-fbCombineDisjointInU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineAIn);
-}
-
-static FASTCALL void
-fbCombineDisjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineBIn);
-}
-
-static FASTCALL void
-fbCombineDisjointOutU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineAOut);
-}
-
-static FASTCALL void
-fbCombineDisjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineBOut);
-}
-
-static FASTCALL void
-fbCombineDisjointAtopU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineAAtop);
-}
-
-static FASTCALL void
-fbCombineDisjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineBAtop);
-}
-
-static FASTCALL void
-fbCombineDisjointXorU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineDisjointGeneralU (dest, src, width, CombineXor);
-}
-
-/* portion covered by a but not b */
-static INLINE uint8_t
-fbCombineConjointOutPart (uint8_t a, uint8_t b)
-{
-    /* max (1-b/a,0) */
-    /* = 1-min(b/a,1) */
-
-    /* min (1, (1-b) / a) */
-
-    if (b >= a)		    /* b >= a -> b/a >= 1 */
-	return 0x00;	    /* 0 */
-    return ~FbIntDiv(b,a);   /* 1 - b/a */
-}
-
-/* portion covered by both a and b */
-static INLINE uint8_t
-fbCombineConjointInPart (uint8_t a, uint8_t b)
-{
-    /* min (1,b/a) */
-
-    if (b >= a)		    /* b >= a -> b/a >= 1 */
-	return 0xff;	    /* 1 */
-    return FbIntDiv(b,a);   /* b/a */
-}
-
-static FASTCALL void
-fbCombineConjointGeneralU (uint32_t *dest, const uint32_t *src, int width, uint8_t combine)
-{
-    int i;
-    for (i = 0; i < width; ++i) {
-        uint32_t  s = *(src + i);
-        uint32_t d = *(dest + i);
-        uint32_t  m,n,o,p;
-        uint16_t  Fa, Fb, t, u, v;
-        uint8_t sa = s >> 24;
-        uint8_t da = d >> 24;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            Fa = fbCombineConjointOutPart (sa, da);
-            break;
-        case CombineAIn:
-            Fa = fbCombineConjointInPart (sa, da);
-            break;
-        case CombineA:
-            Fa = 0xff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            Fb = fbCombineConjointOutPart (da, sa);
-            break;
-        case CombineBIn:
-            Fb = fbCombineConjointInPart (da, sa);
-            break;
-        case CombineB:
-            Fb = 0xff;
-            break;
-        }
-        m = FbGen (s,d,0,Fa,Fb,t, u, v);
-        n = FbGen (s,d,8,Fa,Fb,t, u, v);
-        o = FbGen (s,d,16,Fa,Fb,t, u, v);
-        p = FbGen (s,d,24,Fa,Fb,t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineConjointOverU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAOver);
-}
-
-
-static FASTCALL void
-fbCombineConjointOverReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBOver);
-}
-
-
-static FASTCALL void
-fbCombineConjointInU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAIn);
-}
-
-
-static FASTCALL void
-fbCombineConjointInReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBIn);
-}
-
-static FASTCALL void
-fbCombineConjointOutU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAOut);
-}
-
-static FASTCALL void
-fbCombineConjointOutReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBOut);
-}
-
-static FASTCALL void
-fbCombineConjointAtopU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineAAtop);
-}
-
-static FASTCALL void
-fbCombineConjointAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineBAtop);
-}
-
-static FASTCALL void
-fbCombineConjointXorU (uint32_t *dest, const uint32_t *src, int width)
-{
-    fbCombineConjointGeneralU (dest, src, width, CombineXor);
-}
-
-static CombineFuncU pixman_fbCombineFuncU[] = {
-    fbCombineClear,
-    fbCombineSrcU,
-    NULL, /* CombineDst */
-    fbCombineOverU,
-    fbCombineOverReverseU,
-    fbCombineInU,
-    fbCombineInReverseU,
-    fbCombineOutU,
-    fbCombineOutReverseU,
-    fbCombineAtopU,
-    fbCombineAtopReverseU,
-    fbCombineXorU,
-    fbCombineAddU,
-    fbCombineSaturateU,
-    NULL,
-    NULL,
-    fbCombineClear,
-    fbCombineSrcU,
-    NULL, /* CombineDst */
-    fbCombineDisjointOverU,
-    fbCombineSaturateU, /* DisjointOverReverse */
-    fbCombineDisjointInU,
-    fbCombineDisjointInReverseU,
-    fbCombineDisjointOutU,
-    fbCombineDisjointOutReverseU,
-    fbCombineDisjointAtopU,
-    fbCombineDisjointAtopReverseU,
-    fbCombineDisjointXorU,
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    fbCombineClear,
-    fbCombineSrcU,
-    NULL, /* CombineDst */
-    fbCombineConjointOverU,
-    fbCombineConjointOverReverseU,
-    fbCombineConjointInU,
-    fbCombineConjointInReverseU,
-    fbCombineConjointOutU,
-    fbCombineConjointOutReverseU,
-    fbCombineConjointAtopU,
-    fbCombineConjointAtopReverseU,
-    fbCombineConjointXorU,
-};
-
-static INLINE void
-fbCombineMaskC (uint32_t *src, uint32_t *mask)
-{
-    uint32_t a = *mask;
-
-    uint32_t	x;
-    uint16_t	xa;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    x = *(src);
-    if (a == 0xffffffff)
-    {
-	x = x >> 24;
-	x |= x << 8;
-	x |= x << 16;
-	*(mask) = x;
-	return;
-    }
-
-    xa = x >> 24;
-    FbByteMulC(x, a);
-    *(src) = x;
-    FbByteMul(a, xa);
-    *(mask) = a;
-}
-
-static INLINE void
-fbCombineMaskValueC (uint32_t *src, const uint32_t *mask)
-{
-    uint32_t a = *mask;
-    uint32_t	x;
-
-    if (!a)
-    {
-	*(src) = 0;
-	return;
-    }
-
-    if (a == 0xffffffff)
-	return;
-
-    x = *(src);
-    FbByteMulC(x, a);
-    *(src) =x;
-}
-
-static INLINE void
-fbCombineMaskAlphaC (const uint32_t *src, uint32_t *mask)
-{
-    uint32_t a = *(mask);
-    uint32_t	x;
-
-    if (!a)
-	return;
-
-    x = *(src) >> 24;
-    if (x == 0xff)
-	return;
-    if (a == 0xffffffff)
-    {
-	x = x >> 24;
-	x |= x << 8;
-	x |= x << 16;
-	*(mask) = x;
-	return;
-    }
-
-    FbByteMul(a, x);
-    *(mask) = a;
-}
-
-static FASTCALL void
-fbCombineClearC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    memset(dest, 0, width*sizeof(uint32_t));
-}
-
-static FASTCALL void
-fbCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-
-	fbCombineMaskValueC (&s, &m);
-
-	*(dest) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	fbCombineMaskC (&s, &m);
-
-	a = ~m;
-        if (a != 0xffffffff)
-        {
-            if (a)
-            {
-                uint32_t d = *(dest + i);
-                FbByteMulAddC(d, a, s);
-                s = d;
-            }
-	    *(dest + i) = s;
-        }
-    }
-}
-
-static FASTCALL void
-fbCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t a = ~d >> 24;
-
-        if (a)
-        {
-            uint32_t s = *(src + i);
-	    uint32_t m = *(mask + i);
-
-	    fbCombineMaskValueC (&s, &m);
-
-            if (a != 0xff)
-            {
-                FbByteMulAdd(s, a, d);
-            }
-	    *(dest + i) = s;
-        }
-    }
-}
-
-static FASTCALL void
-fbCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint16_t a = d >> 24;
-        uint32_t s = 0;
-        if (a)
-        {
-	    uint32_t m = *(mask + i);
-
-	    s = *(src + i);
-	    fbCombineMaskValueC (&s, &m);
-            if (a != 0xff)
-            {
-                FbByteMul(s, a);
-            }
-        }
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t a;
-
-	fbCombineMaskAlphaC (&s, &m);
-
-	a = m;
-        if (a != 0xffffffff)
-        {
-            uint32_t d = 0;
-            if (a)
-            {
-                d = *(dest + i);
-                FbByteMulC(d, a);
-            }
-	    *(dest + i) = d;
-        }
-    }
-}
-
-static FASTCALL void
-fbCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint16_t a = ~d >> 24;
-        uint32_t s = 0;
-        if (a)
-        {
-	    uint32_t m = *(mask + i);
-
-	    s = *(src + i);
-	    fbCombineMaskValueC (&s, &m);
-
-            if (a != 0xff)
-            {
-                FbByteMul(s, a);
-            }
-        }
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-	uint32_t s = *(src + i);
-	uint32_t m = *(mask + i);
-	uint32_t a;
-
-	fbCombineMaskAlphaC (&s, &m);
-
-        a = ~m;
-        if (a != 0xffffffff)
-        {
-            uint32_t d = 0;
-            if (a)
-            {
-                d = *(dest + i);
-                FbByteMulC(d, a);
-            }
-	    *(dest + i) = d;
-        }
-    }
-}
-
-static FASTCALL void
-fbCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-        ad = ~m;
-
-        FbByteAddMulC(d, ad, s, as);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = ~d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-	ad = m;
-
-        FbByteAddMulC(d, ad, s, as);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t d = *(dest + i);
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t ad;
-        uint16_t as = ~d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-	ad = ~m;
-
-        FbByteAddMulC(d, ad, s, as);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t s = *(src + i);
-        uint32_t m = *(mask + i);
-        uint32_t d = *(dest + i);
-
-	fbCombineMaskValueC (&s, &m);
-
-        FbByteAdd(d, s);
-	*(dest + i) = d;
-    }
-}
-
-static FASTCALL void
-fbCombineSaturateC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint16_t  sa, sr, sg, sb, da;
-        uint16_t  t, u, v;
-        uint32_t  m,n,o,p;
-
-        d = *(dest + i);
-        s = *(src + i);
-	m = *(mask + i);
-
-	fbCombineMaskC (&s, &m);
-
-        sa = (m >> 24);
-        sr = (m >> 16) & 0xff;
-        sg = (m >>  8) & 0xff;
-        sb = (m      ) & 0xff;
-        da = ~d >> 24;
-
-        if (sb <= da)
-            m = FbAdd(s,d,0,t);
-        else
-            m = FbGen (s, d, 0, (da << 8) / sb, 0xff, t, u, v);
-
-        if (sg <= da)
-            n = FbAdd(s,d,8,t);
-        else
-            n = FbGen (s, d, 8, (da << 8) / sg, 0xff, t, u, v);
-
-        if (sr <= da)
-            o = FbAdd(s,d,16,t);
-        else
-            o = FbGen (s, d, 16, (da << 8) / sr, 0xff, t, u, v);
-
-        if (sa <= da)
-            p = FbAdd(s,d,24,t);
-        else
-            p = FbGen (s, d, 24, (da << 8) / sa, 0xff, t, u, v);
-
-	*(dest + i) = m|n|o|p;
-    }
-}
-
-static FASTCALL void
-fbCombineDisjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint32_t  m,n,o,p;
-        uint32_t  Fa, Fb;
-        uint16_t  t, u, v;
-        uint32_t  sa;
-        uint8_t   da;
-
-        s = *(src + i);
-        m = *(mask + i);
-        d = *(dest + i);
-        da = d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-	sa = m;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            m = fbCombineDisjointOutPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineDisjointOutPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineDisjointOutPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineDisjointOutPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineAIn:
-            m = fbCombineDisjointInPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineDisjointInPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineDisjointInPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineDisjointInPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineA:
-            Fa = 0xffffffff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            m = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineDisjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineBIn:
-            m = fbCombineDisjointInPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineDisjointInPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineDisjointInPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineDisjointInPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineB:
-            Fb = 0xffffffff;
-            break;
-        }
-        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
-        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
-        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
-        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineDisjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOver);
-}
-
-static FASTCALL void
-fbCombineDisjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAIn);
-}
-
-static FASTCALL void
-fbCombineDisjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBIn);
-}
-
-static FASTCALL void
-fbCombineDisjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAOut);
-}
-
-static FASTCALL void
-fbCombineDisjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBOut);
-}
-
-static FASTCALL void
-fbCombineDisjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineAAtop);
-}
-
-static FASTCALL void
-fbCombineDisjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineBAtop);
-}
-
-static FASTCALL void
-fbCombineDisjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineDisjointGeneralC (dest, src, mask, width, CombineXor);
-}
-
-static FASTCALL void
-fbCombineConjointGeneralC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width, uint8_t combine)
-{
-    int i;
-
-    for (i = 0; i < width; ++i) {
-        uint32_t  s, d;
-        uint32_t  m,n,o,p;
-        uint32_t  Fa, Fb;
-        uint16_t  t, u, v;
-        uint32_t  sa;
-        uint8_t   da;
-
-        s = *(src + i);
-        m = *(mask + i);
-        d = *(dest + i);
-        da = d >> 24;
-
-	fbCombineMaskC (&s, &m);
-
-        sa = m;
-
-        switch (combine & CombineA) {
-        default:
-            Fa = 0;
-            break;
-        case CombineAOut:
-            m = fbCombineConjointOutPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineConjointOutPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineConjointOutPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineConjointOutPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineAIn:
-            m = fbCombineConjointInPart ((uint8_t) (sa >> 0), da);
-            n = fbCombineConjointInPart ((uint8_t) (sa >> 8), da) << 8;
-            o = fbCombineConjointInPart ((uint8_t) (sa >> 16), da) << 16;
-            p = fbCombineConjointInPart ((uint8_t) (sa >> 24), da) << 24;
-            Fa = m|n|o|p;
-            break;
-        case CombineA:
-            Fa = 0xffffffff;
-            break;
-        }
-
-        switch (combine & CombineB) {
-        default:
-            Fb = 0;
-            break;
-        case CombineBOut:
-            m = fbCombineConjointOutPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineConjointOutPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineConjointOutPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineConjointOutPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineBIn:
-            m = fbCombineConjointInPart (da, (uint8_t) (sa >> 0));
-            n = fbCombineConjointInPart (da, (uint8_t) (sa >> 8)) << 8;
-            o = fbCombineConjointInPart (da, (uint8_t) (sa >> 16)) << 16;
-            p = fbCombineConjointInPart (da, (uint8_t) (sa >> 24)) << 24;
-            Fb = m|n|o|p;
-            break;
-        case CombineB:
-            Fb = 0xffffffff;
-            break;
-        }
-        m = FbGen (s,d,0,FbGet8(Fa,0),FbGet8(Fb,0),t, u, v);
-        n = FbGen (s,d,8,FbGet8(Fa,8),FbGet8(Fb,8),t, u, v);
-        o = FbGen (s,d,16,FbGet8(Fa,16),FbGet8(Fb,16),t, u, v);
-        p = FbGen (s,d,24,FbGet8(Fa,24),FbGet8(Fb,24),t, u, v);
-        s = m|n|o|p;
-	*(dest + i) = s;
-    }
-}
-
-static FASTCALL void
-fbCombineConjointOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOver);
-}
-
-static FASTCALL void
-fbCombineConjointOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOver);
-}
-
-static FASTCALL void
-fbCombineConjointInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAIn);
-}
-
-static FASTCALL void
-fbCombineConjointInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBIn);
-}
-
-static FASTCALL void
-fbCombineConjointOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAOut);
-}
-
-static FASTCALL void
-fbCombineConjointOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBOut);
-}
-
-static FASTCALL void
-fbCombineConjointAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineAAtop);
-}
-
-static FASTCALL void
-fbCombineConjointAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineBAtop);
-}
-
-static FASTCALL void
-fbCombineConjointXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
-{
-    fbCombineConjointGeneralC (dest, src, mask, width, CombineXor);
-}
-
-static CombineFuncC pixman_fbCombineFuncC[] = {
-    fbCombineClearC,
-    fbCombineSrcC,
-    NULL, /* Dest */
-    fbCombineOverC,
-    fbCombineOverReverseC,
-    fbCombineInC,
-    fbCombineInReverseC,
-    fbCombineOutC,
-    fbCombineOutReverseC,
-    fbCombineAtopC,
-    fbCombineAtopReverseC,
-    fbCombineXorC,
-    fbCombineAddC,
-    fbCombineSaturateC,
-    NULL,
-    NULL,
-    fbCombineClearC,	    /* 0x10 */
-    fbCombineSrcC,
-    NULL, /* Dest */
-    fbCombineDisjointOverC,
-    fbCombineSaturateC, /* DisjointOverReverse */
-    fbCombineDisjointInC,
-    fbCombineDisjointInReverseC,
-    fbCombineDisjointOutC,
-    fbCombineDisjointOutReverseC,
-    fbCombineDisjointAtopC,
-    fbCombineDisjointAtopReverseC,
-    fbCombineDisjointXorC,  /* 0x1b */
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    fbCombineClearC,
-    fbCombineSrcC,
-    NULL, /* Dest */
-    fbCombineConjointOverC,
-    fbCombineConjointOverReverseC,
-    fbCombineConjointInC,
-    fbCombineConjointInReverseC,
-    fbCombineConjointOutC,
-    fbCombineConjointOutReverseC,
-    fbCombineConjointAtopC,
-    fbCombineConjointAtopReverseC,
-    fbCombineConjointXorC,
-};
-
-
 static void fbFetchSolid(bits_image_t * pict, int x, int y, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits)
 {
     uint32_t color;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 10bb8fc..bfc0dff 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -299,6 +299,11 @@ union pixman_image
     solid_fill_t		solid;
 };
 
+/* Declarations only: 'extern' stops each includer emitting its own tentative definition. */
+extern CombineFuncU pixman_fbCombineFuncU[];
+extern CombineFuncC pixman_fbCombineFuncC[];
+FASTCALL void pixman_fbCombineMaskU (uint32_t *src, const uint32_t *mask, int width);
+
 #define LOG2_BITMAP_PAD 5
 #define FB_STIP_SHIFT	LOG2_BITMAP_PAD
 #define FB_STIP_UNIT	(1 << FB_STIP_SHIFT)


More information about the xorg-commit mailing list