pixman: Branch 'region32' - 31 commits

Søren Sandmann Pedersen sandmann at kemper.freedesktop.org
Sun Jun 8 21:00:41 PDT 2008


 TODO                           |    2 
 configure.ac                   |   33 +
 pixman/Makefile.am             |   22 
 pixman/combine.h.inc           |  215 ++++++++
 pixman/combine.inc             |  199 -------
 pixman/combine.pl              |   54 +-
 pixman/pixman-compute-region.c |  105 ++--
 pixman/pixman-edge.c           |    2 
 pixman/pixman-image.c          |  105 ++--
 pixman/pixman-pict.c           |   88 ++-
 pixman/pixman-private.h        |   32 +
 pixman/pixman-region.c         |   58 --
 pixman/pixman-region16.c       |   29 +
 pixman/pixman-region32.c       |   27 +
 pixman/pixman-sse.c            |    2 
 pixman/pixman-transformed.c    |   42 -
 pixman/pixman-trap.c           |   16 
 pixman/pixman-utils.c          |   20 
 pixman/pixman-vmx.c            | 1068 +++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-vmx.h            |  308 +++++++++++
 pixman/pixman.h                |  368 +++++---------
 21 files changed, 2182 insertions(+), 613 deletions(-)

New commits:
commit 62e9b4d6cfcbc10046539b8e9643691bb02bea39
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Mon Jun 9 00:00:07 2008 -0400

    Export pixman_compute_composite_region32() and use it in walk_region

diff --git a/TODO b/TODO
index 5acadf3..4f8f9c4 100644
--- a/TODO
+++ b/TODO
@@ -1,3 +1,5 @@
+  - Behdad's MMX issue - see list
+
   - SSE 2 issues:
 
       - Commented-out uses of fbCompositeCopyAreasse2()
diff --git a/pixman/pixman-compute-region.c b/pixman/pixman-compute-region.c
index fa0dd99..a93cee0 100644
--- a/pixman/pixman-compute-region.c
+++ b/pixman/pixman-compute-region.c
@@ -123,7 +123,7 @@ miClipPictureSrc (pixman_region32_t *	pRegion,
  * an allocation failure, but rendering ignores those anyways.
  */
 
-static pixman_bool_t
+pixman_bool_t
 pixman_compute_composite_region32 (pixman_region32_t *	pRegion,
 				   pixman_image_t *	pSrc,
 				   pixman_image_t *	pMask,
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 127e257..7c88a65 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1187,22 +1187,22 @@ pixman_walk_composite_region (pixman_op_t op,
 			      CompositeFunc compositeRect)
 {
     int		    n;
-    const pixman_box16_t *pbox;
+    const pixman_box32_t *pbox;
     int		    w, h, w_this, h_this;
     int		    x_msk, y_msk, x_src, y_src, x_dst, y_dst;
-    pixman_region16_t reg;
-    pixman_region16_t *region;
+    pixman_region32_t reg;
+    pixman_region32_t *region;
 
-    pixman_region_init (&reg);
-    if (!pixman_compute_composite_region (&reg, pSrc, pMask, pDst,
-					  xSrc, ySrc, xMask, yMask, xDst, yDst, width, height))
+    pixman_region32_init (&reg);
+    if (!pixman_compute_composite_region32 (&reg, pSrc, pMask, pDst,
+					    xSrc, ySrc, xMask, yMask, xDst, yDst, width, height))
     {
 	return;
     }
 
     region = &reg;
 
-    pbox = pixman_region_rectangles (region, &n);
+    pbox = pixman_region32_rectangles (region, &n);
     while (n--)
     {
 	h = pbox->y2 - pbox->y1;
@@ -1258,7 +1258,7 @@ pixman_walk_composite_region (pixman_op_t op,
 	}
 	pbox++;
     }
-    pixman_region_fini (&reg);
+    pixman_region32_fini (&reg);
 }
 
 static void
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 951632c..0ea0cb3 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -686,6 +686,19 @@ pixman_image_is_opaque(pixman_image_t *image);
 pixman_bool_t
 pixman_image_can_get_solid (pixman_image_t *image);
 
+pixman_bool_t
+pixman_compute_composite_region32 (pixman_region32_t *	pRegion,
+				   pixman_image_t *	pSrc,
+				   pixman_image_t *	pMask,
+				   pixman_image_t *	pDst,
+				   int16_t		xSrc,
+				   int16_t		ySrc,
+				   int16_t		xMask,
+				   int16_t		yMask,
+				   int16_t		xDst,
+				   int16_t		yDst,
+				   uint16_t		width,
+				   uint16_t		height);
 
 /* GCC visibility */
 #if defined(__GNUC__) && __GNUC__ >= 4
commit 57819ae3c219252db98df5eefa36499a6b77970c
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 23:53:35 2008 -0400

    Fix bug in pixman_compute_composite_region()
    
    It was using the output region as the input. Add and use
    pixman_region16_copy_from_region32().

diff --git a/pixman/pixman-compute-region.c b/pixman/pixman-compute-region.c
index 859f02e..fa0dd99 100644
--- a/pixman/pixman-compute-region.c
+++ b/pixman/pixman-compute-region.c
@@ -227,12 +227,15 @@ pixman_compute_composite_region (pixman_region16_t *	pRegion,
 
     pixman_region32_init (&r32);
     
-    if (!pixman_region32_copy_from_region16 (&r32, pRegion))
-	return FALSE;
-
     retval = pixman_compute_composite_region32 (&r32, pSrc, pMask, pDst,
 						xSrc, ySrc, xMask, yMask, xDst, yDst,
 						width, height);
+
+    if (retval)
+    {
+	if (!pixman_region16_copy_from_region32 (pRegion, &r32))
+	    retval = FALSE;
+    }
     
     pixman_region32_fini (&r32);
     return retval;
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 7f46ccd..951632c 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -694,10 +694,11 @@ pixman_image_can_get_solid (pixman_image_t *image);
 #define PIXMAN_EXPORT
 #endif
 
-/* Helper for 32 bit regions */
-pixman_bool_t
-pixman_region32_copy_from_region16 (pixman_region32_t *dst,
-				    pixman_region16_t *src);
+/* Region Helpers */
+pixman_bool_t pixman_region32_copy_from_region16 (pixman_region32_t *dst,
+						  pixman_region16_t *src);
+pixman_bool_t pixman_region16_copy_from_region32 (pixman_region16_t *dst,
+						  pixman_region32_t *src);
 
 #ifdef PIXMAN_TIMING
 
diff --git a/pixman/pixman-region16.c b/pixman/pixman-region16.c
index 8d4a050..1a0edfe 100644
--- a/pixman/pixman-region16.c
+++ b/pixman/pixman-region16.c
@@ -40,4 +40,31 @@ typedef struct {
 
 #define PREFIX(x) pixman_region##x
 
+pixman_bool_t
+pixman_region16_copy_from_region32 (pixman_region16_t *dst,
+				    pixman_region32_t *src)
+{
+    int n_boxes, i;
+    pixman_box32_t *boxes32;
+    pixman_box16_t *boxes16;
+    
+    boxes32 = pixman_region32_rectangles (src, &n_boxes);
+
+    boxes16 = pixman_malloc_ab (n_boxes, sizeof (pixman_box16_t));
+
+    if (!boxes16)
+	return FALSE;
+    
+    for (i = 0; i < n_boxes; ++i)
+    {
+	boxes16[i].x1 = boxes32[i].x1;
+	boxes16[i].y1 = boxes32[i].y1;
+	boxes16[i].x2 = boxes32[i].x2;
+	boxes16[i].y2 = boxes32[i].y2;
+    }
+
+    pixman_region_fini (dst);
+    return pixman_region_init_rects (dst, boxes16, n_boxes);
+}
+
 #include "pixman-region.c"
commit 664b891aac50642d6d2ab6c482f4765029ae9b91
Merge: 0b4c6dc... 2b91152...
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 23:39:29 2008 -0400

    Merge branch 'master' into region32

commit 2b9115293e5fca70ca9ffe44ef74c80885dcedbb
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 23:39:03 2008 -0400

    Call _mm_empty() at the end of fbComposeSetupSSE

diff --git a/pixman/pixman-sse.c b/pixman/pixman-sse.c
index ca16515..13bfe1c 100644
--- a/pixman/pixman-sse.c
+++ b/pixman/pixman-sse.c
@@ -2334,6 +2334,8 @@ fbComposeSetupSSE(void)
     }
 
     initialized = TRUE;
+
+    _mm_empty();
 }
 
 
commit 0b4c6dcefd63a43aa9bb6556017e259589116522
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 22:48:59 2008 -0400

    Add pixman_image_set_clip_region32

diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 61b3fc0..487a672 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -387,6 +387,25 @@ pixman_image_create_bits (pixman_format_code_t  format,
 }
 
 PIXMAN_EXPORT pixman_bool_t
+pixman_image_set_clip_region32 (pixman_image_t *image,
+				pixman_region32_t *region)
+{
+    image_common_t *common = (image_common_t *)image;
+
+    if (region)
+    {
+	return pixman_region32_copy (&common->clip_region, region);
+    }
+    else
+    {
+	reset_clip_region (image);
+
+	return TRUE;
+    }
+}
+
+
+PIXMAN_EXPORT pixman_bool_t
 pixman_image_set_clip_region (pixman_image_t    *image,
 			      pixman_region16_t *region)
 {
diff --git a/pixman/pixman.h b/pixman/pixman.h
index cf9d771..8bfbb51 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -596,6 +596,8 @@ pixman_bool_t   pixman_image_unref                   (pixman_image_t
 /* Set properties */
 pixman_bool_t   pixman_image_set_clip_region         (pixman_image_t               *image,
 						      pixman_region16_t            *region);
+pixman_bool_t   pixman_image_set_clip_region32       (pixman_image_t               *image,
+						      pixman_region32_t            *region);
 void		pixman_image_set_has_client_clip     (pixman_image_t               *image,
 						      pixman_bool_t		    clien_clip);
 pixman_bool_t   pixman_image_set_transform           (pixman_image_t               *image,
commit 703f82cd02f5224632b4b7f7f3f072067fa4f76d
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 22:20:44 2008 -0400

    Use 32 bit regions internally

diff --git a/pixman/pixman-compute-region.c b/pixman/pixman-compute-region.c
index 6743304..859f02e 100644
--- a/pixman/pixman-compute-region.c
+++ b/pixman/pixman-compute-region.c
@@ -32,16 +32,16 @@
 #define BOUND(v)	(int16_t) ((v) < INT16_MIN ? INT16_MIN : (v) > INT16_MAX ? INT16_MAX : (v))
 
 static inline pixman_bool_t
-miClipPictureReg (pixman_region16_t *	pRegion,
-		  pixman_region16_t *	pClip,
+miClipPictureReg (pixman_region32_t *	pRegion,
+		  pixman_region32_t *	pClip,
 		  int		dx,
 		  int		dy)
 {
-    if (pixman_region_n_rects(pRegion) == 1 &&
-	pixman_region_n_rects(pClip) == 1)
+    if (pixman_region32_n_rects(pRegion) == 1 &&
+	pixman_region32_n_rects(pClip) == 1)
     {
-	pixman_box16_t *  pRbox = pixman_region_rectangles(pRegion, NULL);
-	pixman_box16_t *  pCbox = pixman_region_rectangles(pClip, NULL);
+	pixman_box32_t *  pRbox = pixman_region32_rectangles(pRegion, NULL);
+	pixman_box32_t *  pCbox = pixman_region32_rectangles(pClip, NULL);
 	int	v;
 	
 	if (pRbox->x1 < (v = pCbox->x1 + dx))
@@ -55,26 +55,26 @@ miClipPictureReg (pixman_region16_t *	pRegion,
 	if (pRbox->x1 >= pRbox->x2 ||
 	    pRbox->y1 >= pRbox->y2)
 	{
-	    pixman_region_init (pRegion);
+	    pixman_region32_init (pRegion);
 	}
     }
-    else if (!pixman_region_not_empty (pClip))
+    else if (!pixman_region32_not_empty (pClip))
 	return FALSE;
     else
     {
 	if (dx || dy)
-	    pixman_region_translate (pRegion, -dx, -dy);
-	if (!pixman_region_intersect (pRegion, pRegion, pClip))
+	    pixman_region32_translate (pRegion, -dx, -dy);
+	if (!pixman_region32_intersect (pRegion, pRegion, pClip))
 	    return FALSE;
 	if (dx || dy)
-	    pixman_region_translate(pRegion, dx, dy);
+	    pixman_region32_translate(pRegion, dx, dy);
     }
-    return pixman_region_not_empty(pRegion);
+    return pixman_region32_not_empty(pRegion);
 }
 
 
 static inline pixman_bool_t
-miClipPictureSrc (pixman_region16_t *	pRegion,
+miClipPictureSrc (pixman_region32_t *	pRegion,
 		  pixman_image_t *	pPicture,
 		  int		dx,
 		  int		dy)
@@ -98,13 +98,13 @@ miClipPictureSrc (pixman_region16_t *	pRegion,
 	 */
 	if (pPicture->common.has_client_clip)
 	{
-	    pixman_region_translate ( pRegion, dx, dy);
+	    pixman_region32_translate ( pRegion, dx, dy);
 	    
-	    if (!pixman_region_intersect (pRegion, pRegion, 
-					  (pixman_region16_t *) pPicture->common.src_clip))
+	    if (!pixman_region32_intersect (pRegion, pRegion, 
+					    pPicture->common.src_clip))
 		return FALSE;
 	    
-	    pixman_region_translate ( pRegion, -dx, -dy);
+	    pixman_region32_translate ( pRegion, -dx, -dy);
 	}
 	    
 	return TRUE;
@@ -123,19 +123,19 @@ miClipPictureSrc (pixman_region16_t *	pRegion,
  * an allocation failure, but rendering ignores those anyways.
  */
 
-PIXMAN_EXPORT pixman_bool_t
-pixman_compute_composite_region (pixman_region16_t *	pRegion,
-				 pixman_image_t *	pSrc,
-				 pixman_image_t *	pMask,
-				 pixman_image_t *	pDst,
-				 int16_t		xSrc,
-				 int16_t		ySrc,
-				 int16_t		xMask,
-				 int16_t		yMask,
-				 int16_t		xDst,
-				 int16_t		yDst,
-				 uint16_t	width,
-				 uint16_t	height)
+static pixman_bool_t
+pixman_compute_composite_region32 (pixman_region32_t *	pRegion,
+				   pixman_image_t *	pSrc,
+				   pixman_image_t *	pMask,
+				   pixman_image_t *	pDst,
+				   int16_t		xSrc,
+				   int16_t		ySrc,
+				   int16_t		xMask,
+				   int16_t		yMask,
+				   int16_t		xDst,
+				   int16_t		yDst,
+				   uint16_t		width,
+				   uint16_t		height)
 {
     int		v;
     
@@ -150,13 +150,13 @@ pixman_compute_composite_region (pixman_region16_t *	pRegion,
     if (pRegion->extents.x1 >= pRegion->extents.x2 ||
 	pRegion->extents.y1 >= pRegion->extents.y2)
     {
-	pixman_region_init (pRegion);
+	pixman_region32_init (pRegion);
 	return FALSE;
     }
     /* clip against dst */
     if (!miClipPictureReg (pRegion, &pDst->common.clip_region, 0, 0))
     {
-	pixman_region_fini (pRegion);
+	pixman_region32_fini (pRegion);
 	return FALSE;
     }
     if (pDst->common.alpha_map)
@@ -165,14 +165,14 @@ pixman_compute_composite_region (pixman_region16_t *	pRegion,
 			       -pDst->common.alpha_origin.x,
 			       -pDst->common.alpha_origin.y))
 	{
-	    pixman_region_fini (pRegion);
+	    pixman_region32_fini (pRegion);
 	    return FALSE;
 	}
     }
     /* clip against src */
     if (!miClipPictureSrc (pRegion, pSrc, xDst - xSrc, yDst - ySrc))
     {
-	pixman_region_fini (pRegion);
+	pixman_region32_fini (pRegion);
 	return FALSE;
     }
     if (pSrc->common.alpha_map)
@@ -181,7 +181,7 @@ pixman_compute_composite_region (pixman_region16_t *	pRegion,
 			       xDst - (xSrc + pSrc->common.alpha_origin.x),
 			       yDst - (ySrc + pSrc->common.alpha_origin.y)))
 	{
-	    pixman_region_fini (pRegion);
+	    pixman_region32_fini (pRegion);
 	    return FALSE;
 	}
     }
@@ -190,7 +190,7 @@ pixman_compute_composite_region (pixman_region16_t *	pRegion,
     {
 	if (!miClipPictureSrc (pRegion, pMask, xDst - xMask, yDst - yMask))
 	{
-	    pixman_region_fini (pRegion);
+	    pixman_region32_fini (pRegion);
 	    return FALSE;
 	}	
 	if (pMask->common.alpha_map)
@@ -199,7 +199,7 @@ pixman_compute_composite_region (pixman_region16_t *	pRegion,
 				   xDst - (xMask + pMask->common.alpha_origin.x),
 				   yDst - (yMask + pMask->common.alpha_origin.y)))
 	    {
-		pixman_region_fini (pRegion);
+		pixman_region32_fini (pRegion);
 		return FALSE;
 	    }
 	}
@@ -207,3 +207,33 @@ pixman_compute_composite_region (pixman_region16_t *	pRegion,
     
     return TRUE;
 }
+
+PIXMAN_EXPORT pixman_bool_t
+pixman_compute_composite_region (pixman_region16_t *	pRegion,
+				 pixman_image_t *	pSrc,
+				 pixman_image_t *	pMask,
+				 pixman_image_t *	pDst,
+				 int16_t		xSrc,
+				 int16_t		ySrc,
+				 int16_t		xMask,
+				 int16_t		yMask,
+				 int16_t		xDst,
+				 int16_t		yDst,
+				 uint16_t	width,
+				 uint16_t	height)
+{
+    pixman_region32_t r32;
+    pixman_bool_t retval;
+
+    pixman_region32_init (&r32);
+    
+    if (!pixman_region32_copy_from_region16 (&r32, pRegion))
+	return FALSE;
+
+    retval = pixman_compute_composite_region32 (&r32, pSrc, pMask, pDst,
+						xSrc, ySrc, xMask, yMask, xDst, yDst,
+						width, height);
+    
+    pixman_region32_fini (&r32);
+    return retval;
+}
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 6616aa9..61b3fc0 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -81,8 +81,8 @@ allocate_image (void)
     {
 	image_common_t *common = &image->common;
 
-	pixman_region_init (&common->full_region);
-	pixman_region_init (&common->clip_region);
+	pixman_region32_init (&common->full_region);
+	pixman_region32_init (&common->clip_region);
 	common->src_clip = &common->full_region;
 	common->has_client_clip = FALSE;
 	common->transform = NULL;
@@ -119,8 +119,8 @@ pixman_image_unref (pixman_image_t *image)
 
     if (common->ref_count == 0)
     {
-	pixman_region_fini (&common->clip_region);
-	pixman_region_fini (&common->full_region);
+	pixman_region32_fini (&common->clip_region);
+	pixman_region32_fini (&common->full_region);
 
 	if (common->transform)
 	    free (common->transform);
@@ -323,16 +323,16 @@ create_bits (pixman_format_code_t format,
 static void
 reset_clip_region (pixman_image_t *image)
 {
-    pixman_region_fini (&image->common.clip_region);
+    pixman_region32_fini (&image->common.clip_region);
 
     if (image->type == BITS)
     {
-	pixman_region_init_rect (&image->common.clip_region, 0, 0,
-				 image->bits.width, image->bits.height);
+	pixman_region32_init_rect (&image->common.clip_region, 0, 0,
+				   image->bits.width, image->bits.height);
     }
     else
     {
-	pixman_region_init (&image->common.clip_region);
+	pixman_region32_init (&image->common.clip_region);
     }
 }
 
@@ -378,9 +378,9 @@ pixman_image_create_bits (pixman_format_code_t  format,
 								  */
     image->bits.indexed = NULL;
 
-    pixman_region_fini (&image->common.full_region);
-    pixman_region_init_rect (&image->common.full_region, 0, 0,
-			     image->bits.width, image->bits.height);
+    pixman_region32_fini (&image->common.full_region);
+    pixman_region32_init_rect (&image->common.full_region, 0, 0,
+			       image->bits.width, image->bits.height);
 
     reset_clip_region (image);
     return image;
@@ -394,7 +394,7 @@ pixman_image_set_clip_region (pixman_image_t    *image,
 
     if (region)
     {
-	return pixman_region_copy (&common->clip_region, region);
+	return pixman_region32_copy_from_region16 (&common->clip_region, region);
     }
     else
     {
@@ -679,23 +679,23 @@ pixman_image_fill_rectangles (pixman_op_t		    op,
 	{
 	    for (i = 0; i < n_rects; ++i)
 	    {
-		pixman_region16_t fill_region;
+		pixman_region32_t fill_region;
 		int n_boxes, j;
-		pixman_box16_t *boxes;
+		pixman_box32_t *boxes;
 
-		pixman_region_init_rect (&fill_region, rects[i].x, rects[i].y, rects[i].width, rects[i].height);
-		pixman_region_intersect (&fill_region, &fill_region, &dest->common.clip_region);
+		pixman_region32_init_rect (&fill_region, rects[i].x, rects[i].y, rects[i].width, rects[i].height);
+		pixman_region32_intersect (&fill_region, &fill_region, &dest->common.clip_region);
 
-		boxes = pixman_region_rectangles (&fill_region, &n_boxes);
+		boxes = pixman_region32_rectangles (&fill_region, &n_boxes);
 		for (j = 0; j < n_boxes; ++j)
 		{
-		    const pixman_box16_t *box = &(boxes[j]);
+		    const pixman_box32_t *box = &(boxes[j]);
 		    pixman_fill (dest->bits.bits, dest->bits.rowstride, PIXMAN_FORMAT_BPP (dest->bits.format),
 				 box->x1, box->y1, box->x2 - box->x1, box->y2 - box->y1,
 				 pixel);
 		}
 
-		pixman_region_fini (&fill_region);
+		pixman_region32_fini (&fill_region);
 	    }
 	    return TRUE;
 	}
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 82a0d3c..7f46ccd 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -262,9 +262,9 @@ struct image_common
 {
     image_type_t		type;
     int32_t			ref_count;
-    pixman_region16_t		full_region;
-    pixman_region16_t		clip_region;
-    pixman_region16_t	       *src_clip;
+    pixman_region32_t		full_region;
+    pixman_region32_t		clip_region;
+    pixman_region32_t	       *src_clip;
     pixman_bool_t               has_client_clip;
     pixman_transform_t	       *transform;
     pixman_repeat_t		repeat;
diff --git a/pixman/pixman-region.c b/pixman/pixman-region.c
index 2ba5a70..c37ec52 100644
--- a/pixman/pixman-region.c
+++ b/pixman/pixman-region.c
@@ -305,7 +305,7 @@ PREFIX(_init) (region_type_t *region)
 
 PIXMAN_EXPORT void
 PREFIX(_init_rect) (region_type_t *region,
-			 int x, int y, unsigned int width, unsigned int height)
+		    int x, int y, unsigned int width, unsigned int height)
 {
     region->extents.x1 = x;
     region->extents.y1 = y;
@@ -335,12 +335,6 @@ PREFIX(_n_rects) (region_type_t *region)
 }
 
 PIXMAN_EXPORT box_type_t *
-PREFIX(_rects) (region_type_t *region)
-{
-    return PIXREGION_RECTS (region);
-}
-
-PIXMAN_EXPORT box_type_t *
 PREFIX(_rectangles) (region_type_t *region,
 				  int		    *n_rects)
 {
@@ -2205,7 +2199,7 @@ PREFIX(_selfcheck) (reg)
 
 PIXMAN_EXPORT pixman_bool_t
 PREFIX(_init_rects) (region_type_t *region,
-			  box_type_t *boxes, int count)
+		     box_type_t *boxes, int count)
 {
     int overlap;
 
diff --git a/pixman/pixman-transformed.c b/pixman/pixman-transformed.c
index 569fcae..cff1ba2 100644
--- a/pixman/pixman-transformed.c
+++ b/pixman/pixman-transformed.c
@@ -39,28 +39,28 @@
 /*
  * Fetch from region strategies
  */
-typedef FASTCALL uint32_t (*fetchFromRegionProc)(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc32 fetch, pixman_box16_t *box);
+typedef FASTCALL uint32_t (*fetchFromRegionProc)(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc32 fetch, pixman_box32_t *box);
 
 static inline uint32_t
-fbFetchFromNoRegion(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc32 fetch, pixman_box16_t *box)
+fbFetchFromNoRegion(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc32 fetch, pixman_box32_t *box)
 {
     return fetch (pict, x, y);
 }
 
 static uint32_t
-fbFetchFromNRectangles(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc32 fetch, pixman_box16_t *box)
+fbFetchFromNRectangles(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc32 fetch, pixman_box32_t *box)
 {
-    pixman_box16_t box2;
-    if (pixman_region_contains_point (pict->common.src_clip, x, y, &box2))
+    pixman_box32_t box2;
+    if (pixman_region32_contains_point (pict->common.src_clip, x, y, &box2))
         return fbFetchFromNoRegion(pict, x, y, buffer, fetch, box);
     else
         return 0;
 }
 
 static uint32_t
-fbFetchFromOneRectangle(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc32 fetch, pixman_box16_t *box)
+fbFetchFromOneRectangle(bits_image_t *pict, int x, int y, uint32_t *buffer, fetchPixelProc32 fetch, pixman_box32_t *box)
 {
-    pixman_box16_t box2 = *box;
+    pixman_box32_t box2 = *box;
     return ((x < box2.x1) | (x >= box2.x2) | (y < box2.y1) | (y >= box2.y2)) ?
         0 : fbFetchFromNoRegion(pict, x, y, buffer, fetch, box);
 }
@@ -71,7 +71,7 @@ fbFetchFromOneRectangle(bits_image_t *pict, int x, int y, uint32_t *buffer, fetc
 static void
 fbFetchTransformed_Nearest_Normal(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
 {
-    pixman_box16_t* box = NULL;
+    pixman_box32_t* box = NULL;
     fetchPixelProc32   fetch;
     fetchFromRegionProc fetchFromRegion;
     int x, y, i;
@@ -79,7 +79,7 @@ fbFetchTransformed_Nearest_Normal(bits_image_t * pict, int width, uint32_t *buff
     /* initialize the two function pointers */
     fetch = ACCESS(pixman_fetchPixelProcForPicture32)(pict);
 
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+    if(pixman_region32_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
     else
         fetchFromRegion = fbFetchFromNRectangles;
@@ -117,7 +117,7 @@ fbFetchTransformed_Nearest_Normal(bits_image_t * pict, int width, uint32_t *buff
 static void
 fbFetchTransformed_Nearest_Pad(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
 {
-    pixman_box16_t *box = NULL;
+    pixman_box32_t *box = NULL;
     fetchPixelProc32   fetch;
     fetchFromRegionProc fetchFromRegion;
     int x, y, i;
@@ -125,7 +125,7 @@ fbFetchTransformed_Nearest_Pad(bits_image_t * pict, int width, uint32_t *buffer,
     /* initialize the two function pointers */
     fetch = ACCESS(pixman_fetchPixelProcForPicture32)(pict);
 
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+    if(pixman_region32_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
     else
         fetchFromRegion = fbFetchFromNRectangles;
@@ -164,7 +164,7 @@ fbFetchTransformed_Nearest_Pad(bits_image_t * pict, int width, uint32_t *buffer,
 static void
 fbFetchTransformed_Nearest_General(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
 {
-    pixman_box16_t *box = NULL;
+    pixman_box32_t *box = NULL;
     fetchPixelProc32   fetch;
     fetchFromRegionProc fetchFromRegion;
     int x, y, i;
@@ -172,7 +172,7 @@ fbFetchTransformed_Nearest_General(bits_image_t * pict, int width, uint32_t *buf
     /* initialize the two function pointers */
     fetch = ACCESS(pixman_fetchPixelProcForPicture32)(pict);
 
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+    if(pixman_region32_n_rects (pict->common.src_clip) == 1)
     {
         box = &(pict->common.src_clip->extents);
         fetchFromRegion = fbFetchFromOneRectangle;
@@ -207,7 +207,7 @@ fbFetchTransformed_Nearest_General(bits_image_t * pict, int width, uint32_t *buf
 static void
 fbFetchTransformed_Bilinear_Normal(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
 {
-    pixman_box16_t *box = NULL;
+    pixman_box32_t *box = NULL;
     fetchPixelProc32   fetch;
     fetchFromRegionProc fetchFromRegion;
     int i;
@@ -215,7 +215,7 @@ fbFetchTransformed_Bilinear_Normal(bits_image_t * pict, int width, uint32_t *buf
     /* initialize the two function pointers */
     fetch = ACCESS(pixman_fetchPixelProcForPicture32)(pict);
 
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+    if(pixman_region32_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
     else
         fetchFromRegion = fbFetchFromNRectangles;
@@ -284,7 +284,7 @@ fbFetchTransformed_Bilinear_Normal(bits_image_t * pict, int width, uint32_t *buf
 static void
 fbFetchTransformed_Bilinear_Pad(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
 {
-    pixman_box16_t *box = NULL;
+    pixman_box32_t *box = NULL;
     fetchPixelProc32   fetch;
     fetchFromRegionProc fetchFromRegion;
     int i;
@@ -292,7 +292,7 @@ fbFetchTransformed_Bilinear_Pad(bits_image_t * pict, int width, uint32_t *buffer
     /* initialize the two function pointers */
     fetch = ACCESS(pixman_fetchPixelProcForPicture32)(pict);
 
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+    if(pixman_region32_n_rects (pict->common.src_clip) == 1)
         fetchFromRegion = fbFetchFromNoRegion;
     else
         fetchFromRegion = fbFetchFromNRectangles;
@@ -361,7 +361,7 @@ fbFetchTransformed_Bilinear_Pad(bits_image_t * pict, int width, uint32_t *buffer
 static void
 fbFetchTransformed_Bilinear_General(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
 {
-    pixman_box16_t *box = NULL;
+    pixman_box32_t *box = NULL;
     fetchPixelProc32   fetch;
     fetchFromRegionProc fetchFromRegion;
     int i;
@@ -369,7 +369,7 @@ fbFetchTransformed_Bilinear_General(bits_image_t * pict, int width, uint32_t *bu
     /* initialize the two function pointers */
     fetch = ACCESS(pixman_fetchPixelProcForPicture32)(pict);
 
-    if(pixman_region_n_rects (pict->common.src_clip) == 1)
+    if(pixman_region32_n_rects (pict->common.src_clip) == 1)
     {
         box = &(pict->common.src_clip->extents);
         fetchFromRegion = fbFetchFromOneRectangle;
@@ -440,7 +440,7 @@ fbFetchTransformed_Bilinear_General(bits_image_t * pict, int width, uint32_t *bu
 static void
 fbFetchTransformed_Convolution(bits_image_t * pict, int width, uint32_t *buffer, uint32_t *mask, uint32_t maskBits, pixman_bool_t affine, pixman_vector_t v, pixman_vector_t unit)
 {
-    pixman_box16_t dummy;
+    pixman_box32_t dummy;
     fetchPixelProc32 fetch;
     int i;
 
@@ -502,7 +502,7 @@ fbFetchTransformed_Convolution(bits_image_t * pict, int width, uint32_t *buffer,
                                 default:
                                     tx = x;
                             }
-                            if (pixman_region_contains_point (pict->common.src_clip, tx, ty, &dummy)) {
+                            if (pixman_region32_contains_point (pict->common.src_clip, tx, ty, &dummy)) {
                                 uint32_t c = fetch(pict, tx, ty);
 
                                 srtot += Red(c) * *p;
diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c
index a165467..28dacaf 100644
--- a/pixman/pixman-trap.c
+++ b/pixman/pixman-trap.c
@@ -116,11 +116,11 @@ dump_image (pixman_image_t *image,
 }
 
 PIXMAN_EXPORT void
-pixman_add_trapezoids       (pixman_image_t      *image,
-			     int16_t              x_off,
-			     int                      y_off,
-			     int                      ntraps,
-			     const pixman_trapezoid_t *traps)
+pixman_add_trapezoids (pixman_image_t           *image,
+		       int16_t                   x_off,
+		       int                       y_off,
+		       int                       ntraps,
+		       const pixman_trapezoid_t *traps)
 {
     int i;
 
diff --git a/pixman/pixman.h b/pixman/pixman.h
index bfe702b..cf9d771 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -278,6 +278,9 @@ void                    pixman_region_init_rect           (pixman_region16_t
 							   int                     y,
 							   unsigned int            width,
 							   unsigned int            height);
+pixman_bool_t           pixman_region_init_rects          (pixman_region16_t      *region,
+							   pixman_box16_t         *boxes,
+							   int                     count);
 void                    pixman_region_init_with_extents   (pixman_region16_t      *region,
 							   pixman_box16_t         *extents);
 void                    pixman_region_fini                (pixman_region16_t      *region);
@@ -323,9 +326,6 @@ pixman_bool_t           pixman_region_equal               (pixman_region16_t
 pixman_bool_t           pixman_region_selfcheck           (pixman_region16_t      *region);
 void                    pixman_region_reset               (pixman_region16_t      *region,
 							   pixman_box16_t         *box);
-pixman_bool_t           pixman_region_init_rects          (pixman_region16_t      *region,
-							   pixman_box16_t         *boxes,
-							   int                     count);
 
 /*
  * 32 bit regions
@@ -365,6 +365,9 @@ void                    pixman_region32_init_rect          (pixman_region32_t *r
 							    int                y,
 							    unsigned int       width,
 							    unsigned int       height);
+pixman_bool_t           pixman_region32_init_rects         (pixman_region32_t *region,
+							    pixman_box32_t    *boxes,
+							    int                count);
 void                    pixman_region32_init_with_extents  (pixman_region32_t *region,
 							    pixman_box32_t    *extents);
 void                    pixman_region32_fini               (pixman_region32_t *region);
@@ -410,9 +413,6 @@ pixman_bool_t           pixman_region32_equal              (pixman_region32_t *r
 pixman_bool_t           pixman_region32_selfcheck          (pixman_region32_t *region);
 void                    pixman_region32_reset              (pixman_region32_t *region,
 							    pixman_box32_t    *box);
-pixman_bool_t           pixman_region32_init_rects         (pixman_region32_t *region,
-							    pixman_box32_t    *boxes,
-							    int                count);
 
 
 /* Copy / Fill / Misc */
commit de150bf82fbe0e346fa38eae10a5bd43538bb3d9
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 22:07:46 2008 -0400

    Add pixman_region32_copy_from_region16

diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index f4db687..82a0d3c 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -694,6 +694,11 @@ pixman_image_can_get_solid (pixman_image_t *image);
 #define PIXMAN_EXPORT
 #endif
 
+/* Helper for 32 bit regions */
+pixman_bool_t
+pixman_region32_copy_from_region16 (pixman_region32_t *dst,
+				    pixman_region16_t *src);
+
 #ifdef PIXMAN_TIMING
 
 /* Timing */
diff --git a/pixman/pixman-region32.c b/pixman/pixman-region32.c
index 5a62b3e..4b5598d 100644
--- a/pixman/pixman-region32.c
+++ b/pixman/pixman-region32.c
@@ -38,4 +38,58 @@ typedef struct {
 
 #define PREFIX(x) pixman_region32##x
 
+#include <stdlib.h> /* free() */
+
+/* Regions with at most this many boxes are converted without malloc() */
+#define N_TMP_BOXES (16)
+
+/*
+ * Replace the contents of dst with a 32 bit copy of the 16 bit region src.
+ *
+ * dst must already be initialized: it is finalized and then rebuilt from
+ * the widened boxes.
+ *
+ * Returns FALSE, leaving dst untouched, if the temporary box array cannot
+ * be allocated; otherwise returns the result of pixman_region32_init_rects().
+ */
+pixman_bool_t
+pixman_region32_copy_from_region16 (pixman_region32_t *dst,
+				    pixman_region16_t *src)
+{
+    int n_boxes, i;
+    pixman_box16_t *boxes16;
+    pixman_box32_t *boxes32;
+    pixman_box32_t tmp_boxes[N_TMP_BOXES];
+    pixman_bool_t retval;
+
+    boxes16 = pixman_region_rectangles (src, &n_boxes);
+
+    /* Only hit the heap when the stack buffer is too small */
+    if (n_boxes > N_TMP_BOXES)
+	boxes32 = pixman_malloc_ab (n_boxes, sizeof (pixman_box32_t));
+    else
+	boxes32 = tmp_boxes;
+
+    if (!boxes32)
+	return FALSE;
+
+    for (i = 0; i < n_boxes; ++i)
+    {
+	boxes32[i].x1 = boxes16[i].x1;
+	boxes32[i].y1 = boxes16[i].y1;
+	boxes32[i].x2 = boxes16[i].x2;
+	boxes32[i].y2 = boxes16[i].y2;
+    }
+
+    pixman_region32_fini (dst);
+    retval = pixman_region32_init_rects (dst, boxes32, n_boxes);
+
+    /* init_rects copies the boxes into dst, so the scratch array is still
+     * ours to release when it came from the heap. */
+    if (boxes32 != tmp_boxes)
+	free (boxes32);
+
+    return retval;
+}
+
 #include "pixman-region.c"
commit e30f7e2eb56b53667ee83e2cad942f171a9486a0
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 21:43:01 2008 -0400

    Some formatting fixing

diff --git a/pixman/pixman.h b/pixman/pixman.h
index ee37f76..bfe702b 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -631,30 +631,30 @@ pixman_bool_t	pixman_image_fill_rectangles	     (pixman_op_t		    op,
 						      const pixman_rectangle16_t   *rects);
 
 /* Composite */
-pixman_bool_t   pixman_compute_composite_region (pixman_region16_t *	pRegion,
-						 pixman_image_t *	pSrc,
-						 pixman_image_t *	pMask,
-						 pixman_image_t *	pDst,
-						 int16_t		xSrc,
-						 int16_t		ySrc,
-						 int16_t		xMask,
-						 int16_t		yMask,
-						 int16_t		xDst,
-						 int16_t		yDst,
-						 uint16_t		width,
-						 uint16_t		height);
-void		pixman_image_composite          (pixman_op_t		    op,
-						 pixman_image_t		   *src,
-						 pixman_image_t               *mask,
-						 pixman_image_t               *dest,
-						 int16_t                       src_x,
-						 int16_t                       src_y,
-						 int16_t                       mask_x,
-						 int16_t                       mask_y,
-						 int16_t                       dest_x,
-						 int16_t                       dest_y,
-						 uint16_t                      width,
-						 uint16_t                      height);
+pixman_bool_t pixman_compute_composite_region (pixman_region16_t *pRegion,
+					       pixman_image_t    *pSrc,
+					       pixman_image_t    *pMask,
+					       pixman_image_t    *pDst,
+					       int16_t            xSrc,
+					       int16_t            ySrc,
+					       int16_t            xMask,
+					       int16_t            yMask,
+					       int16_t            xDst,
+					       int16_t            yDst,
+					       uint16_t           width,
+					       uint16_t           height);
+void          pixman_image_composite          (pixman_op_t        op,
+					       pixman_image_t    *src,
+					       pixman_image_t    *mask,
+					       pixman_image_t    *dest,
+					       int16_t            src_x,
+					       int16_t            src_y,
+					       int16_t            mask_x,
+					       int16_t            mask_y,
+					       int16_t            dest_x,
+					       int16_t            dest_y,
+					       uint16_t           width,
+					       uint16_t           height);
 
 /*
  * Trapezoids
@@ -746,4 +746,5 @@ void           pixman_rasterize_trapezoid  (pixman_image_t            *image,
 					    int                        x_off,
 					    int                        y_off);
 
+
 #endif /* PIXMAN_H__ */
commit cb7cc369f500a7828dc3c9935d8d82af47573df5
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 21:41:54 2008 -0400

    Move all the PIXMAN_EXPORT into .c files

diff --git a/pixman/pixman-compute-region.c b/pixman/pixman-compute-region.c
index 1e566a9..6743304 100644
--- a/pixman/pixman-compute-region.c
+++ b/pixman/pixman-compute-region.c
@@ -123,7 +123,7 @@ miClipPictureSrc (pixman_region16_t *	pRegion,
  * an allocation failure, but rendering ignores those anyways.
  */
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 pixman_compute_composite_region (pixman_region16_t *	pRegion,
 				 pixman_image_t *	pSrc,
 				 pixman_image_t *	pMask,
diff --git a/pixman/pixman-edge.c b/pixman/pixman-edge.c
index 717284f..b9246af 100644
--- a/pixman/pixman-edge.c
+++ b/pixman/pixman-edge.c
@@ -315,7 +315,7 @@ PIXMAN_RASTERIZE_EDGES (pixman_image_t *image,
 
 #ifndef PIXMAN_FB_ACCESSORS
 
-void
+PIXMAN_EXPORT void
 pixman_rasterize_edges (pixman_image_t *image,
 			pixman_edge_t	*l,
 			pixman_edge_t	*r,
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index f34053c..6616aa9 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -101,7 +101,7 @@ allocate_image (void)
 }
 
 /* Ref Counting */
-pixman_image_t *
+PIXMAN_EXPORT pixman_image_t *
 pixman_image_ref (pixman_image_t *image)
 {
     image->common.ref_count++;
@@ -110,7 +110,7 @@ pixman_image_ref (pixman_image_t *image)
 }
 
 /* returns TRUE when the image is freed */
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 pixman_image_unref (pixman_image_t *image)
 {
     image_common_t *common = (image_common_t *)image;
@@ -158,7 +158,7 @@ pixman_image_unref (pixman_image_t *image)
 }
 
 /* Constructors */
-pixman_image_t *
+PIXMAN_EXPORT pixman_image_t *
 pixman_image_create_solid_fill (pixman_color_t *color)
 {
     pixman_image_t *img = allocate_image();
@@ -173,7 +173,7 @@ pixman_image_create_solid_fill (pixman_color_t *color)
     return img;
 }
 
-pixman_image_t *
+PIXMAN_EXPORT pixman_image_t *
 pixman_image_create_linear_gradient (pixman_point_fixed_t         *p1,
 				     pixman_point_fixed_t         *p2,
 				     const pixman_gradient_stop_t *stops,
@@ -206,7 +206,7 @@ pixman_image_create_linear_gradient (pixman_point_fixed_t         *p1,
 }
 
 
-pixman_image_t *
+PIXMAN_EXPORT pixman_image_t *
 pixman_image_create_radial_gradient (pixman_point_fixed_t         *inner,
 				     pixman_point_fixed_t         *outer,
 				     pixman_fixed_t                inner_radius,
@@ -250,7 +250,7 @@ pixman_image_create_radial_gradient (pixman_point_fixed_t         *inner,
     return image;
 }
 
-pixman_image_t *
+PIXMAN_EXPORT pixman_image_t *
 pixman_image_create_conical_gradient (pixman_point_fixed_t *center,
 				      pixman_fixed_t angle,
 				      const pixman_gradient_stop_t *stops,
@@ -336,7 +336,7 @@ reset_clip_region (pixman_image_t *image)
     }
 }
 
-pixman_image_t *
+PIXMAN_EXPORT pixman_image_t *
 pixman_image_create_bits (pixman_format_code_t  format,
 			  int                   width,
 			  int                   height,
@@ -386,7 +386,7 @@ pixman_image_create_bits (pixman_format_code_t  format,
     return image;
 }
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 pixman_image_set_clip_region (pixman_image_t    *image,
 			      pixman_region16_t *region)
 {
@@ -406,14 +406,14 @@ pixman_image_set_clip_region (pixman_image_t    *image,
 
 /* Sets whether the clip region includes a clip region set by the client
  */
-void
+PIXMAN_EXPORT void
 pixman_image_set_has_client_clip (pixman_image_t *image,
 				  pixman_bool_t	  client_clip)
 {
     image->common.has_client_clip = client_clip;
 }
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 pixman_image_set_transform (pixman_image_t           *image,
 			    const pixman_transform_t *transform)
 {
@@ -447,14 +447,14 @@ pixman_image_set_transform (pixman_image_t           *image,
     return TRUE;
 }
 
-void
+PIXMAN_EXPORT void
 pixman_image_set_repeat (pixman_image_t  *image,
 			 pixman_repeat_t  repeat)
 {
     image->common.repeat = repeat;
 }
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 pixman_image_set_filter (pixman_image_t       *image,
 			 pixman_filter_t       filter,
 			 const pixman_fixed_t *params,
@@ -487,7 +487,7 @@ pixman_image_set_filter (pixman_image_t       *image,
     return TRUE;
 }
 
-void
+PIXMAN_EXPORT void
 pixman_image_set_source_clipping (pixman_image_t  *image,
 				  pixman_bool_t    source_clipping)
 {
@@ -503,7 +503,7 @@ pixman_image_set_source_clipping (pixman_image_t  *image,
  * copy the content of indexed. Doing this copying is simply
  * way, way too expensive.
  */
-void
+PIXMAN_EXPORT void
 pixman_image_set_indexed (pixman_image_t	 *image,
 			  const pixman_indexed_t *indexed)
 {
@@ -512,7 +512,7 @@ pixman_image_set_indexed (pixman_image_t	 *image,
     bits->indexed = indexed;
 }
 
-void
+PIXMAN_EXPORT void
 pixman_image_set_alpha_map (pixman_image_t *image,
 			    pixman_image_t *alpha_map,
 			    int16_t         x,
@@ -537,7 +537,7 @@ pixman_image_set_alpha_map (pixman_image_t *image,
     common->alpha_origin.y = y;
 }
 
-void
+PIXMAN_EXPORT void
 pixman_image_set_component_alpha   (pixman_image_t       *image,
 				    pixman_bool_t         component_alpha)
 {
@@ -545,7 +545,7 @@ pixman_image_set_component_alpha   (pixman_image_t       *image,
 }
 
 
-void
+PIXMAN_EXPORT void
 pixman_image_set_accessors (pixman_image_t             *image,
 			    pixman_read_memory_func_t	read_func,
 			    pixman_write_memory_func_t	write_func)
@@ -556,7 +556,7 @@ pixman_image_set_accessors (pixman_image_t             *image,
     image->common.write_func = write_func;
 }
 
-uint32_t *
+PIXMAN_EXPORT uint32_t *
 pixman_image_get_data (pixman_image_t *image)
 {
     if (image->type == BITS)
@@ -565,7 +565,7 @@ pixman_image_get_data (pixman_image_t *image)
     return NULL;
 }
 
-int
+PIXMAN_EXPORT int
 pixman_image_get_width (pixman_image_t *image)
 {
     if (image->type == BITS)
@@ -574,7 +574,7 @@ pixman_image_get_width (pixman_image_t *image)
     return 0;
 }
 
-int
+PIXMAN_EXPORT int
 pixman_image_get_height (pixman_image_t *image)
 {
     if (image->type == BITS)
@@ -583,7 +583,7 @@ pixman_image_get_height (pixman_image_t *image)
     return 0;
 }
 
-int
+PIXMAN_EXPORT int
 pixman_image_get_stride (pixman_image_t *image)
 {
     if (image->type == BITS)
@@ -592,7 +592,7 @@ pixman_image_get_stride (pixman_image_t *image)
     return 0;
 }
 
-int
+PIXMAN_EXPORT int
 pixman_image_get_depth (pixman_image_t *image)
 {
     if (image->type == BITS)
@@ -601,7 +601,7 @@ pixman_image_get_depth (pixman_image_t *image)
     return 0;
 }
 
-pixman_bool_t
+static pixman_bool_t
 color_to_pixel (pixman_color_t *color,
 		uint32_t       *pixel,
 		pixman_format_code_t format)
@@ -642,7 +642,7 @@ color_to_pixel (pixman_color_t *color,
     return TRUE;
 }
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 pixman_image_fill_rectangles (pixman_op_t		    op,
 			      pixman_image_t		   *dest,
 			      pixman_color_t		   *color,
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index e49a864..127e257 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1725,7 +1725,7 @@ pixman_optimize_operator(pixman_op_t op, pixman_image_t *pSrc, pixman_image_t *p
 
 __attribute__((__force_align_arg_pointer__))
 #endif
-void
+PIXMAN_EXPORT void
 pixman_image_composite (pixman_op_t      op,
 			pixman_image_t * pSrc,
 			pixman_image_t * pMask,
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 4a6e045..f4db687 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -687,6 +687,13 @@ pixman_bool_t
 pixman_image_can_get_solid (pixman_image_t *image);
 
 
+/* GCC visibility */
+#if defined(__GNUC__) && __GNUC__ >= 4
+#define PIXMAN_EXPORT __attribute__ ((visibility("default")))
+#else
+#define PIXMAN_EXPORT
+#endif
+
 #ifdef PIXMAN_TIMING
 
 /* Timing */
diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c
index 0dca6cc..a165467 100644
--- a/pixman/pixman-trap.c
+++ b/pixman/pixman-trap.c
@@ -29,7 +29,7 @@
 
 typedef uint32_t FbBits;
 
-void
+PIXMAN_EXPORT void
 pixman_add_traps (pixman_image_t *	image,
 		  int16_t	x_off,
 		  int16_t	y_off,
@@ -115,7 +115,7 @@ dump_image (pixman_image_t *image,
     }
 }
 
-void
+PIXMAN_EXPORT void
 pixman_add_trapezoids       (pixman_image_t      *image,
 			     int16_t              x_off,
 			     int                      y_off,
@@ -143,7 +143,7 @@ pixman_add_trapezoids       (pixman_image_t      *image,
 #endif
 }
 
-void
+PIXMAN_EXPORT void
 pixman_rasterize_trapezoid (pixman_image_t *    image,
 			    const pixman_trapezoid_t *trap,
 			    int			x_off,
diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
index b100d09..ee7db25 100644
--- a/pixman/pixman-utils.c
+++ b/pixman/pixman-utils.c
@@ -30,7 +30,7 @@
 #include "pixman-private.h"
 #include "pixman-mmx.h"
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 pixman_transform_point_3d (pixman_transform_t *transform,
 			   pixman_vector_t *vector)
 {
@@ -62,7 +62,7 @@ pixman_transform_point_3d (pixman_transform_t *transform,
     return TRUE;
 }
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 pixman_blt (uint32_t *src_bits,
 	    uint32_t *dst_bits,
 	    int src_stride,
@@ -156,7 +156,7 @@ pixman_fill32 (uint32_t *bits,
     }
 }
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 pixman_fill (uint32_t *bits,
 	     int stride,
 	     int bpp,
@@ -204,7 +204,7 @@ pixman_fill (uint32_t *bits,
  * grid row
  */
 
-pixman_fixed_t
+PIXMAN_EXPORT pixman_fixed_t
 pixman_sample_ceil_y (pixman_fixed_t y, int n)
 {
     pixman_fixed_t   f = pixman_fixed_frac(y);
@@ -225,7 +225,7 @@ pixman_sample_ceil_y (pixman_fixed_t y, int n)
  * Compute the largest value no greater than y which is on a
  * grid row
  */
-pixman_fixed_t
+PIXMAN_EXPORT pixman_fixed_t
 pixman_sample_floor_y (pixman_fixed_t y, int n)
 {
     pixman_fixed_t   f = pixman_fixed_frac(y);
@@ -243,7 +243,7 @@ pixman_sample_floor_y (pixman_fixed_t y, int n)
 /*
  * Step an edge by any amount (including negative values)
  */
-void
+PIXMAN_EXPORT void
 pixman_edge_step (pixman_edge_t *e, int n)
 {
     pixman_fixed_48_16_t	ne;
@@ -298,7 +298,7 @@ _pixman_edge_tMultiInit (pixman_edge_t *e, int n, pixman_fixed_t *stepx_p, pixma
  * Initialize one edge structure given the line endpoints and a
  * starting y value
  */
-void
+PIXMAN_EXPORT void
 pixman_edge_init (pixman_edge_t	*e,
 		  int		n,
 		  pixman_fixed_t		y_start,
@@ -342,7 +342,7 @@ pixman_edge_init (pixman_edge_t	*e,
  * Initialize one edge structure given a line, starting y value
  * and a pixel offset for the line
  */
-void
+PIXMAN_EXPORT void
 pixman_line_fixed_edge_init (pixman_edge_t *e,
 			     int	    n,
 			     pixman_fixed_t	    y,
@@ -462,7 +462,7 @@ pixman_version_string (void)
  * Currently, all pixman_format_code_t values are supported
  * except for the YUV formats.
  **/
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 pixman_format_supported_destination (pixman_format_code_t format)
 {
     switch (format) {
@@ -530,7 +530,7 @@ pixman_format_supported_destination (pixman_format_code_t format)
  *
  * Currently, all pixman_format_code_t values are supported.
  **/
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 pixman_format_supported_source (pixman_format_code_t format)
 {
     switch (format) {
diff --git a/pixman/pixman.h b/pixman/pixman.h
index e6d9948..ee37f76 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -92,13 +92,6 @@ typedef unsigned __int64 uint64_t;
 #  include <stdint.h>
 #endif
 
-/* GCC visibility */
-#if defined(__GNUC__) && __GNUC__ >= 4
-#define PIXMAN_EXPORT __attribute__ ((visibility("default")))
-#else
-#define PIXMAN_EXPORT
-#endif
-
 /*
  * Boolean
  */
@@ -167,10 +160,6 @@ struct pixman_transform
     pixman_fixed_t	matrix[3][3];
 };
 
-PIXMAN_EXPORT
-pixman_bool_t pixman_transform_point_3d (pixman_transform_t *transform,
-					 pixman_vector_t    *vector);
-
 /* Don't blame me, blame XRender */
 typedef enum
 {
@@ -426,26 +415,30 @@ pixman_bool_t           pixman_region32_init_rects         (pixman_region32_t *r
 							    int                count);
 
 
-/* Copy / Fill */
-PIXMAN_EXPORT
-pixman_bool_t pixman_blt (uint32_t *src_bits,
-			  uint32_t *dst_bits,
-			  int src_stride,
-			  int dst_stride,
-			  int src_bpp,
-			  int dst_bpp,
-			  int src_x, int src_y,
-			  int dst_x, int dst_y,
-			  int width, int height);
-PIXMAN_EXPORT
+/* Copy / Fill / Misc */
+pixman_bool_t pixman_blt  (uint32_t *src_bits,
+			   uint32_t *dst_bits,
+			   int       src_stride,
+			   int       dst_stride,
+			   int       src_bpp,
+			   int       dst_bpp,
+			   int       src_x,
+			   int       src_y,
+			   int       dst_x,
+			   int       dst_y,
+			   int       width,
+			   int       height);
 pixman_bool_t pixman_fill (uint32_t *bits,
-			   int stride,
-			   int bpp,
-			   int x,
-			   int y,
-			   int width,
-			   int height,
-			   uint32_t _xor);
+			   int       stride,
+			   int       bpp,
+			   int       x,
+			   int       y,
+			   int       width,
+			   int       height,
+			   uint32_t  _xor);
+pixman_bool_t pixman_transform_point_3d (pixman_transform_t *transform,
+					 pixman_vector_t    *vector);
+
 /*
  * Images
  */
@@ -570,33 +563,25 @@ typedef enum {
 } pixman_format_code_t;
 
 /* Querying supported format values. */
-PIXMAN_EXPORT
-pixman_bool_t	pixman_format_supported_destination (pixman_format_code_t format);
-
-PIXMAN_EXPORT
-pixman_bool_t	pixman_format_supported_source (pixman_format_code_t format);
+pixman_bool_t pixman_format_supported_destination (pixman_format_code_t format);
+pixman_bool_t pixman_format_supported_source      (pixman_format_code_t format);
 
 /* Constructors */
-PIXMAN_EXPORT
 pixman_image_t *pixman_image_create_solid_fill       (pixman_color_t               *color);
-PIXMAN_EXPORT
 pixman_image_t *pixman_image_create_linear_gradient  (pixman_point_fixed_t         *p1,
 						      pixman_point_fixed_t         *p2,
 						      const pixman_gradient_stop_t *stops,
 						      int                           n_stops);
-PIXMAN_EXPORT
 pixman_image_t *pixman_image_create_radial_gradient  (pixman_point_fixed_t         *inner,
 						      pixman_point_fixed_t         *outer,
 						      pixman_fixed_t                inner_radius,
 						      pixman_fixed_t                outer_radius,
 						      const pixman_gradient_stop_t *stops,
 						      int                           n_stops);
-PIXMAN_EXPORT
 pixman_image_t *pixman_image_create_conical_gradient (pixman_point_fixed_t         *center,
 						      pixman_fixed_t                angle,
 						      const pixman_gradient_stop_t *stops,
 						      int                           n_stops);
-PIXMAN_EXPORT
 pixman_image_t *pixman_image_create_bits             (pixman_format_code_t          format,
 						      int                           width,
 						      int                           height,
@@ -604,67 +589,48 @@ pixman_image_t *pixman_image_create_bits             (pixman_format_code_t
 						      int                           rowstride_bytes);
 
 /* Destructor */
-PIXMAN_EXPORT
 pixman_image_t *pixman_image_ref                     (pixman_image_t               *image);
-PIXMAN_EXPORT
 pixman_bool_t   pixman_image_unref                   (pixman_image_t               *image);
 
 
 /* Set properties */
-PIXMAN_EXPORT
 pixman_bool_t   pixman_image_set_clip_region         (pixman_image_t               *image,
 						      pixman_region16_t            *region);
-PIXMAN_EXPORT
 void		pixman_image_set_has_client_clip     (pixman_image_t               *image,
 						      pixman_bool_t		    clien_clip);
-PIXMAN_EXPORT
 pixman_bool_t   pixman_image_set_transform           (pixman_image_t               *image,
 						      const pixman_transform_t     *transform);
-PIXMAN_EXPORT
 void            pixman_image_set_repeat              (pixman_image_t               *image,
 						      pixman_repeat_t               repeat);
-PIXMAN_EXPORT
 pixman_bool_t   pixman_image_set_filter              (pixman_image_t               *image,
 						      pixman_filter_t               filter,
 						      const pixman_fixed_t         *filter_params,
 						      int                           n_filter_params);
-PIXMAN_EXPORT
 void		pixman_image_set_source_clipping     (pixman_image_t		   *image,
 						      pixman_bool_t                 source_clipping);
-PIXMAN_EXPORT
 void            pixman_image_set_alpha_map           (pixman_image_t               *image,
 						      pixman_image_t               *alpha_map,
 						      int16_t                       x,
 						      int16_t                       y);
-PIXMAN_EXPORT
 void            pixman_image_set_component_alpha     (pixman_image_t               *image,
 						      pixman_bool_t                 component_alpha);
-PIXMAN_EXPORT
 void		pixman_image_set_accessors	     (pixman_image_t		   *image,
 						      pixman_read_memory_func_t	    read_func,
 						      pixman_write_memory_func_t    write_func);
-PIXMAN_EXPORT
 void		pixman_image_set_indexed	     (pixman_image_t		   *image,
 						      const pixman_indexed_t	   *indexed);
-PIXMAN_EXPORT
 uint32_t       *pixman_image_get_data                (pixman_image_t               *image);
-PIXMAN_EXPORT
 int		pixman_image_get_width               (pixman_image_t               *image);
-PIXMAN_EXPORT
 int             pixman_image_get_height              (pixman_image_t               *image);
-PIXMAN_EXPORT
 int		pixman_image_get_stride              (pixman_image_t               *image);
-PIXMAN_EXPORT
 int		pixman_image_get_depth               (pixman_image_t		   *image);
-PIXMAN_EXPORT
 pixman_bool_t	pixman_image_fill_rectangles	     (pixman_op_t		    op,
 						      pixman_image_t		   *image,
 						      pixman_color_t		   *color,
 						      int			    n_rects,
-						      const pixman_rectangle16_t	   *rects);
+						      const pixman_rectangle16_t   *rects);
 
 /* Composite */
-PIXMAN_EXPORT
 pixman_bool_t   pixman_compute_composite_region (pixman_region16_t *	pRegion,
 						 pixman_image_t *	pSrc,
 						 pixman_image_t *	pMask,
@@ -677,7 +643,6 @@ pixman_bool_t   pixman_compute_composite_region (pixman_region16_t *	pRegion,
 						 int16_t		yDst,
 						 uint16_t		width,
 						 uint16_t		height);
-PIXMAN_EXPORT
 void		pixman_image_composite          (pixman_op_t		    op,
 						 pixman_image_t		   *src,
 						 pixman_image_t               *mask,
@@ -742,52 +707,43 @@ struct pixman_trap
     pixman_span_fix_t	top, bot;
 };
 
-PIXMAN_EXPORT
-pixman_fixed_t pixman_sample_ceil_y        (pixman_fixed_t       y,
-					    int                  bpp);
-PIXMAN_EXPORT
-pixman_fixed_t pixman_sample_floor_y       (pixman_fixed_t       y,
-					    int                  bpp);
-PIXMAN_EXPORT
-void           pixman_edge_step            (pixman_edge_t       *e,
-					    int                  n);
-PIXMAN_EXPORT
-void           pixman_edge_init            (pixman_edge_t       *e,
-					    int                  bpp,
-					    pixman_fixed_t       y_start,
-					    pixman_fixed_t       x_top,
-					    pixman_fixed_t       y_top,
-					    pixman_fixed_t       x_bot,
-					    pixman_fixed_t       y_bot);
-PIXMAN_EXPORT
-void           pixman_line_fixed_edge_init (pixman_edge_t       *e,
-					    int                  bpp,
-					    pixman_fixed_t       y,
+pixman_fixed_t pixman_sample_ceil_y        (pixman_fixed_t             y,
+					    int                        bpp);
+pixman_fixed_t pixman_sample_floor_y       (pixman_fixed_t             y,
+					    int                        bpp);
+void           pixman_edge_step            (pixman_edge_t             *e,
+					    int                        n);
+void           pixman_edge_init            (pixman_edge_t             *e,
+					    int                        bpp,
+					    pixman_fixed_t             y_start,
+					    pixman_fixed_t             x_top,
+					    pixman_fixed_t             y_top,
+					    pixman_fixed_t             x_bot,
+					    pixman_fixed_t             y_bot);
+void           pixman_line_fixed_edge_init (pixman_edge_t             *e,
+					    int                        bpp,
+					    pixman_fixed_t             y,
 					    const pixman_line_fixed_t *line,
-					    int                  x_off,
-					    int                  y_off);
-PIXMAN_EXPORT
-void           pixman_rasterize_edges      (pixman_image_t      *image,
-					    pixman_edge_t       *l,
-					    pixman_edge_t       *r,
-					    pixman_fixed_t       t,
-					    pixman_fixed_t       b);
-PIXMAN_EXPORT
-void           pixman_add_traps            (pixman_image_t      *image,
-					    int16_t              x_off,
-					    int16_t              y_off,
-					    int                  ntrap,
-					    pixman_trap_t       *traps);
-PIXMAN_EXPORT
-void	       pixman_add_trapezoids       (pixman_image_t      *image,
-					    int16_t              x_off,
-					    int                  y_off,
-					    int                  ntraps,
+					    int                        x_off,
+					    int                        y_off);
+void           pixman_rasterize_edges      (pixman_image_t            *image,
+					    pixman_edge_t             *l,
+					    pixman_edge_t             *r,
+					    pixman_fixed_t             t,
+					    pixman_fixed_t             b);
+void           pixman_add_traps            (pixman_image_t            *image,
+					    int16_t                    x_off,
+					    int16_t                    y_off,
+					    int                        ntrap,
+					    pixman_trap_t             *traps);
+void           pixman_add_trapezoids       (pixman_image_t            *image,
+					    int16_t                    x_off,
+					    int                        y_off,
+					    int                        ntraps,
 					    const pixman_trapezoid_t  *traps);
-PIXMAN_EXPORT
-void           pixman_rasterize_trapezoid  (pixman_image_t      *image,
+void           pixman_rasterize_trapezoid  (pixman_image_t            *image,
 					    const pixman_trapezoid_t  *trap,
-					    int                  x_off,
-					    int                  y_off);
+					    int                        x_off,
+					    int                        y_off);
 
 #endif /* PIXMAN_H__ */
commit 890f1a4280af4c7b8d8913ba592a9dd617482463
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 21:13:52 2008 -0400

    Move PIXMAN_EXPORT into pixman-region.c

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 38021a5..dbc8d7d 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -1,6 +1,7 @@
 lib_LTLIBRARIES = libpixman-1.la
 libpixman_1_la_LDFLAGS = -version-info $(LT_VERSION_INFO)
 libpixman_1_la_LIBADD = @DEP_LIBS@ -lm
+libpixman_1_la_CFLAGS = -DPIXMAN_DISABLE_DEPRECATED
 libpixman_1_la_SOURCES =		\
 	pixman.h			\
 	pixman-access.c			\
diff --git a/pixman/pixman-region.c b/pixman/pixman-region.c
index 41a9d4c..2ba5a70 100644
--- a/pixman/pixman-region.c
+++ b/pixman/pixman-region.c
@@ -95,7 +95,7 @@ static region_data_type_t *pixman_brokendata = (region_data_type_t *)&PREFIX(_br
  * the addresses of those structs which makes the existing code continue to
  * work.
  */
-void
+PIXMAN_EXPORT void
 PREFIX(_set_static_pointers) (box_type_t *empty_box,
 				   region_data_type_t *empty_data,
 				   region_data_type_t *broken_data)
@@ -296,14 +296,14 @@ PREFIX(_print) (rgn)
 }
 
 
-void
+PIXMAN_EXPORT void
 PREFIX(_init) (region_type_t *region)
 {
     region->extents = *pixman_region_emptyBox;
     region->data = pixman_region_emptyData;
 }
 
-void
+PIXMAN_EXPORT void
 PREFIX(_init_rect) (region_type_t *region,
 			 int x, int y, unsigned int width, unsigned int height)
 {
@@ -314,33 +314,33 @@ PREFIX(_init_rect) (region_type_t *region,
     region->data = NULL;
 }
 
-void
+PIXMAN_EXPORT void
 PREFIX(_init_with_extents) (region_type_t *region, box_type_t *extents)
 {
     region->extents = *extents;
     region->data = NULL;
 }
 
-void
+PIXMAN_EXPORT void
 PREFIX(_fini) (region_type_t *region)
 {
     good (region);
     freeData (region);
 }
 
-int
+PIXMAN_EXPORT int
 PREFIX(_n_rects) (region_type_t *region)
 {
     return PIXREGION_NUM_RECTS (region);
 }
 
-box_type_t *
+PIXMAN_EXPORT box_type_t *
 PREFIX(_rects) (region_type_t *region)
 {
     return PIXREGION_RECTS (region);
 }
 
-box_type_t *
+PIXMAN_EXPORT box_type_t *
 PREFIX(_rectangles) (region_type_t *region,
 				  int		    *n_rects)
 {
@@ -403,7 +403,7 @@ pixman_rect_alloc (region_type_t * region, int n)
     return TRUE;
 }
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 PREFIX(_copy) (region_type_t *dst, region_type_t *src)
 {
     good(dst);
@@ -967,7 +967,7 @@ pixman_region_intersectO (region_type_t *region,
     return TRUE;
 }
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 PREFIX(_intersect) (region_type_t * 	newReg,
 			 region_type_t * 	reg1,
 			 region_type_t *	reg2)
@@ -1124,7 +1124,7 @@ pixman_region_unionO (
 /* Convenience function for performing union of region with a
  * single rectangle
  */
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 PREFIX(_union_rect) (region_type_t *dest,
 			  region_type_t *source,
 			  int x, int y,
@@ -1143,7 +1143,7 @@ PREFIX(_union_rect) (region_type_t *dest,
     return PREFIX(_union) (dest, source, &region);
 }
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 PREFIX(_union) (region_type_t *newReg,
 		     region_type_t *reg1,
 		     region_type_t *reg2)
@@ -1243,7 +1243,7 @@ PREFIX(_union) (region_type_t *newReg,
  *      dstrgn is modified if rgn has rectangles.
  *
  */
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 PREFIX(_append) (region_type_t * dstrgn,
 		      region_type_t * rgn)
 {
@@ -1428,7 +1428,7 @@ QuickSortRects(
  *-----------------------------------------------------------------------
  */
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 PREFIX(_validate) (region_type_t * badreg,
 		       int *pOverlap)
 {
@@ -1771,7 +1771,7 @@ pixman_region_subtractO (
  *
  *-----------------------------------------------------------------------
  */
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 PREFIX(_subtract) (region_type_t *	regD,
 		       region_type_t * 	regM,
 		       region_type_t *	regS)
@@ -1836,7 +1836,7 @@ PREFIX(_subtract) (region_type_t *	regD,
  *-----------------------------------------------------------------------
  */
 pixman_bool_t
-PREFIX(_inverse) (region_type_t * 	  newReg,       /* Destination region */
+PIXMAN_EXPORT PREFIX(_inverse) (region_type_t * 	  newReg,       /* Destination region */
 		      region_type_t * 	  reg1,         /* Region to invert */
 		      box_type_t *     	  invRect) 	/* Bounding box for inversion */
 {
@@ -1895,7 +1895,7 @@ PREFIX(_inverse) (region_type_t * 	  newReg,       /* Destination region */
  */
 
 pixman_region_overlap_t
-PREFIX(_contains_rectangle) (region_type_t *  region,
+PIXMAN_EXPORT PREFIX(_contains_rectangle) (region_type_t *  region,
 				 box_type_t *     prect)
 {
     int	x;
@@ -1999,7 +1999,7 @@ PREFIX(_contains_rectangle) (region_type_t *  region,
    translates in place
 */
 
-void
+PIXMAN_EXPORT void
 PREFIX(_translate) (region_type_t * region, int x, int y)
 {
     int x1, x2, y1, y2;
@@ -2081,7 +2081,7 @@ PREFIX(_translate) (region_type_t * region, int x, int y)
     }
 }
 
-void
+PIXMAN_EXPORT void
 PREFIX(_reset) (region_type_t *region, box_type_t *box)
 {
     good(region);
@@ -2093,7 +2093,7 @@ PREFIX(_reset) (region_type_t *region, box_type_t *box)
 }
 
 /* box is "return" value */
-int
+PIXMAN_EXPORT int
 PREFIX(_contains_point) (region_type_t * region,
 			     int x, int y,
 			     box_type_t * box)
@@ -2126,14 +2126,14 @@ PREFIX(_contains_point) (region_type_t * region,
     return(FALSE);
 }
 
-int
+PIXMAN_EXPORT int
 PREFIX(_not_empty) (region_type_t * region)
 {
     good(region);
     return(!PIXREGION_NIL(region));
 }
 
-void
+PIXMAN_EXPORT void
 PREFIX(_empty) (region_type_t * region)
 {
     good(region);
@@ -2143,7 +2143,7 @@ PREFIX(_empty) (region_type_t * region)
     region->data = pixman_region_emptyData;
 }
 
-box_type_t *
+PIXMAN_EXPORT box_type_t *
 PREFIX(_extents) (region_type_t * region)
 {
     good(region);
@@ -2157,7 +2157,7 @@ PREFIX(_extents) (region_type_t * region)
     returns the number of new, clipped scanlines.
 */
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 PREFIX(_selfcheck) (reg)
     region_type_t * reg;
 {
@@ -2203,7 +2203,7 @@ PREFIX(_selfcheck) (reg)
     }
 }
 
-pixman_bool_t
+PIXMAN_EXPORT pixman_bool_t
 PREFIX(_init_rects) (region_type_t *region,
 			  box_type_t *boxes, int count)
 {
diff --git a/pixman/pixman-region16.c b/pixman/pixman-region16.c
index 0eea051..8d4a050 100644
--- a/pixman/pixman-region16.c
+++ b/pixman/pixman-region16.c
@@ -26,6 +26,8 @@
 #include <config.h>
 #endif
 
+#undef PIXMAN_DISABLE_DEPRECATED
+
 #include "pixman-private.h"
 
 typedef pixman_box16_t		box_type_t;
diff --git a/pixman/pixman.h b/pixman/pixman.h
index c493457..e6d9948 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -277,82 +277,66 @@ typedef enum
 /* This function exists only to make it possible to preserve the X ABI - it should
  * go away at first opportunity.
  */
-PIXMAN_EXPORT
-void		        pixman_region_set_static_pointers (pixman_box16_t *empty_box,
+void                    pixman_region_set_static_pointers (pixman_box16_t         *empty_box,
 							   pixman_region16_data_t *empty_data,
 							   pixman_region16_data_t *broken_data);
 
+
 /* creation/destruction */
-PIXMAN_EXPORT
-void                    pixman_region_init              (pixman_region16_t *region);
-PIXMAN_EXPORT
-void                    pixman_region_init_rect         (pixman_region16_t *region,
-							 int                x,
-							 int                y,
-							 unsigned int       width,
-							 unsigned int       height);
-PIXMAN_EXPORT
-void                    pixman_region_init_with_extents (pixman_region16_t *region,
-							 pixman_box16_t    *extents);
-PIXMAN_EXPORT
-void                    pixman_region_fini              (pixman_region16_t *region);
+void                    pixman_region_init                (pixman_region16_t      *region);
+void                    pixman_region_init_rect           (pixman_region16_t      *region,
+							   int                     x,
+							   int                     y,
+							   unsigned int            width,
+							   unsigned int            height);
+void                    pixman_region_init_with_extents   (pixman_region16_t      *region,
+							   pixman_box16_t         *extents);
+void                    pixman_region_fini                (pixman_region16_t      *region);
+
 
 /* manipulation */
-PIXMAN_EXPORT
-void                    pixman_region_translate  (pixman_region16_t *region,
-						  int                x,
-						  int                y);
-PIXMAN_EXPORT
-pixman_bool_t           pixman_region_copy       (pixman_region16_t *dest,
-						  pixman_region16_t *source);
-PIXMAN_EXPORT
-pixman_bool_t           pixman_region_intersect  (pixman_region16_t *newReg,
-						  pixman_region16_t *reg1,
-						  pixman_region16_t *reg2);
-PIXMAN_EXPORT
-pixman_bool_t           pixman_region_union      (pixman_region16_t *newReg,
-						  pixman_region16_t *reg1,
-						  pixman_region16_t *reg2);
-PIXMAN_EXPORT
-pixman_bool_t           pixman_region_union_rect (pixman_region16_t *dest,
-						  pixman_region16_t *source,
-						  int                x,
-						  int                y,
-						  unsigned int       width,
-						  unsigned int       height);
-PIXMAN_EXPORT
-pixman_bool_t           pixman_region_subtract   (pixman_region16_t *regD,
-						  pixman_region16_t *regM,
-						  pixman_region16_t *regS);
-PIXMAN_EXPORT
-pixman_bool_t           pixman_region_inverse    (pixman_region16_t *newReg,
-						  pixman_region16_t *reg1,
-						  pixman_box16_t    *invRect);
-PIXMAN_EXPORT
-pixman_bool_t           pixman_region_contains_point (pixman_region16_t *region,
-						      int x, int y, pixman_box16_t *box);
-PIXMAN_EXPORT
-pixman_region_overlap_t pixman_region_contains_rectangle (pixman_region16_t *pixman_region16_t,
-							  pixman_box16_t *prect);
-PIXMAN_EXPORT
-pixman_bool_t           pixman_region_not_empty (pixman_region16_t *region);
-PIXMAN_EXPORT
-pixman_box16_t *        pixman_region_extents (pixman_region16_t *region);
-PIXMAN_EXPORT
-int                     pixman_region_n_rects (pixman_region16_t *region);
-PIXMAN_EXPORT
-pixman_box16_t *        pixman_region_rectangles (pixman_region16_t *region,
-						  int		    *n_rects);
-PIXMAN_EXPORT
-pixman_bool_t		pixman_region_equal (pixman_region16_t *region1,
-					     pixman_region16_t *region2);
-PIXMAN_EXPORT
-pixman_bool_t		pixman_region_selfcheck (pixman_region16_t *region);
-PIXMAN_EXPORT
-void			pixman_region_reset (pixman_region16_t *region, pixman_box16_t *box);
-PIXMAN_EXPORT
-pixman_bool_t		pixman_region_init_rects (pixman_region16_t *region,
-						  pixman_box16_t *boxes, int count);
+void                    pixman_region_translate           (pixman_region16_t      *region,
+							   int                     x,
+							   int                     y);
+pixman_bool_t           pixman_region_copy                (pixman_region16_t      *dest,
+							   pixman_region16_t      *source);
+pixman_bool_t           pixman_region_intersect           (pixman_region16_t      *newReg,
+							   pixman_region16_t      *reg1,
+							   pixman_region16_t      *reg2);
+pixman_bool_t           pixman_region_union               (pixman_region16_t      *newReg,
+							   pixman_region16_t      *reg1,
+							   pixman_region16_t      *reg2);
+pixman_bool_t           pixman_region_union_rect          (pixman_region16_t      *dest,
+							   pixman_region16_t      *source,
+							   int                     x,
+							   int                     y,
+							   unsigned int            width,
+							   unsigned int            height);
+pixman_bool_t           pixman_region_subtract            (pixman_region16_t      *regD,
+							   pixman_region16_t      *regM,
+							   pixman_region16_t      *regS);
+pixman_bool_t           pixman_region_inverse             (pixman_region16_t      *newReg,
+							   pixman_region16_t      *reg1,
+							   pixman_box16_t         *invRect);
+pixman_bool_t           pixman_region_contains_point      (pixman_region16_t      *region,
+							   int                     x,
+							   int                     y,
+							   pixman_box16_t         *box);
+pixman_region_overlap_t pixman_region_contains_rectangle  (pixman_region16_t      *pixman_region16_t,
+							   pixman_box16_t         *prect);
+pixman_bool_t           pixman_region_not_empty           (pixman_region16_t      *region);
+pixman_box16_t *        pixman_region_extents             (pixman_region16_t      *region);
+int                     pixman_region_n_rects             (pixman_region16_t      *region);
+pixman_box16_t *        pixman_region_rectangles          (pixman_region16_t      *region,
+							   int                    *n_rects);
+pixman_bool_t           pixman_region_equal               (pixman_region16_t      *region1,
+							   pixman_region16_t      *region2);
+pixman_bool_t           pixman_region_selfcheck           (pixman_region16_t      *region);
+void                    pixman_region_reset               (pixman_region16_t      *region,
+							   pixman_box16_t         *box);
+pixman_bool_t           pixman_region_init_rects          (pixman_region16_t      *region,
+							   pixman_box16_t         *boxes,
+							   int                     count);
 
 /*
  * 32 bit regions
@@ -386,78 +370,57 @@ struct pixman_region32
 };
 
 /* creation/destruction */
-PIXMAN_EXPORT
 void                    pixman_region32_init               (pixman_region32_t *region);
-PIXMAN_EXPORT
 void                    pixman_region32_init_rect          (pixman_region32_t *region,
 							    int                x,
 							    int                y,
 							    unsigned int       width,
 							    unsigned int       height);
-PIXMAN_EXPORT
 void                    pixman_region32_init_with_extents  (pixman_region32_t *region,
 							    pixman_box32_t    *extents);
-PIXMAN_EXPORT
 void                    pixman_region32_fini               (pixman_region32_t *region);
 
 
 /* manipulation */
-PIXMAN_EXPORT
 void                    pixman_region32_translate          (pixman_region32_t *region,
 							    int                x,
 							    int                y);
-PIXMAN_EXPORT
 pixman_bool_t           pixman_region32_copy               (pixman_region32_t *dest,
 							    pixman_region32_t *source);
-PIXMAN_EXPORT
 pixman_bool_t           pixman_region32_intersect          (pixman_region32_t *newReg,
 							    pixman_region32_t *reg1,
 							    pixman_region32_t *reg2);
-PIXMAN_EXPORT
 pixman_bool_t           pixman_region32_union              (pixman_region32_t *newReg,
 							    pixman_region32_t *reg1,
 							    pixman_region32_t *reg2);
-PIXMAN_EXPORT
 pixman_bool_t           pixman_region32_union_rect         (pixman_region32_t *dest,
 							    pixman_region32_t *source,
 							    int                x,
 							    int                y,
 							    unsigned int       width,
 							    unsigned int       height);
-PIXMAN_EXPORT
 pixman_bool_t           pixman_region32_subtract           (pixman_region32_t *regD,
 							    pixman_region32_t *regM,
 							    pixman_region32_t *regS);
-PIXMAN_EXPORT
 pixman_bool_t           pixman_region32_inverse            (pixman_region32_t *newReg,
 							    pixman_region32_t *reg1,
 							    pixman_box32_t    *invRect);
-PIXMAN_EXPORT
 pixman_bool_t           pixman_region32_contains_point     (pixman_region32_t *region,
 							    int                x,
 							    int                y,
 							    pixman_box32_t    *box);
-PIXMAN_EXPORT
 pixman_region_overlap_t pixman_region32_contains_rectangle (pixman_region32_t *region,
 							    pixman_box32_t    *prect);
-PIXMAN_EXPORT
 pixman_bool_t           pixman_region32_not_empty          (pixman_region32_t *region);
-PIXMAN_EXPORT
 pixman_box32_t *        pixman_region32_extents            (pixman_region32_t *region);
-PIXMAN_EXPORT
 int                     pixman_region32_n_rects            (pixman_region32_t *region);
-PIXMAN_EXPORT
 pixman_box32_t *        pixman_region32_rectangles         (pixman_region32_t *region,
 							    int               *n_rects);
-PIXMAN_EXPORT
 pixman_bool_t           pixman_region32_equal              (pixman_region32_t *region1,
 							    pixman_region32_t *region2);
-PIXMAN_EXPORT
 pixman_bool_t           pixman_region32_selfcheck          (pixman_region32_t *region);
-PIXMAN_EXPORT
 void                    pixman_region32_reset              (pixman_region32_t *region,
 							    pixman_box32_t    *box);
-PIXMAN_EXPORT
 pixman_bool_t           pixman_region32_init_rects         (pixman_region32_t *region,
 							    pixman_box32_t    *boxes,
 							    int                count);
commit 7a32c864e95d35e13d5473f5519639d91f62e20a
Merge: 81369a4... 1248418...
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 20:14:52 2008 -0400

    Merge branch 'master' into region32

commit 1248418854b0e6e2f7fa8c2760a05b6604d3ded7
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 20:01:37 2008 -0400

    Fix forgotten use of pixman-combine.h

diff --git a/pixman/combine.pl b/pixman/combine.pl
index 7258ff3..f8df603 100644
--- a/pixman/combine.pl
+++ b/pixman/combine.pl
@@ -75,6 +75,7 @@ while (<STDIN>) {
 
     # Change the header for the 64-bit version
     s/pixman-combine.h/pixman-combine64.h/ if $size == 16;
+    s/pixman-combine.h/pixman-combine32.h/ if $size == 8;
 
     print;
 }
commit fb8f17fdf1eaec4ab8edba1486bfa83c0965d738
Merge: 9267b0b... 1063933...
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 19:55:43 2008 -0400

    Merge branch 'vmx'

commit 1063933bacb8b5d06b42b7b06a116339ce7c1f0c
Author: Søren Sandmann <sandmann at redhat.com>
Date:   Sun Jun 8 19:55:35 2008 -0400

    Rename pixman-combine.h -> pixman-combine32.h

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index de9327c..377f958 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -28,9 +28,9 @@ libpixmanincludedir = $(includedir)/pixman-1/
 libpixmaninclude_HEADERS = pixman.h pixman-version.h
 noinst_LTLIBRARIES = 
 
-pixman-combine32.c : combine.inc pixman-combine.h combine.pl
+pixman-combine32.c : combine.inc pixman-combine32.h combine.pl
 	$(PERL) $(srcdir)/combine.pl 8 < $(srcdir)/combine.inc > $@ || ($(RM) $@; exit 1)
-pixman-combine.h : combine.h.inc combine.pl
+pixman-combine32.h : combine.h.inc combine.pl
 	$(PERL) $(srcdir)/combine.pl 8 < $(srcdir)/combine.h.inc > $@ || ($(RM) $@; exit 1)
 
 pixman-combine64.c : combine.inc pixman-combine64.h combine.pl
@@ -39,7 +39,7 @@ pixman-combine64.h : combine.h.inc combine.pl
 	$(PERL) $(srcdir)/combine.pl 16 < $(srcdir)/combine.h.inc > $@ || ($(RM) $@; exit 1)
 
 EXTRA_DIST = Makefile.win32 combine.inc combine.pl
-CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine.h pixman-combine64.h
+CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine32.h pixman-combine64.h
 
 # mmx code
 if USE_MMX
@@ -58,7 +58,7 @@ noinst_LTLIBRARIES += libpixman-vmx.la
 libpixman_vmx_la_SOURCES = \
 	pixman-vmx.c \
 	pixman-vmx.h \
-	pixman-combine.h
+	pixman-combine32.h
 libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS)
 libpixman_vmx_la_LIBADD = $(DEP_LIBS)
 libpixman_1_la_LIBADD += libpixman-vmx.la
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 76f3592..8c8a2a3 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -27,7 +27,7 @@
 
 #include <config.h>
 #include "pixman-vmx.h"
-#include "pixman-combine.h"
+#include "pixman-combine32.h"
 #include <altivec.h>
 
 #ifdef __GNUC__
commit 567b4c255050ee3cc2dd0c03fb091d1f981332eb
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Sat Jun 7 19:38:01 2008 +0200

    Use sigaction instead of signal to restore the previous handler

diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 948c666..e49a864 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1923,15 +1923,19 @@ pixman_bool_t pixman_have_vmx (void) {
 #else
 #include <signal.h>
 
-static void vmx_test (int sig) {
+static void vmx_test(int sig, siginfo_t *si, void *unused) {
     have_vmx = FALSE;
 }
 
 pixman_bool_t pixman_have_vmx (void) {
+    struct sigaction sa, osa;
     if (!initialized) {
-        signal(SIGILL, vmx_test);
+        sa.sa_flags = SA_SIGINFO;
+        sigemptyset(&sa.sa_mask);
+        sa.sa_sigaction = vmx_test;
+        sigaction(SIGILL, &sa, &osa);
         asm volatile ( "vor 0, 0, 0" );
-        signal(SIGILL, SIG_DFL);
+        sigaction(SIGILL, &osa, NULL);
         initialized = TRUE;
     }
     return have_vmx;
commit 7ef19261ee5bb4c78ca55533c67e1f267faed61e
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Sat Jun 7 19:28:10 2008 +0200

    Use combine macros from the generated header

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index f0770a6..de9327c 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -56,8 +56,9 @@ endif
 if USE_VMX
 noinst_LTLIBRARIES += libpixman-vmx.la
 libpixman_vmx_la_SOURCES = \
-	pixman-vmx.c		\
-	pixman-vmx.h
+	pixman-vmx.c \
+	pixman-vmx.h \
+	pixman-combine.h
 libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS)
 libpixman_vmx_la_LIBADD = $(DEP_LIBS)
 libpixman_1_la_LIBADD += libpixman-vmx.la
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 87dc4d1..76f3592 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -27,172 +27,13 @@
 
 #include <config.h>
 #include "pixman-vmx.h"
+#include "pixman-combine.h"
 #include <altivec.h>
 
 #ifdef __GNUC__
 #   define inline __inline__ __attribute__ ((__always_inline__))
 #endif
 
-#define Alpha(x) ((x) >> 24)
-
-/*
-  x_c = (x_c * a) / 255
-*/
-#define FbByteMul(x, a) do {					    \
-        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
-        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
-        t &= 0xff00ff;						    \
-								    \
-        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;		    \
-        x = (x + ((x >> 8) & 0xff00ff));			    \
-        x &= 0xff00ff00;					    \
-        x += t;							    \
-    } while (0)
-
-/*
-  x_c = (x_c * a) / 255 + y
-*/
-#define FbByteMulAdd(x, a, y) do {				    \
-        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
-        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
-        t &= 0xff00ff;						    \
-        t += y & 0xff00ff;					    \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);			    \
-        t &= 0xff00ff;						    \
-								    \
-        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;                 \
-        x = (x + ((x >> 8) & 0xff00ff)) >> 8;                       \
-        x &= 0xff00ff;                                              \
-        x += (y >> 8) & 0xff00ff;                                   \
-        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                     \
-        x &= 0xff00ff;                                              \
-        x <<= 8;                                                    \
-        x += t;                                                     \
-    } while (0)
-
-/*
-  x_c = (x_c * a + y_c * b) / 255
-*/
-#define FbByteAddMul(x, a, y, b) do {                                   \
-        uint32_t t;							\
-        uint32_t r = (x >> 24) * a + (y >> 24) * b + 0x80;		\
-        r += (r >> 8);                                                  \
-        r >>= 8;                                                        \
-									\
-        t = (x & 0xff00) * a + (y & 0xff00) * b;                        \
-        t += (t >> 8) + 0x8000;                                         \
-        t >>= 16;                                                       \
-									\
-        t |= r << 16;                                                   \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
-        t &= 0xff00ff;                                                  \
-        t <<= 8;                                                        \
-									\
-        r = ((x >> 16) & 0xff) * a + ((y >> 16) & 0xff) * b + 0x80;     \
-        r += (r >> 8);                                                  \
-        r >>= 8;                                                        \
-									\
-        x = (x & 0xff) * a + (y & 0xff) * b + 0x80;                     \
-        x += (x >> 8);                                                  \
-        x >>= 8;                                                        \
-        x |= r << 16;                                                   \
-        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
-        x &= 0xff00ff;                                                  \
-        x |= t;                                                         \
-    } while (0)
-
-/*
-  x_c = (x_c * a_c) / 255
-*/
-#define FbByteMulC(x, a) do {				  \
-        uint32_t t;                                       \
-        uint32_t r = (x & 0xff) * (a & 0xff);             \
-        r |= (x & 0xff0000) * ((a >> 16) & 0xff);	  \
-	r += 0x800080;					  \
-        r = (r + ((r >> 8) & 0xff00ff)) >> 8;		  \
-        r &= 0xff00ff;					  \
-							  \
-        x >>= 8;					  \
-        t = (x & 0xff) * ((a >> 8) & 0xff);		  \
-        t |= (x & 0xff0000) * (a >> 24);		  \
-        t += 0x800080;					  \
-        t = t + ((t >> 8) & 0xff00ff);			  \
-        x = r | (t & 0xff00ff00);			  \
-							  \
-    } while (0)
-
-/*
-  x_c = (x_c * a) / 255 + y
-*/
-#define FbByteMulAddC(x, a, y) do {				      \
-        uint32_t t;                                                   \
-        uint32_t r = (x & 0xff) * (a & 0xff);                         \
-        r |= (x & 0xff0000) * ((a >> 16) & 0xff);		      \
-	r += 0x800080;						      \
-	r = (r + ((r >> 8) & 0xff00ff)) >> 8;			      \
-        r &= 0xff00ff;						      \
-        r += y & 0xff00ff;					      \
-        r |= 0x1000100 - ((r >> 8) & 0xff00ff);			      \
-        r &= 0xff00ff;						      \
-								      \
-        x >>= 8;                                                       \
-        t = (x & 0xff) * ((a >> 8) & 0xff);                            \
-        t |= (x & 0xff0000) * (a >> 24);                               \
-	t += 0x800080;                                                 \
-        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			       \
-        t &= 0xff00ff;                                                 \
-        t += (y >> 8) & 0xff00ff;                                      \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                        \
-        t &= 0xff00ff;                                                 \
-        x = r | (t << 8);                                              \
-    } while (0)
-
-/*
-  x_c = (x_c * a_c + y_c * b) / 255
-*/
-#define FbByteAddMulC(x, a, y, b) do {                                  \
-        uint32_t t;							\
-        uint32_t r = (x >> 24) * (a >> 24) + (y >> 24) * b;		\
-        r += (r >> 8) + 0x80;                                           \
-        r >>= 8;                                                        \
-									\
-        t = (x & 0xff00) * ((a >> 8) & 0xff) + (y & 0xff00) * b;        \
-        t += (t >> 8) + 0x8000;                                         \
-        t >>= 16;                                                       \
-									\
-        t |= r << 16;                                                   \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
-        t &= 0xff00ff;                                                  \
-        t <<= 8;                                                        \
-									\
-        r = ((x >> 16) & 0xff) * ((a >> 16) & 0xff) + ((y >> 16) & 0xff) * b + 0x80; \
-        r += (r >> 8);                                                  \
-        r >>= 8;                                                        \
-									\
-        x = (x & 0xff) * (a & 0xff) + (y & 0xff) * b + 0x80;            \
-        x += (x >> 8);                                                  \
-        x >>= 8;                                                        \
-        x |= r << 16;                                                   \
-        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
-        x &= 0xff00ff;                                                  \
-        x |= t;                                                         \
-    } while (0)
-
-/*
-  x_c = min(x_c + y_c, 255)
-*/
-#define FbByteAdd(x, y) do {                                            \
-        uint32_t t;							\
-        uint32_t r = (x & 0xff00ff) + (y & 0xff00ff);			\
-        r |= 0x1000100 - ((r >> 8) & 0xff00ff);                         \
-        r &= 0xff00ff;                                                  \
-									\
-        t = ((x >> 8) & 0xff00ff) + ((y >> 8) & 0xff00ff);              \
-        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
-        r |= (t & 0xff00ff) << 8;                                       \
-        x = r;                                                          \
-    } while (0)
-
 static inline vector unsigned int
 splat_alpha (vector unsigned int pix) {
     return vec_perm (pix, pix,
commit 795fd8a4c0f9417fb92beaff8595064c573b7652
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Sat Jun 7 19:25:09 2008 +0200

    Split combine.inc generated files in source and header

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index effa959..f0770a6 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -28,14 +28,18 @@ libpixmanincludedir = $(includedir)/pixman-1/
 libpixmaninclude_HEADERS = pixman.h pixman-version.h
 noinst_LTLIBRARIES = 
 
-pixman-combine32.c : combine.inc combine.pl
+pixman-combine32.c : combine.inc pixman-combine.h combine.pl
 	$(PERL) $(srcdir)/combine.pl 8 < $(srcdir)/combine.inc > $@ || ($(RM) $@; exit 1)
+pixman-combine.h : combine.h.inc combine.pl
+	$(PERL) $(srcdir)/combine.pl 8 < $(srcdir)/combine.h.inc > $@ || ($(RM) $@; exit 1)
 
-pixman-combine64.c : combine.inc combine.pl
+pixman-combine64.c : combine.inc pixman-combine64.h combine.pl
 	$(PERL) $(srcdir)/combine.pl 16 < $(srcdir)/combine.inc > $@ || ($(RM) $@; exit 1)
+pixman-combine64.h : combine.h.inc combine.pl
+	$(PERL) $(srcdir)/combine.pl 16 < $(srcdir)/combine.h.inc > $@ || ($(RM) $@; exit 1)
 
 EXTRA_DIST = Makefile.win32 combine.inc combine.pl
-CLEANFILES = pixman-combine32.c pixman-combine64.c
+CLEANFILES = pixman-combine32.c pixman-combine64.c pixman-combine.h pixman-combine64.h
 
 # mmx code
 if USE_MMX
diff --git a/pixman/combine.h.inc b/pixman/combine.h.inc
new file mode 100644
index 0000000..7dd97ae
--- /dev/null
+++ b/pixman/combine.h.inc
@@ -0,0 +1,215 @@
+
+#define COMPONENT_SIZE
+#define MASK
+#define ONE_HALF
+
+#define G_SHIFT
+#define B_SHIFT
+#define A_SHIFT
+#define G_MASK
+#define B_MASK
+#define A_MASK
+
+#define RB_MASK
+#define AG_MASK
+#define RB_ONE_HALF
+#define RB_MASK_PLUS_ONE
+
+#define Alpha(x) ((x) >> A_SHIFT)
+
+/*
+ * Helper macros.
+ */
+
+#define IntMult(a,b,t) ( (t) = (a) * (b) + ONE_HALF, ( ( ( (t)>>G_SHIFT ) + (t) )>>G_SHIFT ) )
+#define IntDiv(a,b)    (((comp2_t) (a) * MASK) / (b))
+
+#define GetComp(v,i)   ((comp2_t) (comp1_t) ((v) >> i))
+
+#define Add(x,y,i,t)   ((t) = GetComp(x,i) + GetComp(y,i),              \
+                        (comp4_t) ((comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
+
+#define FbGen(x,y,i,ax,ay,t,u,v) ((t) = (IntMult(GetComp(y,i),ay,(u)) + \
+					 IntMult(GetComp(x,i),ax,(v))), \
+				  (comp4_t) ((comp1_t) ((t) |		\
+							 (0 - ((t) >> G_SHIFT)))) << (i))
+
+/*
+  The methods below use some tricks to be able to do two color
+  components at the same time.
+*/
+
+/*
+  x_c = (x_c * a) / 255
+*/
+#define FbByteMul(x, a) do {                                            \
+        comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF;                  \
+        t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
+        t &= RB_MASK;                                                   \
+                                                                        \
+        x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF;      \
+        x = (x + ((x >> COMPONENT_SIZE) & RB_MASK));                    \
+        x &= RB_MASK << COMPONENT_SIZE;                                 \
+        x += t;                                                         \
+    } while (0)
+
+/*
+  x_c = (x_c * a) / 255 + y_c
+*/
+#define FbByteMulAdd(x, a, y) do {                                      \
+        comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF;                  \
+        t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
+        t &= RB_MASK;                                                   \
+        t += y & RB_MASK;                                               \
+        t |= RB_MASK_PLUS_ONE - ((t >> COMPONENT_SIZE) & RB_MASK);      \
+        t &= RB_MASK;                                                   \
+                                                                        \
+        x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF;      \
+        x = (x + ((x >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
+        x &= RB_MASK;                                                   \
+        x += (y >> COMPONENT_SIZE) & RB_MASK;                           \
+        x |= RB_MASK_PLUS_ONE - ((x >> COMPONENT_SIZE) & RB_MASK);      \
+        x &= RB_MASK;                                                   \
+        x <<= COMPONENT_SIZE;                                           \
+        x += t;                                                         \
+    } while (0)
+
+/*
+  x_c = (x_c * a + y_c * b) / 255
+*/
+#define FbByteAddMul(x, a, y, b) do {                                   \
+        comp4_t t;                                                      \
+        comp4_t r = (x >> A_SHIFT) * a + (y >> A_SHIFT) * b + ONE_HALF; \
+        r += (r >> G_SHIFT);                                            \
+        r >>= G_SHIFT;                                                  \
+                                                                        \
+        t = (x & G_MASK) * a + (y & G_MASK) * b;                        \
+        t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT);                    \
+        t >>= B_SHIFT;                                                  \
+                                                                        \
+        t |= r << B_SHIFT;                                              \
+        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+        t &= RB_MASK;                                                   \
+        t <<= G_SHIFT;                                                  \
+                                                                        \
+        r = ((x >> B_SHIFT) & MASK) * a +                               \
+            ((y >> B_SHIFT) & MASK) * b + ONE_HALF;                     \
+        r += (r >> G_SHIFT);                                            \
+        r >>= G_SHIFT;                                                  \
+                                                                        \
+        x = (x & MASK) * a + (y & MASK) * b + ONE_HALF;                 \
+        x += (x >> G_SHIFT);                                            \
+        x >>= G_SHIFT;                                                  \
+        x |= r << B_SHIFT;                                              \
+        x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK);             \
+        x &= RB_MASK;                                                   \
+        x |= t;                                                         \
+    } while (0)
+
+/*
+  x_c = (x_c * a + y_c * b) / 256
+*/
+#define FbByteAddMul_256(x, a, y, b) do {                               \
+        comp4_t t = (x & RB_MASK) * a + (y & RB_MASK) * b;              \
+        t >>= G_SHIFT;                                                  \
+        t &= RB_MASK;                                                   \
+                                                                        \
+        x = ((x >> G_SHIFT) & RB_MASK) * a +                            \
+            ((y >> G_SHIFT) & RB_MASK) * b;                             \
+        x &= AG_MASK;                                                   \
+        x += t;                                                         \
+    } while (0)
+
+/*
+  x_c = (x_c * a_c) / 255
+*/
+#define FbByteMulC(x, a) do {                                           \
+        comp4_t t;                                                      \
+        comp4_t r = (x & MASK) * (a & MASK);                            \
+        r |= (x & B_MASK) * ((a >> B_SHIFT) & MASK);                    \
+        r += RB_ONE_HALF;                                               \
+        r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
+        r &= RB_MASK;                                                   \
+                                                                        \
+        x >>= G_SHIFT;                                                  \
+        t = (x & MASK) * ((a >> G_SHIFT) & MASK);                       \
+        t |= (x & B_MASK) * (a >> A_SHIFT);                             \
+        t += RB_ONE_HALF;                                               \
+        t = t + ((t >> G_SHIFT) & RB_MASK);                             \
+        x = r | (t & AG_MASK);                                          \
+    } while (0)
+
+/*
+  x_c = (x_c * a_c) / 255 + y_c
+*/
+#define FbByteMulAddC(x, a, y) do {                                     \
+        comp4_t t;                                                      \
+        comp4_t r = (x & MASK) * (a & MASK);                            \
+        r |= (x & B_MASK) * ((a >> B_SHIFT) & MASK);                    \
+        r += RB_ONE_HALF;                                               \
+        r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
+        r &= RB_MASK;                                                   \
+        r += y & RB_MASK;                                               \
+        r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK);             \
+        r &= RB_MASK;                                                   \
+                                                                        \
+        x >>= G_SHIFT;                                                  \
+        t = (x & MASK) * ((a >> G_SHIFT) & MASK);                       \
+        t |= (x & B_MASK) * (a >> A_SHIFT);                             \
+        t += RB_ONE_HALF;                                               \
+        t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
+        t &= RB_MASK;                                                   \
+        t += (y >> G_SHIFT) & RB_MASK;                                  \
+        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+        t &= RB_MASK;                                                   \
+        x = r | (t << G_SHIFT);                                         \
+    } while (0)
+
+/*
+  x_c = min((x_c * a_c + y_c * b) / 255, 255)
+*/
+#define FbByteAddMulC(x, a, y, b) do {                                  \
+        comp4_t t;                                                      \
+        comp4_t r = (x >> A_SHIFT) * (a >> A_SHIFT) +                   \
+                     (y >> A_SHIFT) * b;                                \
+        r += (r >> G_SHIFT) + ONE_HALF;                                 \
+        r >>= G_SHIFT;                                                  \
+                                                                        \
+        t = (x & G_MASK) * ((a >> G_SHIFT) & MASK) + (y & G_MASK) * b;  \
+        t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT);                    \
+        t >>= B_SHIFT;                                                  \
+                                                                        \
+        t |= r << B_SHIFT;                                              \
+        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+        t &= RB_MASK;                                                   \
+        t <<= G_SHIFT;                                                  \
+                                                                        \
+        r = ((x >> B_SHIFT) & MASK) * ((a >> B_SHIFT) & MASK) +         \
+            ((y >> B_SHIFT) & MASK) * b + ONE_HALF;                     \
+        r += (r >> G_SHIFT);                                            \
+        r >>= G_SHIFT;                                                  \
+                                                                        \
+        x = (x & MASK) * (a & MASK) + (y & MASK) * b + ONE_HALF;        \
+        x += (x >> G_SHIFT);                                            \
+        x >>= G_SHIFT;                                                  \
+        x |= r << B_SHIFT;                                              \
+        x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK);             \
+        x &= RB_MASK;                                                   \
+        x |= t;                                                         \
+    } while (0)
+
+/*
+  x_c = min(x_c + y_c, 255)
+*/
+#define FbByteAdd(x, y) do {                                            \
+        comp4_t t;                                                      \
+        comp4_t r = (x & RB_MASK) + (y & RB_MASK);                      \
+        r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK);             \
+        r &= RB_MASK;                                                   \
+                                                                        \
+        t = ((x >> G_SHIFT) & RB_MASK) + ((y >> G_SHIFT) & RB_MASK);    \
+        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+        r |= (t & RB_MASK) << G_SHIFT;                                  \
+        x = r;                                                          \
+    } while (0)
+
diff --git a/pixman/combine.inc b/pixman/combine.inc
index 63a3fe1..9f88dee 100644
--- a/pixman/combine.inc
+++ b/pixman/combine.inc
@@ -6,204 +6,7 @@
 
 #include "pixman-private.h"
 
-#define Alpha(x) ((x) >> A_SHIFT)
-
-/*
- * Helper macros.
- */
-
-#define IntMult(a,b,t) ( (t) = (a) * (b) + ONE_HALF, ( ( ( (t)>>G_SHIFT ) + (t) )>>G_SHIFT ) )
-#define IntDiv(a,b)    (((comp2_t) (a) * MASK) / (b))
-
-#define GetComp(v,i)   ((comp2_t) (comp1_t) ((v) >> i))
-
-#define Add(x,y,i,t)   ((t) = GetComp(x,i) + GetComp(y,i),              \
-                        (comp4_t) ((comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
-
-#define FbGen(x,y,i,ax,ay,t,u,v) ((t) = (IntMult(GetComp(y,i),ay,(u)) + \
-					 IntMult(GetComp(x,i),ax,(v))), \
-				  (comp4_t) ((comp1_t) ((t) |		\
-							 (0 - ((t) >> G_SHIFT)))) << (i))
-
-/*
-  The methods below use some tricks to be able to do two color
-  components at the same time.
-*/
-
-/*
-  x_c = (x_c * a) / 255
-*/
-#define FbByteMul(x, a) do {                                            \
-        comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF;                  \
-        t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
-        t &= RB_MASK;                                                   \
-                                                                        \
-        x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF;      \
-        x = (x + ((x >> COMPONENT_SIZE) & RB_MASK));                    \
-        x &= RB_MASK << COMPONENT_SIZE;                                 \
-        x += t;                                                         \
-    } while (0)
-
-/*
-  x_c = (x_c * a) / 255 + y
-*/
-#define FbByteMulAdd(x, a, y) do {                                      \
-        comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF;                  \
-        t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
-        t &= RB_MASK;                                                   \
-        t += y & RB_MASK;                                               \
-        t |= RB_MASK_PLUS_ONE - ((t >> COMPONENT_SIZE) & RB_MASK);      \
-        t &= RB_MASK;                                                   \
-                                                                        \
-        x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF;      \
-        x = (x + ((x >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
-        x &= RB_MASK;                                                   \
-        x += (y >> COMPONENT_SIZE) & RB_MASK;                           \
-        x |= RB_MASK_PLUS_ONE - ((x >> COMPONENT_SIZE) & RB_MASK);      \
-        x &= RB_MASK;                                                   \
-        x <<= COMPONENT_SIZE;                                           \
-        x += t;                                                         \
-    } while (0)
-
-/*
-  x_c = (x_c * a + y_c * b) / 255
-*/
-#define FbByteAddMul(x, a, y, b) do {                                   \
-        comp4_t t;                                                      \
-        comp4_t r = (x >> A_SHIFT) * a + (y >> A_SHIFT) * b + ONE_HALF; \
-        r += (r >> G_SHIFT);                                            \
-        r >>= G_SHIFT;                                                  \
-                                                                        \
-        t = (x & G_MASK) * a + (y & G_MASK) * b;                        \
-        t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT);                    \
-        t >>= B_SHIFT;                                                  \
-                                                                        \
-        t |= r << B_SHIFT;                                              \
-        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
-        t &= RB_MASK;                                                   \
-        t <<= G_SHIFT;                                                  \
-                                                                        \
-        r = ((x >> B_SHIFT) & MASK) * a +                               \
-            ((y >> B_SHIFT) & MASK) * b + ONE_HALF;                     \
-        r += (r >> G_SHIFT);                                            \
-        r >>= G_SHIFT;                                                  \
-                                                                        \
-        x = (x & MASK) * a + (y & MASK) * b + ONE_HALF;                 \
-        x += (x >> G_SHIFT);                                            \
-        x >>= G_SHIFT;                                                  \
-        x |= r << B_SHIFT;                                              \
-        x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK);             \
-        x &= RB_MASK;                                                   \
-        x |= t;                                                         \
-    } while (0)
-
-/*
-  x_c = (x_c * a + y_c *b) / 256
-*/
-#define FbByteAddMul_256(x, a, y, b) do {                               \
-        comp4_t t = (x & RB_MASK) * a + (y & RB_MASK) * b;              \
-        t >>= G_SHIFT;                                                  \
-        t &= RB_MASK;                                                   \
-                                                                        \
-        x = ((x >> G_SHIFT) & RB_MASK) * a +                            \
-            ((y >> G_SHIFT) & RB_MASK) * b;                             \
-        x &= AG_MASK;                                                   \
-        x += t;                                                         \
-    } while (0)
-
-/*
-  x_c = (x_c * a_c) / 255
-*/
-#define FbByteMulC(x, a) do {                                           \
-        comp4_t t;                                                      \
-        comp4_t r = (x & MASK) * (a & MASK);                            \
-        r |= (x & B_MASK) * ((a >> B_SHIFT) & MASK);                    \
-        r += RB_ONE_HALF;                                               \
-        r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
-        r &= RB_MASK;                                                   \
-                                                                        \
-        x >>= G_SHIFT;                                                  \
-        t = (x & MASK) * ((a >> G_SHIFT) & MASK);                       \
-        t |= (x & B_MASK) * (a >> A_SHIFT);                             \
-        t += RB_ONE_HALF;                                               \
-        t = t + ((t >> G_SHIFT) & RB_MASK);                             \
-        x = r | (t & AG_MASK);                                          \
-    } while (0)
-
-/*
-  x_c = (x_c * a) / 255 + y
-*/
-#define FbByteMulAddC(x, a, y) do {                                     \
-        comp4_t t;                                                      \
-        comp4_t r = (x & MASK) * (a & MASK);                            \
-        r |= (x & B_MASK) * ((a >> B_SHIFT) & MASK);                    \
-        r += RB_ONE_HALF;                                               \
-        r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
-        r &= RB_MASK;                                                   \
-        r += y & RB_MASK;                                               \
-        r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK);             \
-        r &= RB_MASK;                                                   \
-                                                                        \
-        x >>= G_SHIFT;                                                  \
-        t = (x & MASK) * ((a >> G_SHIFT) & MASK);                       \
-        t |= (x & B_MASK) * (a >> A_SHIFT);                             \
-        t += RB_ONE_HALF;                                               \
-        t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
-        t &= RB_MASK;                                                   \
-        t += (y >> G_SHIFT) & RB_MASK;                                  \
-        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
-        t &= RB_MASK;                                                   \
-        x = r | (t << G_SHIFT);                                         \
-    } while (0)
-
-/*
-  x_c = (x_c * a_c + y_c * b) / 255
-*/
-#define FbByteAddMulC(x, a, y, b) do {                                  \
-        comp4_t t;                                                      \
-        comp4_t r = (x >> A_SHIFT) * (a >> A_SHIFT) +                   \
-                     (y >> A_SHIFT) * b;                                \
-        r += (r >> G_SHIFT) + ONE_HALF;                                 \
-        r >>= G_SHIFT;                                                  \
-                                                                        \
-        t = (x & G_MASK) * ((a >> G_SHIFT) & MASK) + (y & G_MASK) * b;  \
-        t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT);                    \
-        t >>= B_SHIFT;                                                  \
-                                                                        \
-        t |= r << B_SHIFT;                                              \
-        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
-        t &= RB_MASK;                                                   \
-        t <<= G_SHIFT;                                                  \
-                                                                        \
-        r = ((x >> B_SHIFT) & MASK) * ((a >> B_SHIFT) & MASK) +         \
-            ((y >> B_SHIFT) & MASK) * b + ONE_HALF;                     \
-        r += (r >> G_SHIFT);                                            \
-        r >>= G_SHIFT;                                                  \
-                                                                        \
-        x = (x & MASK) * (a & MASK) + (y & MASK) * b + ONE_HALF;        \
-        x += (x >> G_SHIFT);                                            \
-        x >>= G_SHIFT;                                                  \
-        x |= r << B_SHIFT;                                              \
-        x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK);             \
-        x &= RB_MASK;                                                   \
-        x |= t;                                                         \
-    } while (0)
-
-/*
-  x_c = min(x_c + y_c, 255)
-*/
-#define FbByteAdd(x, y) do {                                            \
-        comp4_t t;                                                      \
-        comp4_t r = (x & RB_MASK) + (y & RB_MASK);                      \
-        r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK);             \
-        r &= RB_MASK;                                                   \
-                                                                        \
-        t = ((x >> G_SHIFT) & RB_MASK) + ((y >> G_SHIFT) & RB_MASK);    \
-        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
-        r |= (t & RB_MASK) << G_SHIFT;                                  \
-        x = r;                                                          \
-    } while (0)
-
+#include "pixman-combine.h"
 
 /*
  * There are two ways of handling alpha -- either as a single unified value or
diff --git a/pixman/combine.pl b/pixman/combine.pl
index ba13d6c..7258ff3 100644
--- a/pixman/combine.pl
+++ b/pixman/combine.pl
@@ -27,30 +27,38 @@ print "/* WARNING: This file is generated by combine.pl from combine.inc.\n";
 print "   Please edit one of those files rather than this one. */\n";
 print "\n";
 
-# Mask and 1/2 value for a single component.
-print "#define COMPONENT_SIZE ", $size, "\n";
-print "#define MASK ", mask($mask), "\n";
-print "#define ONE_HALF ", mask($one_half), "\n";
-print "\n";
-
-# Shifts and masks for green, blue, and alpha.
-print "#define G_SHIFT ", $size, "\n";
-print "#define B_SHIFT ", $size * 2, "\n";
-print "#define A_SHIFT ", $size * 3, "\n";
-print "#define G_MASK ", mask($mask . $zero_mask), "\n";
-print "#define B_MASK ", mask($mask . $zero_mask x 2), "\n";
-print "#define A_MASK ", mask($mask . $zero_mask x 3), "\n";
-print "\n";
+print "#line 1 \"combine.inc\"\n";
 
-# Special values for dealing with red + blue at the same time.
-print "#define RB_MASK ", mask($mask . $zero_mask . $mask), "\n";
-print "#define AG_MASK ", mask($mask . $zero_mask . $mask . $zero_mask), "\n";
-print "#define RB_ONE_HALF ", mask($one_half . $zero_mask . $one_half), "\n";
-print "#define RB_MASK_PLUS_ONE ", mask("1" . $zero_mask x 2 . "1" .  $zero_mask), "\n";
-print "\n";
+$mask_ = mask($mask);
+$one_half_ = mask($one_half);
+$g_mask = mask($mask . $zero_mask);
+$b_mask = mask($mask . $zero_mask x 2);
+$a_mask = mask($mask . $zero_mask x 3);
+$rb_mask = mask($mask . $zero_mask . $mask);
+$ag_mask = mask($mask . $zero_mask . $mask . $zero_mask);
+$rb_one_half = mask($one_half . $zero_mask . $one_half);
+$rb_mask_plus_one = mask("1" . $zero_mask x 2 . "1" .  $zero_mask);
 
-print "#line 1 \"combine.inc\"\n";
 while (<STDIN>) {
+    # Mask and 1/2 value for a single component.
+    s/#define COMPONENT_SIZE\b/$& $size/;
+    s/#define MASK\b/$& $mask_/;
+    s/#define ONE_HALF\b/$& $one_half_/;
+
+    # Shifts and masks for green, blue, and alpha (shifts are emitted as evaluated integers).
+    s/#define G_SHIFT\b/$& $size/;
+    s/#define B_SHIFT\b/$& @{[ $size * 2 ]}/;
+    s/#define A_SHIFT\b/$& @{[ $size * 3 ]}/;
+    s/#define G_MASK\b/$& $g_mask/;
+    s/#define B_MASK\b/$& $b_mask/;
+    s/#define A_MASK\b/$& $a_mask/;
+
+    # Special values for dealing with red + blue at the same time.
+    s/#define RB_MASK\b/$& $rb_mask/;
+    s/#define AG_MASK\b/$& $ag_mask/;
+    s/#define RB_ONE_HALF\b/$& $rb_one_half/;
+    s/#define RB_MASK_PLUS_ONE\b/$& $rb_mask_plus_one/;
+
     # Add 32/64 suffix to combining function types.
     s/\bCombineFuncC\b/CombineFuncC$pixel_size/;
     s/\bCombineFuncU\b/CombineFuncU$pixel_size/;
@@ -65,5 +73,8 @@ while (<STDIN>) {
     # Change the function table name for the 64-bit version.
     s/pixman_composeFunctions/pixman_composeFunctions64/ if $size == 16;
 
+    # Change the header for the 64-bit version (literal '.', not any character)
+    s/pixman-combine\.h/pixman-combine64.h/ if $size == 16;
+
     print;
 }
commit 8ef3f49a9580fb148c2e5f567c0aafddd4b0f136
Merge: 27b753c... 9a6d3a1...
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Sun Jun 1 16:37:52 2008 +0200

    Fixup

diff --cc configure.ac
index 3b73d7f,988bee1..6365c4c
--- a/configure.ac
+++ b/configure.ac
@@@ -229,42 -237,17 +237,50 @@@ dnl ===================================
  AC_SUBST(MMX_CFLAGS)
  AC_SUBST(SSE_CFLAGS)
  
 +dnl Check for VMX/Altivec
 +if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then
 +    VMX_CFLAGS="-faltivec"
 +else
 +    VMX_CFLAGS="-maltivec -mabi=altivec"
 +fi
 +
 +have_vmx_intrinsics=no
 +AC_MSG_CHECKING(whether to use VMX/Altivec intrinsics)
 +xserver_save_CFLAGS=$CFLAGS
 +CFLAGS="$CFLAGS $VMX_CFLAGS"
 +AC_COMPILE_IFELSE([
 +#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
 +#error "Need GCC >= 3.4 for sane altivec support"
 +#endif
 +#include <altivec.h>
 +int main () {
 +    vector unsigned int v = vec_splat_u32 (1);
 +    v = vec_sub (v, v);
 +    return 0;
 +}], have_vmx_intrinsics=yes)
 +CFLAGS=$xserver_save_CFLAGS
 +AC_MSG_RESULT($have_vmx_intrinsics)
 +
 +if test $have_vmx_intrinsics = yes ; then
 +   AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
 +else
 +   VMX_CFLAGS=
 +fi
 +AC_SUBST(VMX_CFLAGS)
 +
 +AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
 +
- dnl ===========================================================================
+ AC_ARG_ENABLE(gtk,
+    [AC_HELP_STRING([--disable-gtk],
+                    [disable tests using GTK+])],
+    [if test "x$enableval" = xno; then disable_gtk=yes; else disable_gtk=no; fi], [disable_gtk=no])
+ 
+ if test $disable_gtk = no ; then
+    PKG_CHECK_MODULES(GTK, [gtk+-2.0], [HAVE_GTK=yes], [HAVE_GTK=no])
+ else
+    HAVE_GTK=no
+ fi
  
- PKG_CHECK_MODULES(GTK, [gtk+-2.0], [HAVE_GTK=yes], [HAVE_GTK=no])
  AM_CONDITIONAL(HAVE_GTK, [test "x$HAVE_GTK" = xyes])
  
  AC_SUBST(GTK_CFLAGS)
diff --cc pixman/pixman-pict.c
index 9147af7,1479670..948c666
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@@ -1679,10 -1744,10 +1752,14 @@@ pixman_image_composite (pixman_op_
      fbComposeSetupMMX();
  #endif
  
 +#ifdef USE_VMX
 +    fbComposeSetupVMX();
 +#endif
 +
+ #ifdef USE_SSE2
+     fbComposeSetupSSE();
+ #endif
+ 
      if (srcRepeat && srcTransform &&
  	pSrc->bits.width == 1 &&
  	pSrc->bits.height == 1)
diff --cc pixman/pixman-vmx.c
index ac050a4,0000000..87dc4d1
mode 100644,000000..100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@@ -1,1225 -1,0 +1,1227 @@@
 +/*
 + * Copyright © 2007 Luca Barbato
 + *
 + * Permission to use, copy, modify, distribute, and sell this software and its
 + * documentation for any purpose is hereby granted without fee, provided that
 + * the above copyright notice appear in all copies and that both that
 + * copyright notice and this permission notice appear in supporting
 + * documentation, and that the name of Luca Barbato not be used in advertising or
 + * publicity pertaining to distribution of the software without specific,
 + * written prior permission.  Luca Barbato makes no representations about the
 + * suitability of this software for any purpose.  It is provided "as is"
 + * without express or implied warranty.
 + *
 + * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
 + * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
 + * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
 + * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
 + * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
 + * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 + * SOFTWARE.
 + *
 + * Author:  Luca Barbato (lu_zero at gentoo.org)
 + *
 + * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
 + */
 +
 +#include <config.h>
 +#include "pixman-vmx.h"
 +#include <altivec.h>
 +
 +#ifdef __GNUC__
 +#   define inline __inline__ __attribute__ ((__always_inline__))
 +#endif
 +
++#define Alpha(x) ((x) >> 24)
++
 +/*
 +  x_c = (x_c * a) / 255
 +*/
 +#define FbByteMul(x, a) do {					    \
 +        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
 +        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
 +        t &= 0xff00ff;						    \
 +								    \
 +        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;		    \
 +        x = (x + ((x >> 8) & 0xff00ff));			    \
 +        x &= 0xff00ff00;					    \
 +        x += t;							    \
 +    } while (0)
 +
 +/*
 +  x_c = min((x_c * a) / 255 + y_c, 255)
 +*/
 +#define FbByteMulAdd(x, a, y) do {				    \
 +        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
 +        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
 +        t &= 0xff00ff;						    \
 +        t += y & 0xff00ff;					    \
 +        t |= 0x1000100 - ((t >> 8) & 0xff00ff);			    \
 +        t &= 0xff00ff;						    \
 +								    \
 +        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;                 \
 +        x = (x + ((x >> 8) & 0xff00ff)) >> 8;                       \
 +        x &= 0xff00ff;                                              \
 +        x += (y >> 8) & 0xff00ff;                                   \
 +        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                     \
 +        x &= 0xff00ff;                                              \
 +        x <<= 8;                                                    \
 +        x += t;                                                     \
 +    } while (0)
 +
 +/*
 +  x_c = min((x_c * a + y_c * b) / 255, 255)
 +*/
 +#define FbByteAddMul(x, a, y, b) do {                                   \
 +        uint32_t t;							\
 +        uint32_t r = (x >> 24) * a + (y >> 24) * b + 0x80;		\
 +        r += (r >> 8);                                                  \
 +        r >>= 8;                                                        \
 +									\
 +        t = (x & 0xff00) * a + (y & 0xff00) * b;                        \
 +        t += (t >> 8) + 0x8000;                                         \
 +        t >>= 16;                                                       \
 +									\
 +        t |= r << 16;                                                   \
 +        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
 +        t &= 0xff00ff;                                                  \
 +        t <<= 8;                                                        \
 +									\
 +        r = ((x >> 16) & 0xff) * a + ((y >> 16) & 0xff) * b + 0x80;     \
 +        r += (r >> 8);                                                  \
 +        r >>= 8;                                                        \
 +									\
 +        x = (x & 0xff) * a + (y & 0xff) * b + 0x80;                     \
 +        x += (x >> 8);                                                  \
 +        x >>= 8;                                                        \
 +        x |= r << 16;                                                   \
 +        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
 +        x &= 0xff00ff;                                                  \
 +        x |= t;                                                         \
 +    } while (0)
 +
 +/*
 +  x_c = (x_c * a_c) / 255
 +*/
 +#define FbByteMulC(x, a) do {				  \
 +        uint32_t t;                                       \
 +        uint32_t r = (x & 0xff) * (a & 0xff);             \
 +        r |= (x & 0xff0000) * ((a >> 16) & 0xff);	  \
 +	r += 0x800080;					  \
 +        r = (r + ((r >> 8) & 0xff00ff)) >> 8;		  \
 +        r &= 0xff00ff;					  \
 +							  \
 +        x >>= 8;					  \
 +        t = (x & 0xff) * ((a >> 8) & 0xff);		  \
 +        t |= (x & 0xff0000) * (a >> 24);		  \
 +        t += 0x800080;					  \
 +        t = t + ((t >> 8) & 0xff00ff);			  \
 +        x = r | (t & 0xff00ff00);			  \
 +							  \
 +    } while (0)
 +
 +/*
 +  x_c = min((x_c * a_c) / 255 + y_c, 255)
 +*/
 +#define FbByteMulAddC(x, a, y) do {				      \
 +        uint32_t t;                                                   \
 +        uint32_t r = (x & 0xff) * (a & 0xff);                         \
 +        r |= (x & 0xff0000) * ((a >> 16) & 0xff);		      \
 +	r += 0x800080;						      \
 +	r = (r + ((r >> 8) & 0xff00ff)) >> 8;			      \
 +        r &= 0xff00ff;						      \
 +        r += y & 0xff00ff;					      \
 +        r |= 0x1000100 - ((r >> 8) & 0xff00ff);			      \
 +        r &= 0xff00ff;						      \
 +								      \
 +        x >>= 8;                                                       \
 +        t = (x & 0xff) * ((a >> 8) & 0xff);                            \
 +        t |= (x & 0xff0000) * (a >> 24);                               \
 +	t += 0x800080;                                                 \
 +        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			       \
 +        t &= 0xff00ff;                                                 \
 +        t += (y >> 8) & 0xff00ff;                                      \
 +        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                        \
 +        t &= 0xff00ff;                                                 \
 +        x = r | (t << 8);                                              \
 +    } while (0)
 +
 +/*
 +  x_c = min((x_c * a_c + y_c * b) / 255, 255)
 +*/
 +#define FbByteAddMulC(x, a, y, b) do {                                  \
 +        uint32_t t;							\
 +        uint32_t r = (x >> 24) * (a >> 24) + (y >> 24) * b;		\
 +        r += (r >> 8) + 0x80;                                           \
 +        r >>= 8;                                                        \
 +									\
 +        t = (x & 0xff00) * ((a >> 8) & 0xff) + (y & 0xff00) * b;        \
 +        t += (t >> 8) + 0x8000;                                         \
 +        t >>= 16;                                                       \
 +									\
 +        t |= r << 16;                                                   \
 +        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
 +        t &= 0xff00ff;                                                  \
 +        t <<= 8;                                                        \
 +									\
 +        r = ((x >> 16) & 0xff) * ((a >> 16) & 0xff) + ((y >> 16) & 0xff) * b + 0x80; \
 +        r += (r >> 8);                                                  \
 +        r >>= 8;                                                        \
 +									\
 +        x = (x & 0xff) * (a & 0xff) + (y & 0xff) * b + 0x80;            \
 +        x += (x >> 8);                                                  \
 +        x >>= 8;                                                        \
 +        x |= r << 16;                                                   \
 +        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
 +        x &= 0xff00ff;                                                  \
 +        x |= t;                                                         \
 +    } while (0)
 +
 +/*
 +  x_c = min(x_c + y_c, 255)
 +*/
 +#define FbByteAdd(x, y) do {                                            \
 +        uint32_t t;							\
 +        uint32_t r = (x & 0xff00ff) + (y & 0xff00ff);			\
 +        r |= 0x1000100 - ((r >> 8) & 0xff00ff);                         \
 +        r &= 0xff00ff;                                                  \
 +									\
 +        t = ((x >> 8) & 0xff00ff) + ((y >> 8) & 0xff00ff);              \
 +        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
 +        r |= (t & 0xff00ff) << 8;                                       \
 +        x = r;                                                          \
 +    } while (0)
 +
 +static inline vector unsigned int
 +splat_alpha (vector unsigned int pix) {
 +    return vec_perm (pix, pix,
 +    (vector unsigned char)AVV(0x00,0x00,0x00,0x00, 0x04,0x04,0x04,0x04,
 +                               0x08,0x08,0x08,0x08, 0x0C,0x0C,0x0C,0x0C));
 +}
 +
 +static inline vector unsigned int
 +pix_multiply (vector unsigned int p, vector unsigned int a)
 +{
 +    vector unsigned short hi, lo, mod;
 +    /* unpack to short */
 +    hi = (vector unsigned short)
 +                    vec_mergeh ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)p);
 +    mod = (vector unsigned short)
 +                    vec_mergeh ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)a);
 +
 +    hi = vec_mladd (hi, mod, (vector unsigned short)
 +                            AVV(0x0080,0x0080,0x0080,0x0080,
 +                                 0x0080,0x0080,0x0080,0x0080));
 +
 +    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
 +
 +    hi = vec_sr (hi, vec_splat_u16 (8));
 +
 +    /* unpack to short */
 +    lo = (vector unsigned short)
 +                    vec_mergel ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)p);
 +    mod = (vector unsigned short)
 +                    vec_mergel ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)a);
 +
 +    lo = vec_mladd (lo, mod, (vector unsigned short)
 +                            AVV(0x0080,0x0080,0x0080,0x0080,
 +                                 0x0080,0x0080,0x0080,0x0080));
 +
 +    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
 +
 +    lo = vec_sr (lo, vec_splat_u16 (8));
 +
 +    return (vector unsigned int)vec_packsu (hi, lo);
 +}
 +
 +static inline vector unsigned int
 +pix_add (vector unsigned int a, vector unsigned int b)
 +{
 +    return (vector unsigned int)vec_adds ((vector unsigned char)a,
 +                     (vector unsigned char)b);
 +}
 +
 +static inline vector unsigned int
 +pix_add_mul (vector unsigned int x, vector unsigned int a,
 +             vector unsigned int y, vector unsigned int b)
 +{
 +    vector unsigned short hi, lo, mod, hiy, loy, mody;
 +
 +    hi = (vector unsigned short)
 +                    vec_mergeh ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)x);
 +    mod = (vector unsigned short)
 +                    vec_mergeh ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)a);
 +    hiy = (vector unsigned short)
 +                    vec_mergeh ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)y);
 +    mody = (vector unsigned short)
 +                    vec_mergeh ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)b);
 +
 +    hi = vec_mladd (hi, mod, (vector unsigned short)
 +                             AVV(0x0080,0x0080,0x0080,0x0080,
 +                                  0x0080,0x0080,0x0080,0x0080));
 +
 +    hi = vec_mladd (hiy, mody, hi);
 +
 +    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
 +
 +    hi = vec_sr (hi, vec_splat_u16 (8));
 +
 +    lo = (vector unsigned short)
 +                    vec_mergel ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)x);
 +    mod = (vector unsigned short)
 +                    vec_mergel ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)a);
 +
 +    loy = (vector unsigned short)
 +                    vec_mergel ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)y);
 +    mody = (vector unsigned short)
 +                    vec_mergel ((vector unsigned char)AVV(0),
 +                                (vector unsigned char)b);
 +
 +    lo = vec_mladd (lo, mod, (vector unsigned short)
 +                             AVV(0x0080,0x0080,0x0080,0x0080,
 +                                  0x0080,0x0080,0x0080,0x0080));
 +
 +    lo = vec_mladd (loy, mody, lo);
 +
 +    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
 +
 +    lo = vec_sr (lo, vec_splat_u16 (8));
 +
 +    return (vector unsigned int)vec_packsu (hi, lo);
 +}
 +
 +static inline vector unsigned int
 +negate (vector unsigned int src)
 +{
 +    return vec_nor (src, src);
 +}
 +/* dest*~srca + src */
 +static inline vector unsigned int
 +over (vector unsigned int src, vector unsigned int srca,
 +      vector unsigned int dest)
 +{
 +    vector unsigned char tmp = (vector unsigned char)
 +                                pix_multiply (dest, negate (srca));
 +    tmp = vec_adds ((vector unsigned char)src, tmp);
 +    return (vector unsigned int)tmp;
 +}
 +
 +/* in == pix_multiply */
 +#define in_over(src, srca, mask, dest) over (pix_multiply (src, mask),\
 +                                             pix_multiply (srca, mask), dest)
 +
 +
 +#define COMPUTE_SHIFT_MASK(source) \
 +    source ## _mask = vec_lvsl (0, source);
 +
 +#define COMPUTE_SHIFT_MASKS(dest, source) \
 +    dest ## _mask = vec_lvsl (0, dest); \
 +    source ## _mask = vec_lvsl (0, source); \
 +    store_mask = vec_lvsr (0, dest);
 +
 +#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
 +    mask ## _mask = vec_lvsl (0, mask); \
 +    dest ## _mask = vec_lvsl (0, dest); \
 +    source ## _mask = vec_lvsl (0, source); \
 +    store_mask = vec_lvsr (0, dest);
 +
 +/* notice you have to declare temp vars...
 + * Note: tmp3 and tmp4 must remain untouched!
 + */
 +
 +#define LOAD_VECTORS(dest, source) \
 +        tmp1 = (typeof(tmp1))vec_ld(0, source); \
 +        tmp2 = (typeof(tmp2))vec_ld(15, source); \
 +        tmp3 = (typeof(tmp3))vec_ld(0, dest); \
 +        v ## source = (typeof(v ## source)) \
 +                       vec_perm(tmp1, tmp2, source ## _mask); \
 +        tmp4 = (typeof(tmp4))vec_ld(15, dest); \
 +        v ## dest = (typeof(v ## dest)) \
 +                     vec_perm(tmp3, tmp4, dest ## _mask);
 +
 +#define LOAD_VECTORSC(dest, source, mask) \
 +        tmp1 = (typeof(tmp1))vec_ld(0, source); \
 +        tmp2 = (typeof(tmp2))vec_ld(15, source); \
 +        tmp3 = (typeof(tmp3))vec_ld(0, dest); \
 +        v ## source = (typeof(v ## source)) \
 +                       vec_perm(tmp1, tmp2, source ## _mask); \
 +        tmp4 = (typeof(tmp4))vec_ld(15, dest); \
 +        tmp1 = (typeof(tmp1))vec_ld(0, mask); \
 +        v ## dest = (typeof(v ## dest)) \
 +                     vec_perm(tmp3, tmp4, dest ## _mask); \
 +        tmp2 = (typeof(tmp2))vec_ld(15, mask); \
 +        v ## mask = (typeof(v ## mask)) \
 +                     vec_perm(tmp1, tmp2, mask ## _mask);
 +#define STORE_VECTOR(dest) \
 +        edges = vec_perm (tmp4, tmp3, dest ## _mask); \
 +        tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
 +        tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
 +        vec_st ((vector unsigned int) tmp3, 15, dest ); \
 +        vec_st ((vector unsigned int) tmp1, 0, dest );
 +
 +static FASTCALL void
 +vmxCombineMaskU (uint32_t *src, const uint32_t *msk, int width)
 +{
 +    int i;
 +    vector unsigned int  vsrc, vmsk;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         src_mask, msk_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKS(src, msk)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORS(src, msk)
 +
 +        vsrc = pix_multiply (vsrc, splat_alpha (vmsk));
 +
 +        STORE_VECTOR(src)
 +
 +        msk+=4;
 +        src+=4;
 +    }
 +
 +    for (i = width%4; --i >= 0;) {
 +        uint32_t a = msk[i] >> 24;
 +        uint32_t s = src[i];
 +        FbByteMul (s, a);
 +        src[i] = s;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineOverU (uint32_t *dest, const uint32_t *src, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKS(dest, src)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORS(dest, src)
 +
 +        vdest = over (vsrc, splat_alpha (vsrc), vdest);
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t ia = Alpha (~s);
 +
 +        FbByteMulAdd (d, ia, s);
 +        dest[i] = d;
 +    }
 +}
 +
 +
 +static FASTCALL void
 +vmxCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKS(dest, src)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORS(dest, src)
 +
 +        vdest = over (vdest, splat_alpha (vdest) , vsrc);
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t ia = Alpha (~dest[i]);
 +
 +        FbByteMulAdd (s, ia, d);
 +        dest[i] = s;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineInU (uint32_t *dest, const uint32_t *src, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKS(dest, src)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORS(dest, src)
 +
 +        vdest = pix_multiply (vsrc, splat_alpha (vdest));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +
 +        uint32_t s = src[i];
 +        uint32_t a = Alpha (dest[i]);
 +        FbByteMul (s, a);
 +        dest[i] = s;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKS(dest, src)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORS(dest, src)
 +
 +        vdest = pix_multiply (vdest, splat_alpha (vsrc));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t d = dest[i];
 +        uint32_t a = Alpha (src[i]);
 +        FbByteMul (d, a);
 +        dest[i] = d;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineOutU (uint32_t *dest, const uint32_t *src, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKS(dest, src)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORS(dest, src)
 +
 +        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t s = src[i];
 +        uint32_t a = Alpha (~dest[i]);
 +        FbByteMul (s, a);
 +        dest[i] = s;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKS(dest, src)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORS(dest, src)
 +
 +        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t d = dest[i];
 +        uint32_t a = Alpha (~src[i]);
 +        FbByteMul (d, a);
 +        dest[i] = d;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKS(dest, src)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORS(dest, src)
 +
 +        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
 +                            vdest, splat_alpha (negate (vsrc)));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t dest_a = Alpha (d);
 +        uint32_t src_ia = Alpha (~s);
 +
 +        FbByteAddMul (s, dest_a, d, src_ia);
 +        dest[i] = s;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKS(dest, src)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORS(dest, src)
 +
 +        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
 +                            vsrc, splat_alpha (negate (vdest)));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t src_a = Alpha (s);
 +        uint32_t dest_ia = Alpha (~d);
 +
 +        FbByteAddMul (s, dest_ia, d, src_a);
 +        dest[i] = s;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineXorU (uint32_t *dest, const uint32_t *src, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKS(dest, src)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORS (dest, src)
 +
 +        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
 +                            vdest, splat_alpha (negate (vsrc)));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t src_ia = Alpha (~s);
 +        uint32_t dest_ia = Alpha (~d);
 +
 +        FbByteAddMul (s, dest_ia, d, src_ia);
 +        dest[i] = s;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineAddU (uint32_t *dest, const uint32_t *src, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKS(dest, src)
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORS(dest, src)
 +
 +        vdest = pix_add (vsrc, vdest);
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        FbByteAdd (d, s);
 +        dest[i] = d;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc, vmask;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, mask_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKC(dest, src, mask);
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORSC(dest, src, mask)
 +
 +        vdest = pix_multiply (vsrc, vmask);
 +
 +        STORE_VECTOR(dest)
 +
 +        mask+=4;
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t a = mask[i];
 +        uint32_t s = src[i];
 +        FbByteMulC (s, a);
 +        dest[i] = s;
 +    }
 +}
 +
 +/*
 + * Component-alpha OVER.
 + *
 + * The vector loop computes, per channel,
 + *     dest = src * mask + dest * ~(mask * alpha (src))
 + * (see in_over / over above).  The scalar tail handles the remaining
 + * width % 4 pixels and has to evaluate the same expression; it previously
 + * used ~mask without first scaling the mask by the source alpha.
 + */
 +static FASTCALL void
 +vmxCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc, vmask;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, mask_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKC(dest, src, mask);
 +
 +    /* four pixels per iteration */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORSC(dest, src, mask)
 +
 +        vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);
 +
 +        STORE_VECTOR(dest)
 +
 +        mask+=4;
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    /* leftover pixels; pointers already point past the vectorised part */
 +    for (i = width%4; --i >=0;) {
 +        uint32_t a = mask[i];
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t sa = Alpha (s);
 +
 +        FbByteMulC (s, a);          /* s = src * mask (per channel) */
 +        FbByteMul (a, sa);          /* a = mask * alpha (src) */
 +        FbByteMulAddC (d, ~a, s);   /* d = dest * ~a + s */
 +        dest[i] = d;
 +    }
 +}
 +
 +/*
 + * Component-alpha OVER_REVERSE.
 + *
 + * The vector loop computes
 + *     dest = dest + (src * mask) * ~alpha (dest)
 + * The inverse destination alpha is a single 8-bit factor, so the scalar
 + * tail uses Alpha (~d) with the scalar multiply-add, the same way
 + * vmxCombineOverReverseU does.  The previous tail fed ~Alpha (d), whose
 + * upper 24 bits are all set, to the per-channel FbByteMulAddC.
 + */
 +static FASTCALL void
 +vmxCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc, vmask;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, mask_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKC(dest, src, mask);
 +
 +    /* four pixels per iteration */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORSC (dest, src, mask)
 +
 +        vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));
 +
 +        STORE_VECTOR(dest)
 +
 +        mask+=4;
 +        src+=4;
 +        dest+=4;
 +    }
 +
 +    /* leftover pixels */
 +    for (i = width%4; --i >=0;) {
 +        uint32_t a = mask[i];
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t ida = Alpha (~d);
 +
 +        FbByteMulC (s, a);          /* s = src * mask (per channel) */
 +        FbByteMulAdd (s, ida, d);   /* s = s * ~alpha (dest) + dest */
 +        dest[i] = s;
 +    }
 +}
 +
 +/*
 + * Component-alpha IN.
 + *
 + * Both paths compute
 + *     dest = (src * mask) * alpha (dest)
 + * where src * mask is a per-channel product.  The scalar tail therefore
 + * needs FbByteMulC for the mask (as vmxCombineSrcC does); FbByteMul takes a
 + * single 8-bit factor and must not be handed the whole 32-bit mask pixel.
 + */
 +static FASTCALL void
 +vmxCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc, vmask;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, mask_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKC(dest, src, mask)
 +
 +    /* four pixels per iteration */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORSC(dest, src, mask)
 +
 +        vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +        mask+=4;
 +    }
 +
 +    /* leftover pixels */
 +    for (i = width%4; --i >=0;) {
 +        uint32_t a = mask[i];
 +        uint32_t s = src[i];
 +        uint32_t da = Alpha (dest[i]);
 +
 +        FbByteMulC (s, a);          /* s = src * mask (per channel) */
 +        FbByteMul (s, da);          /* s = s * alpha (dest) */
 +        dest[i] = s;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc, vmask;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, mask_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKC(dest, src, mask)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORSC(dest, src, mask)
 +
 +        vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +        mask+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t a = mask[i];
 +        uint32_t d = dest[i];
 +        uint32_t sa = Alpha (src[i]);
 +        FbByteMul (a, sa);
 +        FbByteMulC (d, a);
 +        dest[i] = d;
 +    }
 +}
 +
 +/*
 + * Component-alpha OUT.
 + *
 + *     dest = (src * mask) * ~alpha (dest)
 + *
 + * The vector expression used to be a verbatim copy of the IN operator
 + * (alpha (dest) without the negate), so full groups of four pixels were
 + * composited with IN while the tail used OUT.  The tail in turn applied the
 + * per-channel FbByteMulC to the scalar inverse alpha; it now uses the scalar
 + * FbByteMul, matching vmxCombineOutU.
 + */
 +static FASTCALL void
 +vmxCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc, vmask;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, mask_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKC(dest, src, mask)
 +
 +    /* four pixels per iteration */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORSC(dest, src, mask)
 +
 +        vdest = pix_multiply (pix_multiply (vsrc, vmask),
 +                             splat_alpha (negate (vdest)));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +        mask+=4;
 +    }
 +
 +    /* leftover pixels */
 +    for (i = width%4; --i >=0;) {
 +        uint32_t a = mask[i];
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t da = Alpha (~d);
 +
 +        FbByteMulC (s, a);          /* s = src * mask (per channel) */
 +        FbByteMul (s, da);          /* s = s * ~alpha (dest) */
 +        dest[i] = s;
 +    }
 +}
 +
 +/*
 + * Component-alpha OUT_REVERSE.
 + *
 + *     dest = dest * ~(mask * alpha (src))
 + *
 + * alpha (src) is a single 8-bit factor, so the tail scales the mask with the
 + * scalar FbByteMul (as vmxCombineInReverseC does) before the per-channel
 + * multiply of dest by the inverted result.
 + */
 +static FASTCALL void
 +vmxCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc, vmask;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, mask_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKC(dest, src, mask)
 +
 +    /* four pixels per iteration */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORSC(dest, src, mask)
 +
 +        vdest = pix_multiply (vdest,
 +                             negate (pix_multiply (vmask, splat_alpha (vsrc))));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +        mask+=4;
 +    }
 +
 +    /* leftover pixels */
 +    for (i = width%4; --i >=0;) {
 +        uint32_t a = mask[i];
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t sa = Alpha (s);
 +
 +        FbByteMul (a, sa);          /* a = mask * alpha (src) */
 +        FbByteMulC (d, ~a);         /* d = dest * ~a (per channel) */
 +        dest[i] = d;
 +    }
 +}
 +
 +/*
 + * Component-alpha ATOP.
 + *
 + *     dest = (src * mask) * alpha (dest) + dest * ~(mask * alpha (src))
 + *
 + * The vector path used to scale the mask by its own alpha
 + * (splat_alpha (vmask)) instead of the source alpha, which disagrees with
 + * the scalar tail below and with the AtopReverse/Xor variants.
 + */
 +static FASTCALL void
 +vmxCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc, vmask;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, mask_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKC(dest, src, mask)
 +
 +    /* four pixels per iteration */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORSC(dest, src, mask)
 +
 +        vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest),
 +                            vdest,
 +                            negate (pix_multiply (vmask,
 +                                                splat_alpha (vsrc))));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +        mask+=4;
 +    }
 +
 +    /* leftover pixels */
 +    for (i = width%4; --i >=0;) {
 +        uint32_t a = mask[i];
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t sa = Alpha (s);
 +        uint32_t da = Alpha (d);
 +
 +        FbByteMulC (s, a);              /* s = src * mask (per channel) */
 +        FbByteMul (a, sa);              /* a = mask * alpha (src) */
 +        FbByteAddMulC (d, ~a, s, da);   /* d = dest * ~a + s * alpha (dest) */
 +        dest[i] = d;
 +    }
 +}
 +
 +/*
 + * Component-alpha ATOP_REVERSE.
 + *
 + *     dest = dest * (mask * alpha (src)) + (src * mask) * ~alpha (dest)
 + *
 + * The last factor is a single 8-bit value.  The tail takes it as
 + * Alpha (~d); the previous ~Alpha (d) left the upper 24 bits set and so was
 + * not a valid 8-bit factor for FbByteAddMulC.
 + */
 +static FASTCALL void
 +vmxCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc, vmask;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, mask_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKC(dest, src, mask)
 +
 +    /* four pixels per iteration */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORSC(dest, src, mask)
 +
 +        vdest = pix_add_mul (vdest,
 +                            pix_multiply (vmask, splat_alpha (vsrc)),
 +                            pix_multiply (vsrc, vmask),
 +                            negate (splat_alpha (vdest)));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +        mask+=4;
 +    }
 +
 +    /* leftover pixels */
 +    for (i = width%4; --i >=0;) {
 +        uint32_t a = mask[i];
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t sa = Alpha (s);
 +        uint32_t ida = Alpha (~d);
 +
 +        FbByteMulC (s, a);              /* s = src * mask (per channel) */
 +        FbByteMul (a, sa);              /* a = mask * alpha (src) */
 +        FbByteAddMulC (d, a, s, ida);   /* d = dest * a + s * ~alpha (dest) */
 +        dest[i] = d;
 +    }
 +}
 +
 +/*
 + * Component-alpha XOR.
 + *
 + *     dest = dest * ~(mask * alpha (src)) + (src * mask) * ~alpha (dest)
 + *
 + * As in vmxCombineAtopReverseC, the inverse destination alpha is taken as
 + * Alpha (~d) so that the scalar tail passes a proper 8-bit factor and
 + * matches the vector path.
 + */
 +static FASTCALL void
 +vmxCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc, vmask;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, mask_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKC(dest, src, mask)
 +
 +    /* four pixels per iteration */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORSC(dest, src, mask)
 +
 +        vdest = pix_add_mul (vdest,
 +                            negate (pix_multiply (vmask, splat_alpha (vsrc))),
 +                            pix_multiply (vsrc, vmask),
 +                            negate (splat_alpha (vdest)));
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +        mask+=4;
 +    }
 +
 +    /* leftover pixels */
 +    for (i = width%4; --i >=0;) {
 +        uint32_t a = mask[i];
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +        uint32_t sa = Alpha (s);
 +        uint32_t ida = Alpha (~d);
 +
 +        FbByteMulC (s, a);              /* s = src * mask (per channel) */
 +        FbByteMul (a, sa);              /* a = mask * alpha (src) */
 +        FbByteAddMulC (d, ~a, s, ida);  /* d = dest * ~a + s * ~alpha (dest) */
 +        dest[i] = d;
 +    }
 +}
 +
 +static FASTCALL void
 +vmxCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
 +{
 +    int i;
 +    vector unsigned int  vdest, vsrc, vmask;
 +    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
 +                         dest_mask, mask_mask, src_mask, store_mask;
 +
 +    COMPUTE_SHIFT_MASKC(dest, src, mask)
 +
 +    /* printf ("%s\n",__PRETTY_FUNCTION__); */
 +    for (i = width/4; i > 0; i--) {
 +
 +        LOAD_VECTORSC(dest, src, mask)
 +
 +        vdest = pix_add (pix_multiply (vsrc, vmask), vdest);
 +
 +        STORE_VECTOR(dest)
 +
 +        src+=4;
 +        dest+=4;
 +        mask+=4;
 +    }
 +
 +    for (i = width%4; --i >=0;) {
 +        uint32_t a = mask[i];
 +        uint32_t s = src[i];
 +        uint32_t d = dest[i];
 +
 +        FbByteMulC (s, a);
 +        FbByteAdd (s, d);
 +        dest[i] = s;
 +    }
 +}
 +
 +
 +#if 0
 +void
 +fbCompositeSolid_nx8888vmx (pixman_operator_t	op,
 +			    pixman_image_t * pSrc,
 +			    pixman_image_t * pMask,
 +			    pixman_image_t * pDst,
 +			    int16_t	xSrc,
 +			    int16_t	ySrc,
 +			    int16_t	xMask,
 +			    int16_t	yMask,
 +			    int16_t	xDst,
 +			    int16_t	yDst,
 +			    uint16_t	width,
 +			    uint16_t	height)
 +{
 +    uint32_t	src;
 +    uint32_t	*dstLine, *dst;
 +    int	dstStride;
 +
 +    fbComposeGetSolid (pSrc, pDst, src);
 +
 +    if (src >> 24 == 0)
 +	return;
 +
 +    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
 +
 +    while (height--)
 +    {
 +	dst = dstLine;
 +	dstLine += dstStride;
 +	/* XXX vmxCombineOverU (dst, src, width); */
 +    }
 +}
 +
 +void
 +fbCompositeSolid_nx0565vmx (pixman_operator_t	op,
 +			    pixman_image_t * pSrc,
 +			    pixman_image_t * pMask,
 +			    pixman_image_t * pDst,
 +			    int16_t	xSrc,
 +			    int16_t	ySrc,
 +			    int16_t	xMask,
 +			    int16_t	yMask,
 +			    int16_t	xDst,
 +			    int16_t	yDst,
 +			    uint16_t	width,
 +			    uint16_t	height)
 +{
 +    uint32_t	src;
 +    uint16_t	*dstLine, *dst;
 +    uint16_t	w;
 +    int	dstStride;
 +
 +    fbComposeGetSolid (pSrc, pDst, src);
 +
 +    if (src >> 24 == 0)
 +	return;
 +
 +    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
 +
 +    while (height--)
 +    {
 +	dst = dstLine;
 +	dstLine += dstStride;
 +       vmxCombineOverU565(dst, src, width);
 +    }
 +}
 +
 +#endif
 +
 +void fbComposeSetupVMX (void)
 +{
 +    /* check if we have VMX support and initialize accordingly */
 +    if (pixman_have_vmx ()) {
 +        pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = vmxCombineOverU;
 +        pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseU;
 +        pixman_composeFunctions.combineU[PIXMAN_OP_IN] = vmxCombineInU;
 +        pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseU;
 +        pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = vmxCombineOutU;
 +        pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseU;
 +        pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = vmxCombineAtopU;
 +        pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseU;
 +        pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = vmxCombineXorU;
 +        pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = vmxCombineAddU;
 +
 +        pixman_composeFunctions.combineC[PIXMAN_OP_SRC] = vmxCombineSrcC;
 +        pixman_composeFunctions.combineC[PIXMAN_OP_OVER] = vmxCombineOverC;
 +        pixman_composeFunctions.combineC[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseC;
 +        pixman_composeFunctions.combineC[PIXMAN_OP_IN] = vmxCombineInC;
 +        pixman_composeFunctions.combineC[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseC;
 +        pixman_composeFunctions.combineC[PIXMAN_OP_OUT] = vmxCombineOutC;
 +        pixman_composeFunctions.combineC[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseC;
 +        pixman_composeFunctions.combineC[PIXMAN_OP_ATOP] = vmxCombineAtopC;
 +        pixman_composeFunctions.combineC[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseC;
 +        pixman_composeFunctions.combineC[PIXMAN_OP_XOR] = vmxCombineXorC;
 +        pixman_composeFunctions.combineC[PIXMAN_OP_ADD] = vmxCombineAddC;
 +
 +        pixman_composeFunctions.combineMaskU = vmxCombineMaskU;
 +    }
 +}
commit 27b753c9deabe5ac775021abfae98a6a1830cfc2
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Thu Apr 24 01:08:29 2008 +0200

    Remove unused macro

diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 8b17f66..ac050a4 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -338,11 +338,6 @@ over (vector unsigned int src, vector unsigned int srca,
 /* notice you have to declare temp vars...
  * Note: tmp3 and tmp4 must remain untouched!
  */
-#define LOAD_VECTOR (source) \
-        tmp1 = (typeof(v ## source))vec_ld(0, source); \
-        tmp2 = (typeof(v ## source))vec_ld(15, source); \
-        v ## source = (typeof(v ## source)) \
-                       vec_perm(tmp1, tmp2, source ## _mask);
 
 #define LOAD_VECTORS(dest, source) \
         tmp1 = (typeof(tmp1))vec_ld(0, source); \
commit 584118fb6c15d695b6a203c2df51411958957880
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Thu Apr 24 01:06:38 2008 +0200

    Remove VMX from CPUFeatures, ppc isn't using it at all

diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 9c6a375..9147af7 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1878,7 +1878,6 @@ enum CPUFeatures {
     NoFeatures = 0,
     MMX = 0x1,
     MMX_Extensions = 0x2,
-    VMX = 0x4,
     SSE = 0x6,
     SSE2 = 0x8,
     CMOV = 0x10
commit fc96121afd5d8451c9d8ba8a693e589d1999d131
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Thu Apr 24 01:03:08 2008 +0200

    Simplify cpu feature check

diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 1b4c81f..9c6a375 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1826,44 +1826,38 @@ pixman_image_composite (pixman_op_t      op,
  * "-maltivec -mabi=altivec", as gcc would try to save vector register
  * across function calls causing SIGILL on cpus without Altivec/vmx.
  */
+static pixman_bool_t initialized = FALSE;
+static volatile pixman_bool_t have_vmx = TRUE;
+
 #ifdef __APPLE__
 #include <sys/sysctl.h>
 
 pixman_bool_t pixman_have_vmx (void) {
-    int hasVMX = 0;
-    size_t length = sizeof ( hasVMX );
-    int error = sysctlbyname ("hw.optional.altivec", &hasVMX, &length, NULL, 0);
-    if ( 0 != error ) return 0;
-    return hasVMX;
+    if(!initialized) {
+        size_t length = sizeof(have_vmx);
+        int error =
+            sysctlbyname("hw.optional.altivec", &have_vmx, &length, NULL, 0);
+        if(error) have_vmx = FALSE;
+        initialized = TRUE;
+    }
+    return have_vmx;
 }
 
 #else
 #include <signal.h>
-#include <setjmp.h>
-
-static sigjmp_buf jmp;
-static volatile sig_atomic_t in_test = 0;
 
 static void vmx_test (int sig) {
-    if (! in_test) {
-        signal (sig, SIG_DFL);
-        raise (sig);
-    }
-    in_test = 0;
-    siglongjmp (jmp, 1);
+    have_vmx = FALSE;
 }
 
 pixman_bool_t pixman_have_vmx (void) {
-    signal (SIGILL, vmx_test);
-    if (sigsetjmp (jmp, 1)) {
-        signal (SIGILL, SIG_DFL);
-    } else {
-        in_test = 1;
+    if (!initialized) {
+        signal(SIGILL, vmx_test);
         asm volatile ( "vor 0, 0, 0" );
-        signal (SIGILL, SIG_DFL);
-        return 1;
+        signal(SIGILL, SIG_DFL);
+        initialized = TRUE;
     }
-    return 0;
+    return have_vmx;
 }
 #endif /* __APPLE__ */
 #endif /* USE_VMX */
commit 08b317a5f519978cfabebd75d5595b19fc1d1425
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Thu Apr 24 00:41:16 2008 +0200

    Refactor path selection

diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index c758823..1b4c81f 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1742,23 +1742,19 @@ pixman_image_composite (pixman_op_t      op,
 #ifdef USE_SSE2
 	if (pixman_have_sse ())
 	    info = get_fast_path (sse_fast_paths, op, pSrc, pMask, pDst, pixbuf);
-	if (!info)
 #endif
 
 #ifdef USE_MMX
-
-	if (pixman_have_mmx())
+	if (!info && pixman_have_mmx())
 	    info = get_fast_path (mmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
-	if (!info)
 #endif
 
 #ifdef USE_VMX
 
-	if (pixman_have_vmx())
+	if (!info && pixman_have_vmx())
 	    info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
-	if (!info)
 #endif
-
+        if (!info)
 	    info = get_fast_path (c_fast_paths, op, pSrc, pMask, pDst, pixbuf);
 
 	if (info)
commit 083cadd4c7d1270b0ee9f0365327b872898d1561
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Thu Apr 24 00:36:51 2008 +0200

    Force inlining

diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 6d275ee..8b17f66 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -29,6 +29,10 @@
 #include "pixman-vmx.h"
 #include <altivec.h>
 
+#ifdef __GNUC__
+#   define inline __inline__ __attribute__ ((__always_inline__))
+#endif
+
 /*
   x_c = (x_c * a) / 255
 */
commit 8e68544e0d8cc7af24fb8b298fd6afd47c620136
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Sat Apr 12 13:16:46 2008 +0200

    Unbreak vmx pixman

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 2217e4e..b25fd41 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -50,7 +50,7 @@ endif
 
 # vmx code
 if USE_VMX
-noinst_LTLIBRARIES = libpixman-vmx.la
+noinst_LTLIBRARIES += libpixman-vmx.la
 libpixman_vmx_la_SOURCES = \
 	pixman-vmx.c		\
 	pixman-vmx.h
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 0008dc5..6d275ee 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -29,6 +29,164 @@
 #include "pixman-vmx.h"
 #include <altivec.h>
 
+/*
+  x_c = (x_c * a) / 255
+*/
+#define FbByteMul(x, a) do {					    \
+        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
+        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
+        t &= 0xff00ff;						    \
+								    \
+        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;		    \
+        x = (x + ((x >> 8) & 0xff00ff));			    \
+        x &= 0xff00ff00;					    \
+        x += t;							    \
+    } while (0)
+
+/*
+  x_c = min((x_c * a) / 255 + y_c, 255)
+*/
+#define FbByteMulAdd(x, a, y) do {				    \
+        uint32_t t = ((x & 0xff00ff) * a) + 0x800080;               \
+        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			    \
+        t &= 0xff00ff;						    \
+        t += y & 0xff00ff;					    \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);			    \
+        t &= 0xff00ff;						    \
+								    \
+        x = (((x >> 8) & 0xff00ff) * a) + 0x800080;                 \
+        x = (x + ((x >> 8) & 0xff00ff)) >> 8;                       \
+        x &= 0xff00ff;                                              \
+        x += (y >> 8) & 0xff00ff;                                   \
+        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                     \
+        x &= 0xff00ff;                                              \
+        x <<= 8;                                                    \
+        x += t;                                                     \
+    } while (0)
+
+/*
+  x_c = (x_c * a + y_c * b) / 255
+*/
+#define FbByteAddMul(x, a, y, b) do {                                   \
+        uint32_t t;							\
+        uint32_t r = (x >> 24) * a + (y >> 24) * b + 0x80;		\
+        r += (r >> 8);                                                  \
+        r >>= 8;                                                        \
+									\
+        t = (x & 0xff00) * a + (y & 0xff00) * b;                        \
+        t += (t >> 8) + 0x8000;                                         \
+        t >>= 16;                                                       \
+									\
+        t |= r << 16;                                                   \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
+        t &= 0xff00ff;                                                  \
+        t <<= 8;                                                        \
+									\
+        r = ((x >> 16) & 0xff) * a + ((y >> 16) & 0xff) * b + 0x80;     \
+        r += (r >> 8);                                                  \
+        r >>= 8;                                                        \
+									\
+        x = (x & 0xff) * a + (y & 0xff) * b + 0x80;                     \
+        x += (x >> 8);                                                  \
+        x >>= 8;                                                        \
+        x |= r << 16;                                                   \
+        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
+        x &= 0xff00ff;                                                  \
+        x |= t;                                                         \
+    } while (0)
+
+/*
+  x_c = (x_c * a_c) / 255
+*/
+#define FbByteMulC(x, a) do {				  \
+        uint32_t t;                                       \
+        uint32_t r = (x & 0xff) * (a & 0xff);             \
+        r |= (x & 0xff0000) * ((a >> 16) & 0xff);	  \
+	r += 0x800080;					  \
+        r = (r + ((r >> 8) & 0xff00ff)) >> 8;		  \
+        r &= 0xff00ff;					  \
+							  \
+        x >>= 8;					  \
+        t = (x & 0xff) * ((a >> 8) & 0xff);		  \
+        t |= (x & 0xff0000) * (a >> 24);		  \
+        t += 0x800080;					  \
+        t = t + ((t >> 8) & 0xff00ff);			  \
+        x = r | (t & 0xff00ff00);			  \
+							  \
+    } while (0)
+
+/*
+  x_c = min((x_c * a_c) / 255 + y_c, 255)
+*/
+#define FbByteMulAddC(x, a, y) do {				      \
+        uint32_t t;                                                   \
+        uint32_t r = (x & 0xff) * (a & 0xff);                         \
+        r |= (x & 0xff0000) * ((a >> 16) & 0xff);		      \
+	r += 0x800080;						      \
+	r = (r + ((r >> 8) & 0xff00ff)) >> 8;			      \
+        r &= 0xff00ff;						      \
+        r += y & 0xff00ff;					      \
+        r |= 0x1000100 - ((r >> 8) & 0xff00ff);			      \
+        r &= 0xff00ff;						      \
+								      \
+        x >>= 8;                                                       \
+        t = (x & 0xff) * ((a >> 8) & 0xff);                            \
+        t |= (x & 0xff0000) * (a >> 24);                               \
+	t += 0x800080;                                                 \
+        t = (t + ((t >> 8) & 0xff00ff)) >> 8;			       \
+        t &= 0xff00ff;                                                 \
+        t += (y >> 8) & 0xff00ff;                                      \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                        \
+        t &= 0xff00ff;                                                 \
+        x = r | (t << 8);                                              \
+    } while (0)
+
+/*
+  x_c = (x_c * a_c + y_c * b) / 255
+*/
+#define FbByteAddMulC(x, a, y, b) do {                                  \
+        uint32_t t;							\
+        uint32_t r = (x >> 24) * (a >> 24) + (y >> 24) * b;		\
+        r += (r >> 8) + 0x80;                                           \
+        r >>= 8;                                                        \
+									\
+        t = (x & 0xff00) * ((a >> 8) & 0xff) + (y & 0xff00) * b;        \
+        t += (t >> 8) + 0x8000;                                         \
+        t >>= 16;                                                       \
+									\
+        t |= r << 16;                                                   \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
+        t &= 0xff00ff;                                                  \
+        t <<= 8;                                                        \
+									\
+        r = ((x >> 16) & 0xff) * ((a >> 16) & 0xff) + ((y >> 16) & 0xff) * b + 0x80; \
+        r += (r >> 8);                                                  \
+        r >>= 8;                                                        \
+									\
+        x = (x & 0xff) * (a & 0xff) + (y & 0xff) * b + 0x80;            \
+        x += (x >> 8);                                                  \
+        x >>= 8;                                                        \
+        x |= r << 16;                                                   \
+        x |= 0x1000100 - ((x >> 8) & 0xff00ff);                         \
+        x &= 0xff00ff;                                                  \
+        x |= t;                                                         \
+    } while (0)
+
+/*
+  x_c = min(x_c + y_c, 255)
+*/
+#define FbByteAdd(x, y) do {                                            \
+        uint32_t t;							\
+        uint32_t r = (x & 0xff00ff) + (y & 0xff00ff);			\
+        r |= 0x1000100 - ((r >> 8) & 0xff00ff);                         \
+        r &= 0xff00ff;                                                  \
+									\
+        t = ((x >> 8) & 0xff00ff) + ((y >> 8) & 0xff00ff);              \
+        t |= 0x1000100 - ((t >> 8) & 0xff00ff);                         \
+        r |= (t & 0xff00ff) << 8;                                       \
+        x = r;                                                          \
+    } while (0)
+
 static inline vector unsigned int
 splat_alpha (vector unsigned int pix) {
     return vec_perm (pix, pix,
commit 1ec7bd2cb2d02caca06742b0091f293d29d95a44
Merge: e63bf15... 5388222...
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Sat Apr 12 09:53:24 2008 +0200

    Merge branch 'master' into vmx
    
    Conflicts:
    
    	pixman/pixman-pict.c

diff --cc pixman/pixman-pict.c
index 6cc81d7,f01a643..c758823
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@@ -30,10 -30,9 +30,9 @@@
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
- #include "pixman.h"
 -
  #include "pixman-private.h"
  #include "pixman-mmx.h"
 +#include "pixman-vmx.h"
  #include "pixman-sse.h"
  
  #define FbFullMask(n)   ((n) == 32 ? (uint32_t)-1 : ((((uint32_t) 1) << n) - 1))
commit e63bf1554b4adf9e687ec86213a97caab2218a77
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Sun Mar 23 16:12:31 2008 +0100

    Make configure message alike the mmx/sse/sse2 ones

diff --git a/configure.ac b/configure.ac
index 124d3e2..8a396ec 100644
--- a/configure.ac
+++ b/configure.ac
@@ -205,7 +205,7 @@ else
 fi
 
 have_vmx_intrinsics=no
-AC_MSG_CHECKING(For VMX/Altivec intrinsics in the compiler)
+AC_MSG_CHECKING(whether to use VMX/Altivec intrinsics)
 xserver_save_CFLAGS=$CFLAGS
 CFLAGS="$CFLAGS $VMX_CFLAGS"
 AC_COMPILE_IFELSE([
commit dcc530178050522705e70ff2f09b9da2b358ac01
Merge: 550e5f5... 29a8ae4...
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Sun Mar 23 16:04:26 2008 +0100

    Update vmx

diff --cc configure.ac
index 394ce72,c416bc8..124d3e2
--- a/configure.ac
+++ b/configure.ac
@@@ -165,44 -165,38 +165,73 @@@ f
  
  AM_CONDITIONAL(USE_SSE, test $have_sse_intrinsics = yes)
  
+ 
+ dnl ===========================================================================
+ dnl Check for SSE2
+ 
+ SSE_CFLAGS="-mmmx -msse2 -Winline --param inline-unit-growth=10000 --param large-function-growth=10000"
+ 
+ have_sse2_intrinsics=no
+ AC_MSG_CHECKING(whether to use SSE2 intrinsics)
+ xserver_save_CFLAGS=$CFLAGS
+ CFLAGS="$CFLAGS -msse2 $MMX_CFLAGS"
+ 
+ AC_COMPILE_IFELSE([
+ #include <mmintrin.h>
+ #include <xmmintrin.h>
+ int main () {
+     __m128i a, b, c;
+ 	c = _mm_xor_si128 (a, b);
+     return 0;
+ }], have_sse2_intrinsics=yes)
+ CFLAGS=$xserver_save_CFLAGS
+ AC_MSG_RESULT($have_sse2_intrinsics)
+ 
+ if test $have_sse2_intrinsics = yes ; then
+    AC_DEFINE(USE_SSE2, 1, [use SSE compiler intrinsics])
+ fi
+ 
+ AM_CONDITIONAL(USE_SSE2, test $have_sse2_intrinsics = yes)
+ 
  dnl ========================================================
  AC_SUBST(MMX_CFLAGS)
+ AC_SUBST(SSE_CFLAGS)
  
 +dnl Check for VMX/Altivec
 +if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then
 +    VMX_CFLAGS="-faltivec"
 +else
 +    VMX_CFLAGS="-maltivec -mabi=altivec"
 +fi
 +
 +have_vmx_intrinsics=no
 +AC_MSG_CHECKING(For VMX/Altivec intrinsics in the compiler)
 +xserver_save_CFLAGS=$CFLAGS
 +CFLAGS="$CFLAGS $VMX_CFLAGS"
 +AC_COMPILE_IFELSE([
 +#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
 +#error "Need GCC >= 3.4 for sane altivec support"
 +#endif
 +#include <altivec.h>
 +int main () {
 +    vector unsigned int v = vec_splat_u32 (1);
 +    v = vec_sub (v, v);
 +    return 0;
 +}], have_vmx_intrinsics=yes)
 +CFLAGS=$xserver_save_CFLAGS
 +AC_MSG_RESULT($have_vmx_intrinsics)
 +
 +if test $have_vmx_intrinsics = yes ; then
 +   AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
 +else
 +   VMX_CFLAGS=
 +fi
 +AC_SUBST(VMX_CFLAGS)
 +
 +AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
 +
 +dnl ===========================================================================
 +
  PKG_CHECK_MODULES(GTK, [gtk+-2.0], [HAVE_GTK=yes], [HAVE_GTK=no])
  AM_CONDITIONAL(HAVE_GTK, [test "x$HAVE_GTK" = xyes])
  
diff --cc pixman/Makefile.am
index 467ebdd,1f21f8c..20288b6
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@@ -31,14 -31,15 +31,25 @@@ libpixman_mmx_la_LIBADD = $(DEP_LIBS
  libpixman_1_la_LIBADD += libpixman-mmx.la
  endif
  
 +# vmx code
 +if USE_VMX
 +noinst_LTLIBRARIES = libpixman-vmx.la
 +libpixman_vmx_la_SOURCES = \
 +	pixman-vmx.c		\
 +	pixman-vmx.h
 +libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS)
 +libpixman_vmx_la_LIBADD = $(DEP_LIBS)
 +libpixman_1_la_LIBADD += libpixman-vmx.la
 +endif
  
+ # sse2 code
+ if USE_SSE2
+ noinst_LTLIBRARIES = libpixman-sse.la
+ libpixman_sse_la_SOURCES = \
+ 	pixman-sse.c \
+ 	pixman-sse.h
+ libpixman_sse_la_CFLAGS = $(DEP_CFLAGS) $(SSE_CFLAGS)
+ libpixman_sse_la_LIBADD = $(DEP_LIBS)
+ libpixman_1_la_LIBADD += libpixman-sse.la
+ endif
+ 
diff --cc pixman/pixman-pict.c
index f36ca0e,e4430d1..6cc81d7
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@@ -29,10 -30,10 +30,11 @@@
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
 -
 +#include "pixman.h"
  #include "pixman-private.h"
  #include "pixman-mmx.h"
 +#include "pixman-vmx.h"
+ #include "pixman-sse.h"
  
  #define FbFullMask(n)   ((n) == 32 ? (uint32_t)-1 : ((((uint32_t) 1) << n) - 1))
  
@@@ -1495,6 -1484,13 +1485,21 @@@ static const FastPathInfo mmx_fast_path
  };
  #endif
  
+ #ifdef USE_SSE2
+ static const FastPathInfo sse_fast_paths[] =
+ {
+     { PIXMAN_OP_NONE },
+ };
+ #endif
+ 
++#ifdef USE_VMX
++static const FastPathInfo vmx_fast_paths[] =
++{
++    { PIXMAN_OP_NONE },
++};
++#endif
++
++
  static const FastPathInfo c_fast_paths[] =
  {
      { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   fbCompositeSolidMask_nx8x0565, 0 },
@@@ -1658,31 -1654,23 +1663,27 @@@ pixman_image_composite (pixman_op_
  			uint16_t     width,
  			uint16_t     height)
  {
-     pixman_bool_t	    srcRepeat = pSrc->type == BITS && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL;
-     pixman_bool_t	    maskRepeat = FALSE;
-     pixman_bool_t	    srcTransform = pSrc->common.transform != NULL;
-     pixman_bool_t	    maskTransform = FALSE;
-     pixman_bool_t	    srcAlphaMap = pSrc->common.alpha_map != NULL;
-     pixman_bool_t	maskAlphaMap = FALSE;
-     pixman_bool_t	dstAlphaMap = pDst->common.alpha_map != NULL;
-     CompositeFunc   func = NULL;
- 
+     pixman_bool_t srcRepeat = pSrc->type == BITS && pSrc->common.repeat == PIXMAN_REPEAT_NORMAL;
+     pixman_bool_t maskRepeat = FALSE;
+     pixman_bool_t srcTransform = pSrc->common.transform != NULL;
+     pixman_bool_t maskTransform = FALSE;
+     pixman_bool_t srcAlphaMap = pSrc->common.alpha_map != NULL;
+     pixman_bool_t maskAlphaMap = FALSE;
+     pixman_bool_t dstAlphaMap = pDst->common.alpha_map != NULL;
+     CompositeFunc func = NULL;
+ 
+ #ifdef USE_SSE2
+     fbComposeSetupSSE();
+ #endif
+     
  #ifdef USE_MMX
-     static pixman_bool_t mmx_setup = FALSE;
-     if (!mmx_setup)
-     {
-         fbComposeSetupMMX();
-         mmx_setup = TRUE;
-     }
+     fbComposeSetupMMX();
  #endif
+ 
 +#ifdef USE_VMX
-     static pixman_bool_t vmx_setup = FALSE;
-     if (!vmx_setup) {
-         fbComposeSetupVMX();
-         vmx_setup = TRUE;
-     }
++    fbComposeSetupVMX();
 +#endif
 +
      if (srcRepeat && srcTransform &&
  	pSrc->bits.width == 1 &&
  	pSrc->bits.height == 1)
@@@ -1731,6 -1731,6 +1744,14 @@@
  	    info = get_fast_path (mmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
  	if (!info)
  #endif
++
++#ifdef USE_VMX
++
++	if (pixman_have_vmx())
++	    info = get_fast_path (vmx_fast_paths, op, pSrc, pMask, pDst, pixbuf);
++	if (!info)
++#endif
++
  	    info = get_fast_path (c_fast_paths, op, pSrc, pMask, pDst, pixbuf);
  
  	if (info)
@@@ -1860,6 -1813,6 +1881,7 @@@ enum CPUFeatures 
      NoFeatures = 0,
      MMX = 0x1,
      MMX_Extensions = 0x2,
++    VMX = 0x4,
      SSE = 0x6,
      SSE2 = 0x8,
      CMOV = 0x10
commit 550e5f54abe4f3f0b6fcd278c3b4533036276e3f
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Sat Mar 22 11:28:48 2008 +0100

    update patch

diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index e530a66..467ebdd 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -33,12 +33,12 @@ endif
 
 # vmx code
 if USE_VMX
-noinst_LTLIBRARIES += libpixman-vmx.la
+noinst_LTLIBRARIES = libpixman-vmx.la
 libpixman_vmx_la_SOURCES = \
 	pixman-vmx.c		\
 	pixman-vmx.h
 libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS)
 libpixman_vmx_la_LIBADD = $(DEP_LIBS)
-libpixman_la_LIBADD += libpixman-vmx.la
+libpixman_1_la_LIBADD += libpixman-vmx.la
 endif
 
commit 49240111dbb31c335856f9653544a039275bf033
Merge: 808e4f5... 72b46bc...
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Sun Dec 16 00:38:16 2007 +0100

    Merge branch 'master' of git://anongit.freedesktop.org/pixman

diff --cc configure.ac
index b6a9732,b1c2015..394ce72
--- a/configure.ac
+++ b/configure.ac
@@@ -114,44 -130,44 +130,79 @@@ f
- AC_SUBST(MMX_CFLAGS)
  
  AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes)
  
+ dnl =======================================================
+ 
+ dnl GCC 4.2 when compiling with -msse will generate SSE instructions
+ dnl on its own.  This means anything compiled with -msse can only be
+ dnl run after a runtime check for SSE.  Unfortunately, since we still
+ dnl need to support MMX-but-not-SSE (such as the OLPC), this means we
+ dnl can only use SSE when compiling for x86-64 (where SSE is always
+ dnl supported).
+ 
+ have_sse_intrinsics=no
+ AC_MSG_CHECKING(whether to use SSE intrinsics)
+ xserver_save_CFLAGS=$CFLAGS
+ CFLAGS="$CFLAGS -msse $MMX_CFLAGS"
+ 
+ AC_COMPILE_IFELSE([
+ #if !defined(__amd64__) && !defined(__x86_64__)
+ #error "Need x86-64 for SSE"
+ #endif
+ #include <mmintrin.h>
+ #include <xmmintrin.h>
+ int main () {
+     __m64 v = _mm_cvtsi32_si64 (1);
+     v = _mm_shuffle_pi16 (v, _MM_SHUFFLE(3, 3, 3, 3));
+     return _mm_cvtsi64_si32 (v);
+ }], have_sse_intrinsics=yes)
+ CFLAGS=$xserver_save_CFLAGS
+ AC_MSG_RESULT($have_sse_intrinsics)
+ 
+ if test $have_sse_intrinsics = yes ; then
+    AC_DEFINE(USE_SSE, 1, [use SSE compiler intrinsics])
+    MMX_CFLAGS="-msse $MMX_CFLAGS"
+ fi
+ 
+ AM_CONDITIONAL(USE_SSE, test $have_sse_intrinsics = yes)
+ 
  dnl ========================================================
+ AC_SUBST(MMX_CFLAGS)
  
 +dnl Check for VMX/Altivec
 +if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then
 +    VMX_CFLAGS="-faltivec"
 +else
 +    VMX_CFLAGS="-maltivec -mabi=altivec"
 +fi
 +
 +have_vmx_intrinsics=no
 +AC_MSG_CHECKING(For VMX/Altivec intrinsics in the compiler)
 +xserver_save_CFLAGS=$CFLAGS
 +CFLAGS="$CFLAGS $VMX_CFLAGS"
 +AC_COMPILE_IFELSE([
 +#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
 +#error "Need GCC >= 3.4 for sane altivec support"
 +#endif
 +#include <altivec.h>
 +int main () {
 +    vector unsigned int v = vec_splat_u32 (1);
 +    v = vec_sub (v, v);
 +    return 0;
 +}], have_vmx_intrinsics=yes)
 +CFLAGS=$xserver_save_CFLAGS
 +AC_MSG_RESULT($have_vmx_intrinsics)
 +
 +if test $have_vmx_intrinsics = yes ; then
 +   AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
 +else
 +   VMX_CFLAGS=
 +fi
 +AC_SUBST(VMX_CFLAGS)
 +
 +AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
 +
 +dnl ===========================================================================
 +
  PKG_CHECK_MODULES(GTK, [gtk+-2.0], [HAVE_GTK=yes], [HAVE_GTK=no])
  AM_CONDITIONAL(HAVE_GTK, [test "x$HAVE_GTK" = xyes])
  
commit 808e4f541b4cfde40c91e6c6cd942f9074d38e94
Merge: 33d4028... 39a67d3...
Author: Luca Barbato <lu_zero at gentoo.org>
Date:   Mon Oct 1 22:13:05 2007 +0000

    Merge branch 'master' of git://anongit.freedesktop.org/pixman

diff --cc pixman/Makefile.am
index e60c4eb,66283a2..e530a66
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@@ -34,17 -28,5 +28,17 @@@ libpixman_mmx_la_SOURCES = 
  	pixman-mmx.h
  libpixman_mmx_la_CFLAGS = $(DEP_CFLAGS) $(MMX_CFLAGS)
  libpixman_mmx_la_LIBADD = $(DEP_LIBS)
- libpixman_la_LIBADD += libpixman-mmx.la
+ libpixman_1_la_LIBADD += libpixman-mmx.la
  endif
 +
 +# vmx code
 +if USE_VMX
 +noinst_LTLIBRARIES += libpixman-vmx.la
 +libpixman_vmx_la_SOURCES = \
 +	pixman-vmx.c		\
 +	pixman-vmx.h
 +libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS)
 +libpixman_vmx_la_LIBADD = $(DEP_LIBS)
 +libpixman_la_LIBADD += libpixman-vmx.la
 +endif
 +
commit 33d4028e3fffa231f40d66b5843de589ec2642fe
Author: root <root at echo.(none)>
Date:   Sun Jul 1 11:42:49 2007 +0000

    First import of vmx

diff --git a/configure.ac b/configure.ac
index b759c7f..81e2a26 100644
--- a/configure.ac
+++ b/configure.ac
@@ -76,6 +76,41 @@ AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes)
 
 dnl ========================================================
 
+dnl Check for VMX/Altivec
+if test -n "`$CC -v 2>&1 | grep version | grep Apple`"; then
+    VMX_CFLAGS="-faltivec"
+else
+    VMX_CFLAGS="-maltivec -mabi=altivec"
+fi
+
+have_vmx_intrinsics=no
+AC_MSG_CHECKING(For VMX/Altivec intrinsics in the compiler)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$CFLAGS $VMX_CFLAGS"
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 4))
+#error "Need GCC >= 3.4 for sane altivec support"
+#endif
+#include <altivec.h>
+int main () {
+    vector unsigned int v = vec_splat_u32 (1);
+    v = vec_sub (v, v);
+    return 0;
+}], have_vmx_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+AC_MSG_RESULT($have_vmx_intrinsics)
+
+if test $have_vmx_intrinsics = yes ; then
+   AC_DEFINE(USE_VMX, 1, [use VMX compiler intrinsics])
+else
+   VMX_CFLAGS=
+fi
+AC_SUBST(VMX_CFLAGS)
+
+AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
+
+dnl ===========================================================================
+
 PKG_CHECK_MODULES(GTK, [gtk+-2.0], [HAVE_GTK=yes], [HAVE_GTK=no])
 AM_CONDITIONAL(HAVE_GTK, [test "x$HAVE_GTK" = xyes])
 
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 90c6436..e60c4eb 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -36,3 +36,15 @@ libpixman_mmx_la_CFLAGS = $(DEP_CFLAGS) $(MMX_CFLAGS)
 libpixman_mmx_la_LIBADD = $(DEP_LIBS)
 libpixman_la_LIBADD += libpixman-mmx.la
 endif
+
+# vmx code
+if USE_VMX
+noinst_LTLIBRARIES += libpixman-vmx.la
+libpixman_vmx_la_SOURCES = \
+	pixman-vmx.c		\
+	pixman-vmx.h
+libpixman_vmx_la_CFLAGS = $(DEP_CFLAGS) $(VMX_CFLAGS)
+libpixman_vmx_la_LIBADD = $(DEP_LIBS)
+libpixman_la_LIBADD += libpixman-vmx.la
+endif
+
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index cad11dd..a857de5 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -29,6 +29,7 @@
 #include "pixman.h"
 #include "pixman-private.h"
 #include "pixman-mmx.h"
+#include "pixman-vmx.h"
 
 #define FbFullMask(n)   ((n) == 32 ? (uint32_t)-1 : ((((uint32_t) 1) << n) - 1))
 
@@ -1416,6 +1417,13 @@ pixman_image_composite (pixman_op_t      op,
         mmx_setup = TRUE;
     }
 #endif
+#ifdef USE_VMX
+    static pixman_bool_t vmx_setup = FALSE;
+    if (!vmx_setup) {
+        fbComposeSetupVMX();
+        vmx_setup = TRUE;
+    }
+#endif
 
     if (srcRepeat && srcTransform &&
 	pSrc->bits.width == 1 &&
@@ -2062,6 +2070,53 @@ pixman_image_composite (pixman_op_t      op,
 }
 
 
+#ifdef USE_VMX
+/* The CPU detection code needs to be in a file not compiled with
+ * "-maltivec -mabi=altivec", as gcc would try to save vector register
+ * across function calls causing SIGILL on cpus without Altivec/vmx.
+ */
+#ifdef __APPLE__
+#include <sys/sysctl.h>
+
+pixman_bool_t pixman_have_vmx (void) {
+    int hasVMX = 0;
+    size_t length = sizeof ( hasVMX );
+    int error = sysctlbyname ("hw.optional.altivec", &hasVMX, &length, NULL, 0);
+    if ( 0 != error ) return 0;
+    return hasVMX;
+}
+
+#else
+#include <signal.h>
+#include <setjmp.h>
+
+static sigjmp_buf jmp;
+static volatile sig_atomic_t in_test = 0;
+
+static void vmx_test (int sig) {
+    if (! in_test) {
+        signal (sig, SIG_DFL);
+        raise (sig);
+    }
+    in_test = 0;
+    siglongjmp (jmp, 1);
+}
+
+pixman_bool_t pixman_have_vmx (void) {
+    signal (SIGILL, vmx_test);
+    if (sigsetjmp (jmp, 1)) {
+        signal (SIGILL, SIG_DFL);
+    } else {
+        in_test = 1;
+        asm volatile ( "vor 0, 0, 0" );
+        signal (SIGILL, SIG_DFL);
+        return 1;
+    }
+    return 0;
+}
+#endif /* __APPLE__ */
+#endif /* USE_VMX */
+
 #ifdef USE_MMX
 /* The CPU detection code needs to be in a file not compiled with
  * "-mmmx -msse", as gcc would generate CMOV instructions otherwise
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
new file mode 100644
index 0000000..0008dc5
--- /dev/null
+++ b/pixman/pixman-vmx.c
@@ -0,0 +1,1068 @@
+/*
+ * Copyright © 2007 Luca Barbato
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Luca Barbato not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  Luca Barbato makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Luca Barbato (lu_zero at gentoo.org)
+ *
+ * Based on fbmmx.c by Owen Taylor, Søren Sandmann and Nicholas Miell
+ */
+
+#include <config.h>
+#include "pixman-vmx.h"
+#include <altivec.h>
+
+/* Replicate byte 0 of every 32-bit element (byte indices 0x00, 0x04, 0x08,
+ * 0x0C of the vector) into all four bytes of that element.
+ */
+static inline vector unsigned int
+splat_alpha (vector unsigned int pix)
+{
+    const vector unsigned char alpha_bytes = (vector unsigned char)
+        AVV(0x00,0x00,0x00,0x00, 0x04,0x04,0x04,0x04,
+            0x08,0x08,0x08,0x08, 0x0C,0x0C,0x0C,0x0C);
+
+    return vec_perm (pix, pix, alpha_bytes);
+}
+
+/* Per-byte multiply of two pixel vectors: for every byte, computes
+ * t = p * a + 0x80 and returns (t + (t >> 8)) >> 8, i.e. the usual rounded
+ * division of the product by 255.  The work is done on 16-bit lanes, first
+ * for the high eight bytes and then for the low eight bytes, and the two
+ * halves are packed back to bytes at the end.
+ */
+static inline vector unsigned int
+pix_multiply (vector unsigned int p, vector unsigned int a)
+{
+    vector unsigned short hi, lo, mod;
+    /* widen the high eight bytes of p and a to 16 bits by interleaving
+     * them with zero bytes */
+    hi = (vector unsigned short)
+                    vec_mergeh ((vector unsigned char)AVV(0),
+                                (vector unsigned char)p);
+    mod = (vector unsigned short)
+                    vec_mergeh ((vector unsigned char)AVV(0),
+                                (vector unsigned char)a);
+
+    /* hi = hi * mod + 0x80 */
+    hi = vec_mladd (hi, mod, (vector unsigned short)
+                            AVV(0x0080,0x0080,0x0080,0x0080,
+                                 0x0080,0x0080,0x0080,0x0080));
+
+    /* hi += hi >> 8 */
+    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
+
+    /* hi >>= 8 */
+    hi = vec_sr (hi, vec_splat_u16 (8));
+
+    /* same sequence for the low eight bytes */
+    lo = (vector unsigned short)
+                    vec_mergel ((vector unsigned char)AVV(0),
+                                (vector unsigned char)p);
+    mod = (vector unsigned short)
+                    vec_mergel ((vector unsigned char)AVV(0),
+                                (vector unsigned char)a);
+
+    lo = vec_mladd (lo, mod, (vector unsigned short)
+                            AVV(0x0080,0x0080,0x0080,0x0080,
+                                 0x0080,0x0080,0x0080,0x0080));
+
+    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
+
+    lo = vec_sr (lo, vec_splat_u16 (8));
+
+    /* narrow both halves back to bytes with unsigned saturation */
+    return (vector unsigned int)vec_packsu (hi, lo);
+}
+
+/* Per-byte saturating add of two pixel vectors. */
+static inline vector unsigned int
+pix_add (vector unsigned int a, vector unsigned int b)
+{
+    vector unsigned char lhs = (vector unsigned char) a;
+    vector unsigned char rhs = (vector unsigned char) b;
+
+    return (vector unsigned int) vec_adds (lhs, rhs);
+}
+
+/* Per-byte x * a + y * b, followed by the same "+0x80, add >> 8, >> 8"
+ * rounding used by pix_multiply().  Processed on 16-bit lanes, high eight
+ * bytes first, then the low eight bytes.
+ *
+ * NOTE(review): both products are accumulated with vec_mladd in a 16-bit
+ * lane; it looks like the sum is expected to fit in 16 bits for the
+ * operand combinations the callers use - confirm.
+ */
+static inline vector unsigned int
+pix_add_mul (vector unsigned int x, vector unsigned int a,
+             vector unsigned int y, vector unsigned int b)
+{
+    vector unsigned short hi, lo, mod, hiy, loy, mody;
+
+    /* widen the high eight bytes of all four operands to 16 bits */
+    hi = (vector unsigned short)
+                    vec_mergeh ((vector unsigned char)AVV(0),
+                                (vector unsigned char)x);
+    mod = (vector unsigned short)
+                    vec_mergeh ((vector unsigned char)AVV(0),
+                                (vector unsigned char)a);
+    hiy = (vector unsigned short)
+                    vec_mergeh ((vector unsigned char)AVV(0),
+                                (vector unsigned char)y);
+    mody = (vector unsigned short)
+                    vec_mergeh ((vector unsigned char)AVV(0),
+                                (vector unsigned char)b);
+
+    /* hi = x * a + 0x80 */
+    hi = vec_mladd (hi, mod, (vector unsigned short)
+                             AVV(0x0080,0x0080,0x0080,0x0080,
+                                  0x0080,0x0080,0x0080,0x0080));
+
+    /* hi += y * b */
+    hi = vec_mladd (hiy, mody, hi);
+
+    hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
+
+    hi = vec_sr (hi, vec_splat_u16 (8));
+
+    /* same sequence for the low eight bytes */
+    lo = (vector unsigned short)
+                    vec_mergel ((vector unsigned char)AVV(0),
+                                (vector unsigned char)x);
+    mod = (vector unsigned short)
+                    vec_mergel ((vector unsigned char)AVV(0),
+                                (vector unsigned char)a);
+
+    loy = (vector unsigned short)
+                    vec_mergel ((vector unsigned char)AVV(0),
+                                (vector unsigned char)y);
+    mody = (vector unsigned short)
+                    vec_mergel ((vector unsigned char)AVV(0),
+                                (vector unsigned char)b);
+
+    lo = vec_mladd (lo, mod, (vector unsigned short)
+                             AVV(0x0080,0x0080,0x0080,0x0080,
+                                  0x0080,0x0080,0x0080,0x0080));
+
+    lo = vec_mladd (loy, mody, lo);
+
+    lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
+
+    lo = vec_sr (lo, vec_splat_u16 (8));
+
+    /* narrow both halves back to bytes with unsigned saturation */
+    return (vector unsigned int)vec_packsu (hi, lo);
+}
+
+/* Bitwise complement of the whole vector (NOR of src with itself). */
+static inline vector unsigned int
+negate (vector unsigned int src)
+{
+    vector unsigned int inverted = vec_nor (src, src);
+
+    return inverted;
+}
+/* OVER: dest * ~srca + src, where the final add saturates per byte. */
+static inline vector unsigned int
+over (vector unsigned int src, vector unsigned int srca,
+      vector unsigned int dest)
+{
+    vector unsigned int  inv_srca = negate (srca);
+    vector unsigned char scaled_dest =
+        (vector unsigned char) pix_multiply (dest, inv_srca);
+
+    return (vector unsigned int) vec_adds ((vector unsigned char) src,
+                                           scaled_dest);
+}
+
+/* IN is a plain pix_multiply, so "src IN mask, then OVER dest" expands to
+ * over (src * mask, srca * mask, dest).
+ * Note that 'mask' appears twice in the expansion and is therefore
+ * evaluated twice.
+ */
+#define in_over(src, srca, mask, dest) over (pix_multiply (src, mask),\
+                                             pix_multiply (srca, mask), dest)
+
+
+/* Permute-vector setup for the unaligned load/store macros that follow.
+ * Each macro assigns to variables the caller must have declared:
+ * <name>_mask for every pointer argument and, for the two- and
+ * three-pointer forms, store_mask.  All expansions already end in ';'.
+ */
+
+/* One pointer: source_mask = vec_lvsl (0, source). */
+#define COMPUTE_SHIFT_MASK(source) \
+    source ## _mask = vec_lvsl (0, source);
+
+/* Destination + source: load masks for both, plus store_mask
+ * (vec_lvsr of dest), which STORE_VECTOR uses. */
+#define COMPUTE_SHIFT_MASKS(dest, source) \
+    dest ## _mask = vec_lvsl (0, dest); \
+    source ## _mask = vec_lvsl (0, source); \
+    store_mask = vec_lvsr (0, dest);
+
+/* Destination + source + mask (component-alpha combiners). */
+#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
+    mask ## _mask = vec_lvsl (0, mask); \
+    dest ## _mask = vec_lvsl (0, dest); \
+    source ## _mask = vec_lvsl (0, source); \
+    store_mask = vec_lvsr (0, dest);
+
+/* Unaligned-load helpers.  The caller has to declare the temporaries the
+ * macros assign to (tmp1..tmp4, the v<name> vectors and the <name>_mask
+ * permute vectors).
+ * Note: tmp3 and tmp4 must remain untouched between a LOAD_* macro and the
+ * STORE_VECTOR that follows it!
+ *
+ * LOAD_VECTOR loads 16 bytes from 'source' into v<source>: two aligned
+ * loads (offsets 0 and 15) are combined with vec_perm using source_mask.
+ * There must be no space between the macro name and '(' in the #define,
+ * otherwise the macro would be object-like and take no argument.
+ */
+#define LOAD_VECTOR(source) \
+        tmp1 = (typeof(tmp1))vec_ld(0, source); \
+        tmp2 = (typeof(tmp2))vec_ld(15, source); \
+        v ## source = (typeof(v ## source)) \
+                       vec_perm(tmp1, tmp2, source ## _mask);
+
+/* Load 16 bytes each from 'source' and 'dest' into v<source> and v<dest>.
+ * tmp1/tmp2 receive the two aligned quadwords of source; tmp3/tmp4 receive
+ * the two aligned quadwords of dest and are left in place for
+ * STORE_VECTOR.
+ */
+#define LOAD_VECTORS(dest, source) \
+        tmp1 = (typeof(tmp1))vec_ld(0, source); \
+        tmp2 = (typeof(tmp2))vec_ld(15, source); \
+        tmp3 = (typeof(tmp3))vec_ld(0, dest); \
+        v ## source = (typeof(v ## source)) \
+                       vec_perm(tmp1, tmp2, source ## _mask); \
+        tmp4 = (typeof(tmp4))vec_ld(15, dest); \
+        v ## dest = (typeof(v ## dest)) \
+                     vec_perm(tmp3, tmp4, dest ## _mask);
+
+/* Three-operand variant of LOAD_VECTORS: additionally loads 16 bytes from
+ * 'mask' into v<mask>.  tmp1/tmp2 are reused for the mask after the source
+ * has been assembled; tmp3/tmp4 still hold the dest quadwords afterwards.
+ */
+#define LOAD_VECTORSC(dest, source, mask) \
+        tmp1 = (typeof(tmp1))vec_ld(0, source); \
+        tmp2 = (typeof(tmp2))vec_ld(15, source); \
+        tmp3 = (typeof(tmp3))vec_ld(0, dest); \
+        v ## source = (typeof(v ## source)) \
+                       vec_perm(tmp1, tmp2, source ## _mask); \
+        tmp4 = (typeof(tmp4))vec_ld(15, dest); \
+        tmp1 = (typeof(tmp1))vec_ld(0, mask); \
+        v ## dest = (typeof(v ## dest)) \
+                     vec_perm(tmp3, tmp4, dest ## _mask); \
+        tmp2 = (typeof(tmp2))vec_ld(15, mask); \
+        v ## mask = (typeof(v ## mask)) \
+                     vec_perm(tmp1, tmp2, mask ## _mask);
+/* Write v<dest> back to 'dest' with two aligned stores (offsets 15 and 0).
+ * Requires tmp3/tmp4 as left by LOAD_VECTORS/LOAD_VECTORSC and store_mask
+ * from COMPUTE_SHIFT_MASKS/COMPUTE_SHIFT_MASKC; overwrites edges, tmp1 and
+ * tmp3.
+ * NOTE(review): 'edges' is rebuilt from the previously loaded dest
+ * quadwords, presumably so that the bytes around the 16-byte window keep
+ * their old contents - confirm.
+ */
+#define STORE_VECTOR(dest) \
+        edges = vec_perm (tmp4, tmp3, dest ## _mask); \
+        tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
+        tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
+        vec_st ((vector unsigned int) tmp3, 15, dest ); \
+        vec_st ((vector unsigned int) tmp1, 0, dest );
+
+/* Multiply every pixel of src by the alpha (top byte) of the matching msk
+ * pixel, in place.  Groups of four pixels use the vector path; the
+ * remaining width % 4 pixels are handled one at a time.
+ */
+static FASTCALL void
+vmxCombineMaskU (uint32_t *src, const uint32_t *msk, int width)
+{
+    vector unsigned int  vsrc, vmsk;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         src_mask, msk_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKS(src, msk)
+
+    while (quads-- > 0) {
+        LOAD_VECTORS(src, msk)
+
+        vsrc = pix_multiply (vsrc, splat_alpha (vmsk));
+
+        STORE_VECTOR(src)
+
+        msk += 4;
+        src += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t mask_alpha = msk[tail] >> 24;
+        uint32_t pixel = src[tail];
+
+        FbByteMul (pixel, mask_alpha);
+        src[tail] = pixel;
+    }
+}
+
+/* OVER: dest = src + dest * ~alpha(src).  Four pixels per vector
+ * iteration, then a scalar loop for the last width % 4 pixels.
+ */
+static FASTCALL void
+vmxCombineOverU (uint32_t *dest, const uint32_t *src, int width)
+{
+    vector unsigned int  vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    while (quads-- > 0) {
+        LOAD_VECTORS(dest, src)
+
+        vdest = over (vsrc, splat_alpha (vsrc), vdest);
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t src_px = src[tail];
+        uint32_t dst_px = dest[tail];
+        uint32_t inv_src_alpha = Alpha (~src_px);
+
+        FbByteMulAdd (dst_px, inv_src_alpha, src_px);
+        dest[tail] = dst_px;
+    }
+}
+
+
+/* OVER_REVERSE: dest = dest + src * ~alpha(dest). */
+static FASTCALL void
+vmxCombineOverReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    vector unsigned int  vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    while (quads-- > 0) {
+        LOAD_VECTORS(dest, src)
+
+        vdest = over (vdest, splat_alpha (vdest), vsrc);
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t src_px = src[tail];
+        uint32_t dst_px = dest[tail];
+        uint32_t inv_dst_alpha = Alpha (~dest[tail]);
+
+        FbByteMulAdd (src_px, inv_dst_alpha, dst_px);
+        dest[tail] = src_px;
+    }
+}
+
+/* IN: dest = src * alpha(dest). */
+static FASTCALL void
+vmxCombineInU (uint32_t *dest, const uint32_t *src, int width)
+{
+    vector unsigned int  vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    while (quads-- > 0) {
+        LOAD_VECTORS(dest, src)
+
+        vdest = pix_multiply (vsrc, splat_alpha (vdest));
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t src_px = src[tail];
+        uint32_t dst_alpha = Alpha (dest[tail]);
+
+        FbByteMul (src_px, dst_alpha);
+        dest[tail] = src_px;
+    }
+}
+
+/* IN_REVERSE: dest = dest * alpha(src). */
+static FASTCALL void
+vmxCombineInReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    vector unsigned int  vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    while (quads-- > 0) {
+        LOAD_VECTORS(dest, src)
+
+        vdest = pix_multiply (vdest, splat_alpha (vsrc));
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t dst_px = dest[tail];
+        uint32_t src_alpha = Alpha (src[tail]);
+
+        FbByteMul (dst_px, src_alpha);
+        dest[tail] = dst_px;
+    }
+}
+
+/* OUT: dest = src * ~alpha(dest). */
+static FASTCALL void
+vmxCombineOutU (uint32_t *dest, const uint32_t *src, int width)
+{
+    vector unsigned int  vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    while (quads-- > 0) {
+        LOAD_VECTORS(dest, src)
+
+        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t src_px = src[tail];
+        uint32_t inv_dst_alpha = Alpha (~dest[tail]);
+
+        FbByteMul (src_px, inv_dst_alpha);
+        dest[tail] = src_px;
+    }
+}
+
+/* OUT_REVERSE: dest = dest * ~alpha(src). */
+static FASTCALL void
+vmxCombineOutReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    vector unsigned int  vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    while (quads-- > 0) {
+        LOAD_VECTORS(dest, src)
+
+        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t dst_px = dest[tail];
+        uint32_t inv_src_alpha = Alpha (~src[tail]);
+
+        FbByteMul (dst_px, inv_src_alpha);
+        dest[tail] = dst_px;
+    }
+}
+
+/* ATOP: dest = src * alpha(dest) + dest * ~alpha(src). */
+static FASTCALL void
+vmxCombineAtopU (uint32_t *dest, const uint32_t *src, int width)
+{
+    vector unsigned int  vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    while (quads-- > 0) {
+        LOAD_VECTORS(dest, src)
+
+        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
+                             vdest, splat_alpha (negate (vsrc)));
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t src_px = src[tail];
+        uint32_t dst_px = dest[tail];
+        uint32_t dst_alpha = Alpha (dst_px);
+        uint32_t inv_src_alpha = Alpha (~src_px);
+
+        FbByteAddMul (src_px, dst_alpha, dst_px, inv_src_alpha);
+        dest[tail] = src_px;
+    }
+}
+
+/* ATOP_REVERSE: dest = dest * alpha(src) + src * ~alpha(dest). */
+static FASTCALL void
+vmxCombineAtopReverseU (uint32_t *dest, const uint32_t *src, int width)
+{
+    vector unsigned int  vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    while (quads-- > 0) {
+        LOAD_VECTORS(dest, src)
+
+        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
+                             vsrc, splat_alpha (negate (vdest)));
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t src_px = src[tail];
+        uint32_t dst_px = dest[tail];
+        uint32_t src_alpha = Alpha (src_px);
+        uint32_t inv_dst_alpha = Alpha (~dst_px);
+
+        FbByteAddMul (src_px, inv_dst_alpha, dst_px, src_alpha);
+        dest[tail] = src_px;
+    }
+}
+
+/* XOR: dest = src * ~alpha(dest) + dest * ~alpha(src). */
+static FASTCALL void
+vmxCombineXorU (uint32_t *dest, const uint32_t *src, int width)
+{
+    vector unsigned int  vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    while (quads-- > 0) {
+        LOAD_VECTORS(dest, src)
+
+        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
+                             vdest, splat_alpha (negate (vsrc)));
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t src_px = src[tail];
+        uint32_t dst_px = dest[tail];
+        uint32_t inv_src_alpha = Alpha (~src_px);
+        uint32_t inv_dst_alpha = Alpha (~dst_px);
+
+        FbByteAddMul (src_px, inv_dst_alpha, dst_px, inv_src_alpha);
+        dest[tail] = src_px;
+    }
+}
+
+/* ADD: dest = dest + src (pix_add saturates per byte in the vector path). */
+static FASTCALL void
+vmxCombineAddU (uint32_t *dest, const uint32_t *src, int width)
+{
+    vector unsigned int  vdest, vsrc;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKS(dest, src)
+
+    while (quads-- > 0) {
+        LOAD_VECTORS(dest, src)
+
+        vdest = pix_add (vsrc, vdest);
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t src_px = src[tail];
+        uint32_t dst_px = dest[tail];
+
+        FbByteAdd (dst_px, src_px);
+        dest[tail] = dst_px;
+    }
+}
+
+/* Component-alpha SRC: dest = src * mask, multiplied channel by channel. */
+static FASTCALL void
+vmxCombineSrcC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, mask_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    while (quads-- > 0) {
+        LOAD_VECTORSC(dest, src, mask)
+
+        vdest = pix_multiply (vsrc, vmask);
+
+        STORE_VECTOR(dest)
+
+        mask += 4;
+        src += 4;
+        dest += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t mask_px = mask[tail];
+        uint32_t src_px = src[tail];
+
+        FbByteMulC (src_px, mask_px);
+        dest[tail] = src_px;
+    }
+}
+
+/* Component-alpha OVER:
+ *     dest = src * mask + dest * ~(mask * alpha(src))
+ * Four pixels per vector iteration (in_over), then a scalar loop for the
+ * last width % 4 pixels.  The scalar loop scales the mask by the source
+ * alpha before inverting it, so that it computes the same result as the
+ * vector path.
+ */
+static FASTCALL void
+vmxCombineOverC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    for (i = width/4; i > 0; i--) {
+
+        LOAD_VECTORSC(dest, src, mask)
+
+        vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);
+
+        STORE_VECTOR(dest)
+
+        mask+=4;
+        src+=4;
+        dest+=4;
+    }
+
+    for (i = width%4; --i >=0;) {
+        uint32_t a = mask[i];
+        uint32_t s = src[i];
+        uint32_t d = dest[i];
+        /* read the source alpha before s is multiplied by the mask */
+        uint32_t sa = Alpha (s);
+
+        FbByteMulC (s, a);          /* s = src * mask (per channel)  */
+        FbByteMul (a, sa);          /* a = mask * alpha(src)         */
+        FbByteMulAddC (d, ~a, s);   /* d = d * ~a + s                */
+        dest[i] = d;
+    }
+}
+
+/* Component-alpha OVER_REVERSE:
+ *     dest = dest + (src * mask) * ~alpha(dest)
+ * The scalar tail uses the 8-bit inverted destination alpha, Alpha (~d),
+ * with the scalar multiply-add.  A bitwise NOT of an already extracted
+ * 8-bit alpha would set the upper 24 bits and must not be used as a
+ * factor.
+ */
+static FASTCALL void
+vmxCombineOverReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    for (i = width/4; i > 0; i--) {
+
+        LOAD_VECTORSC (dest, src, mask)
+
+        vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));
+
+        STORE_VECTOR(dest)
+
+        mask+=4;
+        src+=4;
+        dest+=4;
+    }
+
+    for (i = width%4; --i >=0;) {
+        uint32_t a = mask[i];
+        uint32_t s = src[i];
+        uint32_t d = dest[i];
+        uint32_t ida = Alpha (~d);
+
+        FbByteMulC (s, a);          /* s = src * mask (per channel) */
+        FbByteMulAdd (s, ida, d);   /* s = s * ~alpha(dest) + d     */
+        dest[i] = s;
+    }
+}
+
+/* Component-alpha IN:
+ *     dest = (src * mask) * alpha(dest)
+ * The mask is a full four-channel value, so the scalar tail multiplies by
+ * it with the per-channel macro (FbByteMulC), as the vector path does with
+ * pix_multiply; only the destination alpha is a scalar factor.
+ */
+static FASTCALL void
+vmxCombineInC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    for (i = width/4; i > 0; i--) {
+
+        LOAD_VECTORSC(dest, src, mask)
+
+        vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
+
+        STORE_VECTOR(dest)
+
+        src+=4;
+        dest+=4;
+        mask+=4;
+    }
+
+    for (i = width%4; --i >=0;) {
+        uint32_t a = mask[i];
+        uint32_t s = src[i];
+        uint32_t da = Alpha (dest[i]);
+
+        FbByteMulC (s, a);          /* s = src * mask (per channel) */
+        FbByteMul (s, da);          /* s = s * alpha(dest)          */
+        dest[i] = s;
+    }
+}
+
+/* Component-alpha IN_REVERSE: dest = dest * (mask * alpha(src)). */
+static FASTCALL void
+vmxCombineInReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, mask_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    while (quads-- > 0) {
+        LOAD_VECTORSC(dest, src, mask)
+
+        vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+        mask += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t mask_px = mask[tail];
+        uint32_t dst_px = dest[tail];
+        uint32_t src_alpha = Alpha (src[tail]);
+
+        FbByteMul (mask_px, src_alpha);
+        FbByteMulC (dst_px, mask_px);
+        dest[tail] = dst_px;
+    }
+}
+
+/* Component-alpha OUT:
+ *     dest = (src * mask) * ~alpha(dest)
+ * The vector path multiplies by the inverted destination alpha; without
+ * the negate() the operator would be identical to IN.  In the scalar tail
+ * the inverted destination alpha is an 8-bit scalar and is therefore
+ * applied with FbByteMul, not the per-channel FbByteMulC.
+ */
+static FASTCALL void
+vmxCombineOutC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    for (i = width/4; i > 0; i--) {
+
+        LOAD_VECTORSC(dest, src, mask)
+
+        vdest = pix_multiply (pix_multiply (vsrc, vmask),
+                              splat_alpha (negate (vdest)));
+
+        STORE_VECTOR(dest)
+
+        src+=4;
+        dest+=4;
+        mask+=4;
+    }
+
+    for (i = width%4; --i >=0;) {
+        uint32_t a = mask[i];
+        uint32_t s = src[i];
+        uint32_t d = dest[i];
+        uint32_t da = Alpha (~d);
+
+        FbByteMulC (s, a);          /* s = src * mask (per channel) */
+        FbByteMul (s, da);          /* s = s * ~alpha(dest)         */
+        dest[i] = s;
+    }
+}
+
+/* Component-alpha OUT_REVERSE:
+ *     dest = dest * ~(mask * alpha(src))
+ * In the scalar tail the source alpha is an 8-bit scalar, so the mask is
+ * scaled with FbByteMul; the resulting four-channel value is then applied
+ * per channel.
+ */
+static FASTCALL void
+vmxCombineOutReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    for (i = width/4; i > 0; i--) {
+
+        LOAD_VECTORSC(dest, src, mask)
+
+        vdest = pix_multiply (vdest,
+                             negate (pix_multiply (vmask, splat_alpha (vsrc))));
+
+        STORE_VECTOR(dest)
+
+        src+=4;
+        dest+=4;
+        mask+=4;
+    }
+
+    for (i = width%4; --i >=0;) {
+        uint32_t a = mask[i];
+        uint32_t s = src[i];
+        uint32_t d = dest[i];
+        uint32_t sa = Alpha (s);
+
+        FbByteMul (a, sa);          /* a = mask * alpha(src)     */
+        FbByteMulC (d, ~a);         /* d = d * ~a (per channel)  */
+        dest[i] = d;
+    }
+}
+
+/* Component-alpha ATOP:
+ *     dest = (src * mask) * alpha(dest) + dest * ~(mask * alpha(src))
+ * The second factor uses the alpha of the *source* pixel, in the vector
+ * path as well as in the scalar tail.
+ */
+static FASTCALL void
+vmxCombineAtopC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    for (i = width/4; i > 0; i--) {
+
+        LOAD_VECTORSC(dest, src, mask)
+
+        vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest),
+                            vdest,
+                            negate (pix_multiply (vmask,
+                                                splat_alpha (vsrc))));
+
+        STORE_VECTOR(dest)
+
+        src+=4;
+        dest+=4;
+        mask+=4;
+    }
+
+    for (i = width%4; --i >=0;) {
+        uint32_t a = mask[i];
+        uint32_t s = src[i];
+        uint32_t d = dest[i];
+        /* read the source alpha before s is multiplied by the mask */
+        uint32_t sa = Alpha (s);
+        uint32_t da = Alpha (d);
+
+        FbByteMulC (s, a);              /* s = src * mask (per channel) */
+        FbByteMul (a, sa);              /* a = mask * alpha(src)        */
+        FbByteAddMulC (d, ~a, s, da);   /* d = d * ~a + s * alpha(dest) */
+        dest[i] = d;
+    }
+}
+
+/* Component-alpha ATOP_REVERSE:
+ *     dest = dest * (mask * alpha(src)) + (src * mask) * ~alpha(dest)
+ * The scalar tail takes the inverted destination alpha as Alpha (~d), an
+ * 8-bit value.  A bitwise NOT of an already extracted alpha would set the
+ * upper 24 bits and must not be used as the scalar factor.
+ */
+static FASTCALL void
+vmxCombineAtopReverseC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    for (i = width/4; i > 0; i--) {
+
+        LOAD_VECTORSC(dest, src, mask)
+
+        vdest = pix_add_mul (vdest,
+                            pix_multiply (vmask, splat_alpha (vsrc)),
+                            pix_multiply (vsrc, vmask),
+                            negate (splat_alpha (vdest)));
+
+        STORE_VECTOR(dest)
+
+        src+=4;
+        dest+=4;
+        mask+=4;
+    }
+
+    for (i = width%4; --i >=0;) {
+        uint32_t a = mask[i];
+        uint32_t s = src[i];
+        uint32_t d = dest[i];
+        /* read the source alpha before s is multiplied by the mask */
+        uint32_t sa = Alpha (s);
+        uint32_t ida = Alpha (~d);
+
+        FbByteMulC (s, a);              /* s = src * mask (per channel)  */
+        FbByteMul (a, sa);              /* a = mask * alpha(src)         */
+        FbByteAddMulC (d, a, s, ida);   /* d = d * a + s * ~alpha(dest)  */
+        dest[i] = d;
+    }
+}
+
+/* Component-alpha XOR:
+ *     dest = dest * ~(mask * alpha(src)) + (src * mask) * ~alpha(dest)
+ * The scalar tail takes the inverted destination alpha as Alpha (~d), an
+ * 8-bit value.  A bitwise NOT of an already extracted alpha would set the
+ * upper 24 bits and must not be used as the scalar factor.
+ */
+static FASTCALL void
+vmxCombineXorC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    int i;
+    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, mask_mask, src_mask, store_mask;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    for (i = width/4; i > 0; i--) {
+
+        LOAD_VECTORSC(dest, src, mask)
+
+        vdest = pix_add_mul (vdest,
+                            negate (pix_multiply (vmask, splat_alpha (vsrc))),
+                            pix_multiply (vsrc, vmask),
+                            negate (splat_alpha (vdest)));
+
+        STORE_VECTOR(dest)
+
+        src+=4;
+        dest+=4;
+        mask+=4;
+    }
+
+    for (i = width%4; --i >=0;) {
+        uint32_t a = mask[i];
+        uint32_t s = src[i];
+        uint32_t d = dest[i];
+        /* read the source alpha before s is multiplied by the mask */
+        uint32_t sa = Alpha (s);
+        uint32_t ida = Alpha (~d);
+
+        FbByteMulC (s, a);              /* s = src * mask (per channel)  */
+        FbByteMul (a, sa);              /* a = mask * alpha(src)         */
+        FbByteAddMulC (d, ~a, s, ida);  /* d = d * ~a + s * ~alpha(dest) */
+        dest[i] = d;
+    }
+}
+
+/* Component-alpha ADD: dest = src * mask + dest. */
+static FASTCALL void
+vmxCombineAddC (uint32_t *dest, uint32_t *src, uint32_t *mask, int width)
+{
+    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
+                         dest_mask, mask_mask, src_mask, store_mask;
+    int quads = width / 4;
+    int tail = width % 4;
+
+    COMPUTE_SHIFT_MASKC(dest, src, mask)
+
+    while (quads-- > 0) {
+        LOAD_VECTORSC(dest, src, mask)
+
+        vdest = pix_add (pix_multiply (vsrc, vmask), vdest);
+
+        STORE_VECTOR(dest)
+
+        src += 4;
+        dest += 4;
+        mask += 4;
+    }
+
+    while (--tail >= 0) {
+        uint32_t mask_px = mask[tail];
+        uint32_t src_px = src[tail];
+        uint32_t dst_px = dest[tail];
+
+        FbByteMulC (src_px, mask_px);
+        FbByteAdd (src_px, dst_px);
+        dest[tail] = src_px;
+    }
+}
+
+
+#if 0
+/* Disabled (inside "#if 0"): solid-source fill onto a 32-bit destination.
+ * Fetches the solid colour, returns early when its alpha byte is zero, and
+ * walks the destination scanlines.  The per-scanline combine call is still
+ * commented out (XXX below), so as written the loop only advances dstLine.
+ */
+void
+fbCompositeSolid_nx8888vmx (pixman_operator_t	op,
+			    pixman_image_t * pSrc,
+			    pixman_image_t * pMask,
+			    pixman_image_t * pDst,
+			    int16_t	xSrc,
+			    int16_t	ySrc,
+			    int16_t	xMask,
+			    int16_t	yMask,
+			    int16_t	xDst,
+			    int16_t	yDst,
+			    uint16_t	width,
+			    uint16_t	height)
+{
+    uint32_t	src;
+    uint32_t	*dstLine, *dst;
+    int	dstStride;
+
+    fbComposeGetSolid (pSrc, pDst, src);
+
+    /* fully transparent source: nothing to draw */
+    if (src >> 24 == 0)
+	return;
+
+    fbComposeGetStart (pDst, xDst, yDst, uint32_t, dstStride, dstLine, 1);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+	/* XXX vmxCombineOverU (dst, src, width); */
+    }
+}
+
+/* Disabled (inside "#if 0"): solid-source fill onto a 16-bit destination.
+ * Same structure as fbCompositeSolid_nx8888vmx, but each scanline is handed
+ * to vmxCombineOverU565.
+ * NOTE(review): vmxCombineOverU565 is not defined in the part of the file
+ * visible here, and the local 'w' is never used - confirm before enabling.
+ */
+void
+fbCompositeSolid_nx0565vmx (pixman_operator_t	op,
+			    pixman_image_t * pSrc,
+			    pixman_image_t * pMask,
+			    pixman_image_t * pDst,
+			    int16_t	xSrc,
+			    int16_t	ySrc,
+			    int16_t	xMask,
+			    int16_t	yMask,
+			    int16_t	xDst,
+			    int16_t	yDst,
+			    uint16_t	width,
+			    uint16_t	height)
+{
+    uint32_t	src;
+    uint16_t	*dstLine, *dst;
+    uint16_t	w;
+    int	dstStride;
+
+    fbComposeGetSolid (pSrc, pDst, src);
+
+    /* fully transparent source: nothing to draw */
+    if (src >> 24 == 0)
+	return;
+
+    fbComposeGetStart (pDst, xDst, yDst, uint16_t, dstStride, dstLine, 1);
+
+    while (height--)
+    {
+	dst = dstLine;
+	dstLine += dstStride;
+       vmxCombineOverU565(dst, src, width);
+    }
+}
+
+#endif
+
+/* Install the VMX combiners into pixman_composeFunctions.  When
+ * pixman_have_vmx() reports no VMX support the table is left untouched.
+ */
+void fbComposeSetupVMX (void)
+{
+    if (!pixman_have_vmx ())
+        return;
+
+    /* unified-alpha combiners */
+    pixman_composeFunctions.combineU[PIXMAN_OP_OVER] = vmxCombineOverU;
+    pixman_composeFunctions.combineU[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseU;
+    pixman_composeFunctions.combineU[PIXMAN_OP_IN] = vmxCombineInU;
+    pixman_composeFunctions.combineU[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseU;
+    pixman_composeFunctions.combineU[PIXMAN_OP_OUT] = vmxCombineOutU;
+    pixman_composeFunctions.combineU[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseU;
+    pixman_composeFunctions.combineU[PIXMAN_OP_ATOP] = vmxCombineAtopU;
+    pixman_composeFunctions.combineU[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseU;
+    pixman_composeFunctions.combineU[PIXMAN_OP_XOR] = vmxCombineXorU;
+    pixman_composeFunctions.combineU[PIXMAN_OP_ADD] = vmxCombineAddU;
+
+    /* component-alpha combiners */
+    pixman_composeFunctions.combineC[PIXMAN_OP_SRC] = vmxCombineSrcC;
+    pixman_composeFunctions.combineC[PIXMAN_OP_OVER] = vmxCombineOverC;
+    pixman_composeFunctions.combineC[PIXMAN_OP_OVER_REVERSE] = vmxCombineOverReverseC;
+    pixman_composeFunctions.combineC[PIXMAN_OP_IN] = vmxCombineInC;
+    pixman_composeFunctions.combineC[PIXMAN_OP_IN_REVERSE] = vmxCombineInReverseC;
+    pixman_composeFunctions.combineC[PIXMAN_OP_OUT] = vmxCombineOutC;
+    pixman_composeFunctions.combineC[PIXMAN_OP_OUT_REVERSE] = vmxCombineOutReverseC;
+    pixman_composeFunctions.combineC[PIXMAN_OP_ATOP] = vmxCombineAtopC;
+    pixman_composeFunctions.combineC[PIXMAN_OP_ATOP_REVERSE] = vmxCombineAtopReverseC;
+    pixman_composeFunctions.combineC[PIXMAN_OP_XOR] = vmxCombineXorC;
+    pixman_composeFunctions.combineC[PIXMAN_OP_ADD] = vmxCombineAddC;
+
+    /* mask application */
+    pixman_composeFunctions.combineMaskU = vmxCombineMaskU;
+}
diff --git a/pixman/pixman-vmx.h b/pixman/pixman-vmx.h
new file mode 100644
index 0000000..70cb53a
--- /dev/null
+++ b/pixman/pixman-vmx.h
@@ -0,0 +1,308 @@
+/*
+ * Copyright © 2007 Luca Barbato
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Luca Barbato not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  Luca Barbato makes no representations about the
+ * suitability of this software for any purpose.  It is provided "as is"
+ * without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Luca Barbato (lu_zero at gentoo.org)
+ *
+ * Based on work by Owen Taylor, Søren Sandmann and Lars Knoll
+ */
+
+#include "pixman-private.h"
+
+#ifdef USE_VMX /* a real pixman_have_vmx() function is declared only in this case */
+
+pixman_bool_t pixman_have_vmx(void); /* declaration only; the definition is not in this header */
+
+#else /* !USE_VMX */
+#define pixman_have_vmx() FALSE /* fallback: the call expands to the constant FALSE */
+#endif /* USE_VMX */
+
+#ifdef USE_VMX
+
+#define AVV(x...) {x} /* wraps its arguments in braces; 'x...' is the GNU named-variadic macro form */
+
+void fbComposeSetupVMX (void); /* NOTE(review): presumably installs the vmxCombine* routines - confirm in pixman-vmx.c */
+
+#if 0 /* disabled: nothing from here to the matching #endif is compiled */
+void fbCompositeIn_nx8x8vmx (pixman_operator_t	op,
+			     pixman_image_t * pSrc,
+			     pixman_image_t * pMask,
+			     pixman_image_t * pDst,
+			     INT16      xSrc,
+			     INT16      ySrc,
+			     INT16      xMask,
+			     INT16      yMask,
+			     INT16      xDst,
+			     INT16      yDst,
+			     CARD16     width,
+			     CARD16     height);
+
+void fbCompositeSolidMask_nx8888x0565Cvmx (pixman_operator_t      op,
+					   pixman_image_t * pSrc,
+					   pixman_image_t * pMask,
+					   pixman_image_t * pDst,
+					   INT16      xSrc,
+					   INT16      ySrc,
+					   INT16      xMask,
+					   INT16      yMask,
+					   INT16      xDst,
+					   INT16      yDst,
+					   CARD16     width,
+					   CARD16     height);
+
+void fbCompositeSrcAdd_8888x8888vmx (pixman_operator_t	op,
+				     pixman_image_t *	pSrc,
+				     pixman_image_t *	pMask,
+				     pixman_image_t *	pDst,
+				     INT16	xSrc,
+				     INT16      ySrc,
+				     INT16      xMask,
+				     INT16      yMask,
+				     INT16      xDst,
+				     INT16      yDst,
+				     CARD16     width,
+				     CARD16     height);
+
+void fbCompositeSolidMask_nx8888x8888Cvmx (pixman_operator_t	op,
+					   pixman_image_t *	pSrc,
+					   pixman_image_t *	pMask,
+					   pixman_image_t *	pDst,
+					   INT16	xSrc,
+					   INT16	ySrc,
+					   INT16	xMask,
+					   INT16	yMask,
+					   INT16	xDst,
+					   INT16	yDst,
+					   CARD16	width,
+					   CARD16	height);
+
+void fbCompositeSolidMask_nx8x8888vmx (pixman_operator_t      op,
+				       pixman_image_t * pSrc,
+				       pixman_image_t * pMask,
+				       pixman_image_t * pDst,
+				       INT16      xSrc,
+				       INT16      ySrc,
+				       INT16      xMask,
+				       INT16      yMask,
+				       INT16      xDst,
+				       INT16      yDst,
+				       CARD16     width,
+				       CARD16     height);
+
+void fbCompositeSolidMaskSrc_nx8x8888vmx (pixman_operator_t      op,
+					  pixman_image_t * pSrc,
+					  pixman_image_t * pMask,
+					  pixman_image_t * pDst,
+					  INT16      xSrc,
+					  INT16      ySrc,
+					  INT16      xMask,
+					  INT16      yMask,
+					  INT16      xDst,
+					  INT16      yDst,
+					  CARD16     width,
+					  CARD16     height);
+
+void fbCompositeSrcAdd_8888x8x8vmx (pixman_operator_t   op,
+				    pixman_image_t * pSrc,
+				    pixman_image_t * pMask,
+				    pixman_image_t * pDst,
+				    INT16      xSrc,
+				    INT16      ySrc,
+				    INT16      xMask,
+				    INT16      yMask,
+				    INT16      xDst,
+				    INT16      yDst,
+				    CARD16     width,
+				    CARD16     height);
+
+void fbCompositeIn_8x8vmx (pixman_operator_t	op,
+			   pixman_image_t * pSrc,
+			   pixman_image_t * pMask,
+			   pixman_image_t * pDst,
+			   INT16      xSrc,
+			   INT16      ySrc,
+			   INT16      xMask,
+			   INT16      yMask,
+			   INT16      xDst,
+			   INT16      yDst,
+			   CARD16     width,
+			   CARD16     height);
+
+void fbCompositeSrcAdd_8000x8000vmx (pixman_operator_t	op,
+				     pixman_image_t * pSrc,
+				     pixman_image_t * pMask,
+				     pixman_image_t * pDst,
+				     INT16      xSrc,
+				     INT16      ySrc,
+				     INT16      xMask,
+				     INT16      yMask,
+				     INT16      xDst,
+				     INT16      yDst,
+				     CARD16     width,
+				     CARD16     height);
+
+void fbCompositeSrc_8888RevNPx8888vmx (pixman_operator_t      op,
+				       pixman_image_t * pSrc,
+				       pixman_image_t * pMask,
+				       pixman_image_t * pDst,
+				       INT16      xSrc,
+				       INT16      ySrc,
+				       INT16      xMask,
+				       INT16      yMask,
+				       INT16      xDst,
+				       INT16      yDst,
+				       CARD16     width,
+				       CARD16     height);
+
+void fbCompositeSrc_8888x0565vmx (pixman_operator_t      op,
+				  pixman_image_t * pSrc,
+				  pixman_image_t * pMask,
+				  pixman_image_t * pDst,
+				  INT16      xSrc,
+				  INT16      ySrc,
+				  INT16      xMask,
+				  INT16      yMask,
+				  INT16      xDst,
+				  INT16      yDst,
+				  CARD16     width,
+				  CARD16     height);
+
+void fbCompositeSrc_8888RevNPx0565vmx (pixman_operator_t      op,
+				       pixman_image_t * pSrc,
+				       pixman_image_t * pMask,
+				       pixman_image_t * pDst,
+				       INT16      xSrc,
+				       INT16      ySrc,
+				       INT16      xMask,
+				       INT16      yMask,
+				       INT16      xDst,
+				       INT16      yDst,
+				       CARD16     width,
+				       CARD16     height);
+
+void fbCompositeSolid_nx8888vmx (pixman_operator_t		op,
+				 pixman_image_t *	pSrc,
+				 pixman_image_t *	pMask,
+				 pixman_image_t *	pDst,
+				 INT16		xSrc,
+				 INT16		ySrc,
+				 INT16		xMask,
+				 INT16		yMask,
+				 INT16		xDst,
+				 INT16		yDst,
+				 CARD16		width,
+				 CARD16		height);
+
+void fbCompositeSolid_nx0565vmx (pixman_operator_t		op,
+				 pixman_image_t *	pSrc,
+				 pixman_image_t *	pMask,
+				 pixman_image_t *	pDst,
+				 INT16		xSrc,
+				 INT16		ySrc,
+				 INT16		xMask,
+				 INT16		yMask,
+				 INT16		xDst,
+				 INT16		yDst,
+				 CARD16		width,
+				 CARD16		height);
+
+void fbCompositeSolidMask_nx8x0565vmx (pixman_operator_t      op,
+				       pixman_image_t * pSrc,
+				       pixman_image_t * pMask,
+				       pixman_image_t * pDst,
+				       INT16      xSrc,
+				       INT16      ySrc,
+				       INT16      xMask,
+				       INT16      yMask,
+				       INT16      xDst,
+				       INT16      yDst,
+				       CARD16     width,
+				       CARD16     height);
+
+void fbCompositeSrc_x888x8x8888vmx (pixman_operator_t	op,
+				    pixman_image_t *  pSrc,
+				    pixman_image_t *  pMask,
+				    pixman_image_t *  pDst,
+				    INT16	xSrc,
+				    INT16	ySrc,
+				    INT16       xMask,
+				    INT16       yMask,
+				    INT16       xDst,
+				    INT16       yDst,
+				    CARD16      width,
+				    CARD16      height);
+
+void fbCompositeSrc_8888x8x8888vmx (pixman_operator_t	op,
+				    pixman_image_t *  pSrc,
+				    pixman_image_t *  pMask,
+				    pixman_image_t *  pDst,
+				    INT16	xSrc,
+				    INT16	ySrc,
+				    INT16       xMask,
+				    INT16       yMask,
+				    INT16       xDst,
+				    INT16       yDst,
+				    CARD16      width,
+				    CARD16      height);
+
+void fbCompositeSrc_8888x8888vmx (pixman_operator_t      op,
+				  pixman_image_t * pSrc,
+				  pixman_image_t * pMask,
+				  pixman_image_t * pDst,
+				  INT16      xSrc,
+				  INT16      ySrc,
+				  INT16      xMask,
+				  INT16      yMask,
+				  INT16      xDst,
+				  INT16      yDst,
+				  CARD16     width,
+				  CARD16     height);
+
+pixman_bool_t fbCopyAreavmx (FbPixels	*pSrc,
+		    FbPixels	*pDst,
+		    int		src_x,
+		    int		src_y,
+		    int		dst_x,
+		    int		dst_y,
+		    int		width,
+		    int		height);
+
+void fbCompositeCopyAreavmx (pixman_operator_t	op,
+			     pixman_image_t *	pSrc,
+			     pixman_image_t *	pMask,
+			     pixman_image_t *	pDst,
+			     INT16	xSrc,
+			     INT16      ySrc,
+			     INT16      xMask,
+			     INT16      yMask,
+			     INT16      xDst,
+			     INT16      yDst,
+			     CARD16     width,
+			     CARD16     height);
+
+pixman_bool_t fbSolidFillvmx (FbPixels	*pDraw,
+		     int		x,
+		     int		y,
+		     int		width,
+		     int		height,
+		     FbBits		xor);
+#endif /* 0 - end of the disabled prototypes */
+#endif /* USE_VMX */


More information about the xorg-commit mailing list