pixman: Branch 'master' - 29 commits

Søren Sandmann Pedersen sandmann at kemper.freedesktop.org
Mon Jul 13 18:57:41 PDT 2009


 pixman/pixman-access.c           | 2516 +++++++++-------
 pixman/pixman-arm-neon.c         | 3879 ++++++++++++------------
 pixman/pixman-arm-simd.c         |  491 +--
 pixman/pixman-bits-image.c       |  399 +-
 pixman/pixman-combine.c.template | 2070 +++++++------
 pixman/pixman-combine.h.template |  340 +-
 pixman/pixman-compiler.h         |   12 
 pixman/pixman-conical-gradient.c |  138 
 pixman/pixman-cpu.c              |  426 +-
 pixman/pixman-edge.c             |  345 +-
 pixman/pixman-fast-path.c        |  976 +++---
 pixman/pixman-general.c          |  215 -
 pixman/pixman-gradient-walker.c  |  180 -
 pixman/pixman-image.c            |  207 -
 pixman/pixman-implementation.c   |  293 -
 pixman/pixman-linear-gradient.c  |  181 -
 pixman/pixman-matrix.c           | 1010 +++---
 pixman/pixman-mmx.c              | 2457 ++++++++-------
 pixman/pixman-private.h          |  823 ++---
 pixman/pixman-radial-gradient.c  |  163 -
 pixman/pixman-region.c           | 2164 +++++++------
 pixman/pixman-region16.c         |    4 
 pixman/pixman-solid-fill.c       |   34 
 pixman/pixman-sse2.c             | 6126 +++++++++++++++++++++------------------
 pixman/pixman-timer.c            |   12 
 pixman/pixman-trap.c             |  222 -
 pixman/pixman-utils.c            |  550 +--
 pixman/pixman-vmx.c              | 2301 +++++++-------
 pixman/pixman.c                  |  209 -
 pixman/pixman.h                  |  450 +-
 test/scaling-test.c              |  638 ++--
 31 files changed, 16418 insertions(+), 13413 deletions(-)

New commits:
commit 83f6e2eacff826ef9dbdefb95fcb76fa1247ac4e
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Mon Jul 13 19:58:04 2009 -0400

    Reindent and rename identifiers in scaling-test.c

diff --git a/test/scaling-test.c b/test/scaling-test.c
index 79f05ca..8899c59 100644
--- a/test/scaling-test.c
+++ b/test/scaling-test.c
@@ -29,164 +29,190 @@
 
 static uint32_t lcg_seed;
 
-uint32_t lcg_rand(void)
+uint32_t
+lcg_rand (void)
 {
     lcg_seed = lcg_seed * 1103515245 + 12345;
     return ((uint32_t)(lcg_seed / 65536) % 32768);
 }
 
-void lcg_srand(uint32_t seed)
+void
+lcg_srand (uint32_t seed)
 {
     lcg_seed = seed;
 }
 
-uint32_t lcg_rand_n(int max)
+uint32_t
+lcg_rand_n (int max)
 {
-    return lcg_rand() % max;
+    return lcg_rand () % max;
 }
 
 /*----------------------------------------------------------------------------*\
- *  CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29.
- *
- *  This program generates the CRC-32 values for the files named in the
- *  command-line arguments.  These are the same CRC-32 values used by GZIP,
- *  PKZIP, and ZMODEM.  The Crc32_ComputeBuf() can also be detached and
- *  used independently.
- *
- *  THIS PROGRAM IS PUBLIC-DOMAIN SOFTWARE.
- *
- *  Based on the byte-oriented implementation "File Verification Using CRC"
- *  by Mark R. Nelson in Dr. Dobb's Journal, May 1992, pp. 64-67.
- *
- *  v1.0.0: original release.
- *  v1.0.1: fixed printf formats.
- *  v1.0.2: fixed something else.
- *  v1.0.3: replaced CRC constant table by generator function.
- *  v1.0.4: reformatted code, made ANSI C.  1994-12-05.
- *  v2.0.0: rewrote to use memory buffer & static table, 2006-04-29.
+*  CRC-32 version 2.0.0 by Craig Bruce, 2006-04-29.
+*
+*  This program generates the CRC-32 values for the files named in the
+*  command-line arguments.  These are the same CRC-32 values used by GZIP,
+*  PKZIP, and ZMODEM.  The compute_crc32() can also be detached and
+*  used independently.
+*
+*  THIS PROGRAM IS PUBLIC-DOMAIN SOFTWARE.
+*
+*  Based on the byte-oriented implementation "File Verification Using CRC"
+*  by Mark R. Nelson in Dr. Dobb's Journal, May 1992, pp. 64-67.
+*
+*  v1.0.0: original release.
+*  v1.0.1: fixed printf formats.
+*  v1.0.2: fixed something else.
+*  v1.0.3: replaced CRC constant table by generator function.
+*  v1.0.4: reformatted code, made ANSI C.  1994-12-05.
+*  v2.0.0: rewrote to use memory buffer & static table, 2006-04-29.
 \*----------------------------------------------------------------------------*/
 
 /*----------------------------------------------------------------------------*\
- *  NAME:
- *     Crc32_ComputeBuf() - computes the CRC-32 value of a memory buffer
- *  DESCRIPTION:
- *     Computes or accumulates the CRC-32 value for a memory buffer.
- *     The 'inCrc32' gives a previously accumulated CRC-32 value to allow
- *     a CRC to be generated for multiple sequential buffer-fuls of data.
- *     The 'inCrc32' for the first buffer must be zero.
- *  ARGUMENTS:
- *     inCrc32 - accumulated CRC-32 value, must be 0 on first call
- *     buf     - buffer to compute CRC-32 value for
- *     bufLen  - number of bytes in buffer
- *  RETURNS:
- *     crc32 - computed CRC-32 value
- *  ERRORS:
- *     (no errors are possible)
+*  NAME:
+*     compute_crc32() - computes the CRC-32 value of a memory buffer
+*  DESCRIPTION:
+*     Computes or accumulates the CRC-32 value for a memory buffer.
+*     The 'in_crc32' gives a previously accumulated CRC-32 value to allow
+*     a CRC to be generated for multiple sequential buffer-fuls of data.
+*     The 'in_crc32' for the first buffer must be zero.
+*  ARGUMENTS:
+*     in_crc32 - accumulated CRC-32 value, must be 0 on first call
+*     buf     - buffer to compute CRC-32 value for
+*     buf_len  - number of bytes in buffer
+*  RETURNS:
+*     crc32 - computed CRC-32 value
+*  ERRORS:
+*     (no errors are possible)
 \*----------------------------------------------------------------------------*/
 
-static uint32_t Crc32_ComputeBuf( uint32_t inCrc32, const void *buf,
-                                       size_t bufLen )
+static uint32_t
+compute_crc32 (uint32_t    in_crc32,
+		  const void *buf,
+		  size_t      buf_len)
 {
-    static const uint32_t crcTable[256] = {
-   0x00000000,0x77073096,0xEE0E612C,0x990951BA,0x076DC419,0x706AF48F,0xE963A535,
-   0x9E6495A3,0x0EDB8832,0x79DCB8A4,0xE0D5E91E,0x97D2D988,0x09B64C2B,0x7EB17CBD,
-   0xE7B82D07,0x90BF1D91,0x1DB71064,0x6AB020F2,0xF3B97148,0x84BE41DE,0x1ADAD47D,
-   0x6DDDE4EB,0xF4D4B551,0x83D385C7,0x136C9856,0x646BA8C0,0xFD62F97A,0x8A65C9EC,
-   0x14015C4F,0x63066CD9,0xFA0F3D63,0x8D080DF5,0x3B6E20C8,0x4C69105E,0xD56041E4,
-   0xA2677172,0x3C03E4D1,0x4B04D447,0xD20D85FD,0xA50AB56B,0x35B5A8FA,0x42B2986C,
-   0xDBBBC9D6,0xACBCF940,0x32D86CE3,0x45DF5C75,0xDCD60DCF,0xABD13D59,0x26D930AC,
-   0x51DE003A,0xC8D75180,0xBFD06116,0x21B4F4B5,0x56B3C423,0xCFBA9599,0xB8BDA50F,
-   0x2802B89E,0x5F058808,0xC60CD9B2,0xB10BE924,0x2F6F7C87,0x58684C11,0xC1611DAB,
-   0xB6662D3D,0x76DC4190,0x01DB7106,0x98D220BC,0xEFD5102A,0x71B18589,0x06B6B51F,
-   0x9FBFE4A5,0xE8B8D433,0x7807C9A2,0x0F00F934,0x9609A88E,0xE10E9818,0x7F6A0DBB,
-   0x086D3D2D,0x91646C97,0xE6635C01,0x6B6B51F4,0x1C6C6162,0x856530D8,0xF262004E,
-   0x6C0695ED,0x1B01A57B,0x8208F4C1,0xF50FC457,0x65B0D9C6,0x12B7E950,0x8BBEB8EA,
-   0xFCB9887C,0x62DD1DDF,0x15DA2D49,0x8CD37CF3,0xFBD44C65,0x4DB26158,0x3AB551CE,
-   0xA3BC0074,0xD4BB30E2,0x4ADFA541,0x3DD895D7,0xA4D1C46D,0xD3D6F4FB,0x4369E96A,
-   0x346ED9FC,0xAD678846,0xDA60B8D0,0x44042D73,0x33031DE5,0xAA0A4C5F,0xDD0D7CC9,
-   0x5005713C,0x270241AA,0xBE0B1010,0xC90C2086,0x5768B525,0x206F85B3,0xB966D409,
-   0xCE61E49F,0x5EDEF90E,0x29D9C998,0xB0D09822,0xC7D7A8B4,0x59B33D17,0x2EB40D81,
-   0xB7BD5C3B,0xC0BA6CAD,0xEDB88320,0x9ABFB3B6,0x03B6E20C,0x74B1D29A,0xEAD54739,
-   0x9DD277AF,0x04DB2615,0x73DC1683,0xE3630B12,0x94643B84,0x0D6D6A3E,0x7A6A5AA8,
-   0xE40ECF0B,0x9309FF9D,0x0A00AE27,0x7D079EB1,0xF00F9344,0x8708A3D2,0x1E01F268,
-   0x6906C2FE,0xF762575D,0x806567CB,0x196C3671,0x6E6B06E7,0xFED41B76,0x89D32BE0,
-   0x10DA7A5A,0x67DD4ACC,0xF9B9DF6F,0x8EBEEFF9,0x17B7BE43,0x60B08ED5,0xD6D6A3E8,
-   0xA1D1937E,0x38D8C2C4,0x4FDFF252,0xD1BB67F1,0xA6BC5767,0x3FB506DD,0x48B2364B,
-   0xD80D2BDA,0xAF0A1B4C,0x36034AF6,0x41047A60,0xDF60EFC3,0xA867DF55,0x316E8EEF,
-   0x4669BE79,0xCB61B38C,0xBC66831A,0x256FD2A0,0x5268E236,0xCC0C7795,0xBB0B4703,
-   0x220216B9,0x5505262F,0xC5BA3BBE,0xB2BD0B28,0x2BB45A92,0x5CB36A04,0xC2D7FFA7,
-   0xB5D0CF31,0x2CD99E8B,0x5BDEAE1D,0x9B64C2B0,0xEC63F226,0x756AA39C,0x026D930A,
-   0x9C0906A9,0xEB0E363F,0x72076785,0x05005713,0x95BF4A82,0xE2B87A14,0x7BB12BAE,
-   0x0CB61B38,0x92D28E9B,0xE5D5BE0D,0x7CDCEFB7,0x0BDBDF21,0x86D3D2D4,0xF1D4E242,
-   0x68DDB3F8,0x1FDA836E,0x81BE16CD,0xF6B9265B,0x6FB077E1,0x18B74777,0x88085AE6,
-   0xFF0F6A70,0x66063BCA,0x11010B5C,0x8F659EFF,0xF862AE69,0x616BFFD3,0x166CCF45,
-   0xA00AE278,0xD70DD2EE,0x4E048354,0x3903B3C2,0xA7672661,0xD06016F7,0x4969474D,
-   0x3E6E77DB,0xAED16A4A,0xD9D65ADC,0x40DF0B66,0x37D83BF0,0xA9BCAE53,0xDEBB9EC5,
-   0x47B2CF7F,0x30B5FFE9,0xBDBDF21C,0xCABAC28A,0x53B39330,0x24B4A3A6,0xBAD03605,
-   0xCDD70693,0x54DE5729,0x23D967BF,0xB3667A2E,0xC4614AB8,0x5D681B02,0x2A6F2B94,
-   0xB40BBE37,0xC30C8EA1,0x5A05DF1B,0x2D02EF8D };
-    uint32_t crc32;
-    unsigned char *byteBuf;
-    size_t i;
+    static const uint32_t crc_table[256] = {
+	0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F,
+	0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
+	0x09B64C2B, 0x7EB17CBD,	0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2,
+	0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
+	0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC,	0x14015C4F, 0x63066CD9,
+	0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
+	0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C,
+	0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
+	0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423,
+	0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
+	0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106,
+	0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
+	0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D,
+	0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
+	0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950,
+	0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
+	0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7,
+	0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
+	0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA,
+	0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
+	0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81,
+	0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
+	0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84,
+	0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
+	0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB,
+	0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
+	0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E,
+	0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
+	0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55,
+	0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
+	0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28,
+	0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
+	0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F,
+	0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
+	0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242,
+	0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
+	0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69,
+	0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
+	0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC,
+	0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
+	0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693,
+	0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
+	0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
+    };
+    
+    uint32_t              crc32;
+    unsigned char *       byte_buf;
+    size_t                i;
 
     /** accumulate crc32 for buffer **/
-    crc32 = inCrc32 ^ 0xFFFFFFFF;
-    byteBuf = (unsigned char*) buf;
-    for (i=0; i < bufLen; i++) {
-        crc32 = (crc32 >> 8) ^ crcTable[ (crc32 ^ byteBuf[i]) & 0xFF ];
-    }
-    return( crc32 ^ 0xFFFFFFFF );
+    crc32 = in_crc32 ^ 0xFFFFFFFF;
+    byte_buf = (unsigned char*) buf;
+
+    for (i = 0; i < buf_len; i++)
+	crc32 = (crc32 >> 8) ^ crc_table[(crc32 ^ byte_buf[i]) & 0xFF];
+    
+    return (crc32 ^ 0xFFFFFFFF);
 }
 
 /* perform endian conversion of pixel data */
-static void image_endian_swap(pixman_image_t *img, int bpp)
+static void
+image_endian_swap (pixman_image_t *img,
+		   int             bpp)
 {
-    int stride = pixman_image_get_stride(img);
-    uint32_t *data = pixman_image_get_data(img);
-    int height = pixman_image_get_height(img);;
+    int       stride = pixman_image_get_stride (img);
+    uint32_t *data = pixman_image_get_data (img);
+    int       height = pixman_image_get_height (img);
     int i, j;
 
     /* swap bytes only on big endian systems */
     volatile uint16_t endian_check_var = 0x1234;
-    if (*(volatile uint8_t *)&endian_check_var != 0x12) return;
-
-    for(i = 0; i < height; i++) {
-        char *line_data = (char *)data + stride * i;
-        /* swap bytes only for 16, 24 and 32 bpp for now */
-        switch (bpp) {
-        case 16:
-            for (j = 0; j + 2 <= stride; j += 2) {
-                char t1 = line_data[j + 0];
-                char t2 = line_data[j + 1];
-                line_data[j + 1] = t1;
-                line_data[j + 0] = t2;
-            }
-            break;
-        case 24:
-            for (j = 0; j + 3 <= stride; j += 3) {
-                char t1 = line_data[j + 0];
-                char t2 = line_data[j + 1];
-                char t3 = line_data[j + 2];
-                line_data[j + 2] = t1;
-                line_data[j + 1] = t2;
-                line_data[j + 0] = t3;
-            }
-            break;
-        case 32:
-            for (j = 0; j + 4 <= stride; j += 4) {
-                char t1 = line_data[j + 0];
-                char t2 = line_data[j + 1];
-                char t3 = line_data[j + 2];
-                char t4 = line_data[j + 3];
-                line_data[j + 3] = t1;
-                line_data[j + 2] = t2;
-                line_data[j + 1] = t3;
-                line_data[j + 0] = t4;
-            }
-            break;
-        default:
-            break;
-        }
+    if (*(volatile uint8_t *)&endian_check_var != 0x12)
+	return;
+
+    for (i = 0; i < height; i++)
+    {
+	char *line_data = (char *)data + stride * i;
+	
+	/* swap bytes only for 16, 24 and 32 bpp for now */
+	switch (bpp)
+	{
+	case 16:
+	    for (j = 0; j + 2 <= stride; j += 2)
+	    {
+		char t1 = line_data[j + 0];
+		char t2 = line_data[j + 1];
+		line_data[j + 1] = t1;
+		line_data[j + 0] = t2;
+	    }
+	    break;
+
+	case 24:
+	    for (j = 0; j + 3 <= stride; j += 3)
+	    {
+		char t1 = line_data[j + 0];
+		char t2 = line_data[j + 1];
+		char t3 = line_data[j + 2];
+		line_data[j + 2] = t1;
+		line_data[j + 1] = t2;
+		line_data[j + 0] = t3;
+	    }
+	    break;
+
+	case 32:
+	    for (j = 0; j + 4 <= stride; j += 4)
+	    {
+		char t1 = line_data[j + 0];
+		char t2 = line_data[j + 1];
+		char t3 = line_data[j + 2];
+		char t4 = line_data[j + 3];
+		line_data[j + 3] = t1;
+		line_data[j + 2] = t2;
+		line_data[j + 1] = t3;
+		line_data[j + 0] = t4;
+	    }
+	    break;
+
+	default:
+	    break;
+	}
     }
 }
 
@@ -199,202 +225,250 @@ static void image_endian_swap(pixman_image_t *img, int bpp)
 /*
  * Composite operation with pseudorandom images
  */
-uint32_t test_composite(uint32_t initcrc, int testnum, int verbose)
+uint32_t
+test_composite (uint32_t initcrc,
+		int      testnum,
+		int      verbose)
 {
-    int i;
-    pixman_image_t *src_img;
-    pixman_image_t *dst_img;
+    int                i;
+    pixman_image_t *   src_img;
+    pixman_image_t *   dst_img;
     pixman_transform_t transform;
-    pixman_region16_t clip;
-    int src_width, src_height;
-    int dst_width, dst_height;
-    int src_stride, dst_stride;
-    int src_x, src_y;
-    int dst_x, dst_y;
-    int src_bpp;
-    int dst_bpp;
-    int w, h;
-    int scale_x = 32768, scale_y = 32768;
-    int op;
-    int repeat = 0;
-    int src_fmt, dst_fmt;
-    uint32_t *srcbuf;
-    uint32_t *dstbuf;
-    uint32_t crc32;
-
-    lcg_srand(testnum);
-
-    src_bpp = (lcg_rand_n(2) == 0) ? 2 : 4;
-    dst_bpp = (lcg_rand_n(2) == 0) ? 2 : 4;
-    op = (lcg_rand_n(2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER;
-
-    src_width = lcg_rand_n(MAX_SRC_WIDTH) + 1;
-    src_height = lcg_rand_n(MAX_SRC_HEIGHT) + 1;
-    dst_width = lcg_rand_n(MAX_DST_WIDTH) + 1;
-    dst_height = lcg_rand_n(MAX_DST_HEIGHT) + 1;
-    src_stride = src_width * src_bpp + lcg_rand_n(MAX_STRIDE) * src_bpp;
-    dst_stride = dst_width * dst_bpp + lcg_rand_n(MAX_STRIDE) * dst_bpp;
-    if (src_stride & 3) src_stride += 2;
-    if (dst_stride & 3) dst_stride += 2;
-
-    src_x = -(src_width / 4) + lcg_rand_n(src_width * 3 / 2);
-    src_y = -(src_height / 4) + lcg_rand_n(src_height * 3 / 2);
-    dst_x = -(dst_width / 4) + lcg_rand_n(dst_width * 3 / 2);
-    dst_y = -(dst_height / 4) + lcg_rand_n(dst_height * 3 / 2);
-    w = lcg_rand_n(dst_width * 3 / 2 - dst_x);
-    h = lcg_rand_n(dst_height * 3 / 2 - dst_y);
-
-    srcbuf = (uint32_t *)malloc(src_stride * src_height);
-    dstbuf = (uint32_t *)malloc(dst_stride * dst_height);
+    pixman_region16_t  clip;
+    int                src_width, src_height;
+    int                dst_width, dst_height;
+    int                src_stride, dst_stride;
+    int                src_x, src_y;
+    int                dst_x, dst_y;
+    int                src_bpp;
+    int                dst_bpp;
+    int                w, h;
+    int                scale_x = 32768, scale_y = 32768;
+    int                op;
+    int                repeat = 0;
+    int                src_fmt, dst_fmt;
+    uint32_t *         srcbuf;
+    uint32_t *         dstbuf;
+    uint32_t           crc32;
+
+    lcg_srand (testnum);
+
+    src_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
+    dst_bpp = (lcg_rand_n (2) == 0) ? 2 : 4;
+    op = (lcg_rand_n (2) == 0) ? PIXMAN_OP_SRC : PIXMAN_OP_OVER;
+
+    src_width = lcg_rand_n (MAX_SRC_WIDTH) + 1;
+    src_height = lcg_rand_n (MAX_SRC_HEIGHT) + 1;
+    dst_width = lcg_rand_n (MAX_DST_WIDTH) + 1;
+    dst_height = lcg_rand_n (MAX_DST_HEIGHT) + 1;
+    src_stride = src_width * src_bpp + lcg_rand_n (MAX_STRIDE) * src_bpp;
+    dst_stride = dst_width * dst_bpp + lcg_rand_n (MAX_STRIDE) * dst_bpp;
+
+    if (src_stride & 3)
+	src_stride += 2;
+    
+    if (dst_stride & 3)
+	dst_stride += 2;
+
+    src_x = -(src_width / 4) + lcg_rand_n (src_width * 3 / 2);
+    src_y = -(src_height / 4) + lcg_rand_n (src_height * 3 / 2);
+    dst_x = -(dst_width / 4) + lcg_rand_n (dst_width * 3 / 2);
+    dst_y = -(dst_height / 4) + lcg_rand_n (dst_height * 3 / 2);
+    w = lcg_rand_n (dst_width * 3 / 2 - dst_x);
+    h = lcg_rand_n (dst_height * 3 / 2 - dst_y);
+
+    srcbuf = (uint32_t *)malloc (src_stride * src_height);
+    dstbuf = (uint32_t *)malloc (dst_stride * dst_height);
+
     for (i = 0; i < src_stride * src_height; i++)
-        *((uint8_t *)srcbuf + i) = lcg_rand_n(256);
+	*((uint8_t *)srcbuf + i) = lcg_rand_n (256);
+
     for (i = 0; i < dst_stride * dst_height; i++)
-        *((uint8_t *)dstbuf + i) = lcg_rand_n(256);
+	*((uint8_t *)dstbuf + i) = lcg_rand_n (256);
 
-    src_fmt = src_bpp == 4 ? (lcg_rand_n(2) == 0 ?
-        PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
+    src_fmt = src_bpp == 4 ? (lcg_rand_n (2) == 0 ?
+                              PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
 
-    dst_fmt = dst_bpp == 4 ? (lcg_rand_n(2) == 0 ?
-        PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
+    dst_fmt = dst_bpp == 4 ? (lcg_rand_n (2) == 0 ?
+                              PIXMAN_a8r8g8b8 : PIXMAN_x8r8g8b8) : PIXMAN_r5g6b5;
 
-    src_img = pixman_image_create_bits(
+    src_img = pixman_image_create_bits (
         src_fmt, src_width, src_height, srcbuf, src_stride);
 
-    dst_img = pixman_image_create_bits(
+    dst_img = pixman_image_create_bits (
         dst_fmt, dst_width, dst_height, dstbuf, dst_stride);
 
-    image_endian_swap(src_img, src_bpp * 8);
-    image_endian_swap(dst_img, dst_bpp * 8);
+    image_endian_swap (src_img, src_bpp * 8);
+    image_endian_swap (dst_img, dst_bpp * 8);
 
-    if (lcg_rand_n(8) > 0) {
-        scale_x = 32768 + lcg_rand_n(65536);
-        scale_y = 32768 + lcg_rand_n(65536);
-        pixman_transform_init_scale(&transform, scale_x, scale_y);
-        pixman_image_set_transform(src_img, &transform);
+    if (lcg_rand_n (8) > 0)
+    {
+	scale_x = 32768 + lcg_rand_n (65536);
+	scale_y = 32768 + lcg_rand_n (65536);
+	pixman_transform_init_scale (&transform, scale_x, scale_y);
+	pixman_image_set_transform (src_img, &transform);
     }
 
-    switch (lcg_rand_n(4)) {
-        case 0: repeat = PIXMAN_REPEAT_NONE; break;
-        case 1: repeat = PIXMAN_REPEAT_NORMAL; break;
-        case 2: repeat = PIXMAN_REPEAT_PAD; break;
-        case 3: repeat = PIXMAN_REPEAT_REFLECT; break;
+    switch (lcg_rand_n (4))
+    {
+    case 0:
+	repeat = PIXMAN_REPEAT_NONE;
+	break;
+
+    case 1:
+	repeat = PIXMAN_REPEAT_NORMAL;
+	break;
+
+    case 2:
+	repeat = PIXMAN_REPEAT_PAD;
+	break;
+
+    case 3:
+	repeat = PIXMAN_REPEAT_REFLECT;
+	break;
     }
-    pixman_image_set_repeat(src_img, repeat);
-
-    if (verbose) {
-        printf("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
-        printf("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n",
-            op, scale_x, scale_y, repeat);
-        printf("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n",
-            src_width, src_height, dst_width, dst_height);
-        printf("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n",
-            src_x, src_y, dst_x, dst_y);
-        printf("w=%d, h=%d\n", w, h);
+    pixman_image_set_repeat (src_img, repeat);
+
+    if (verbose)
+    {
+	printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
+	printf ("op=%d, scale_x=%d, scale_y=%d, repeat=%d\n",
+	        op, scale_x, scale_y, repeat);
+	printf ("src_width=%d, src_height=%d, dst_width=%d, dst_height=%d\n",
+	        src_width, src_height, dst_width, dst_height);
+	printf ("src_x=%d, src_y=%d, dst_x=%d, dst_y=%d\n",
+	        src_x, src_y, dst_x, dst_y);
+	printf ("w=%d, h=%d\n", w, h);
     }
 
-    if (lcg_rand_n(8) == 0) {
-        pixman_box16_t clip_boxes[2];
-        int n = lcg_rand_n(2) + 1;
-        for (i = 0; i < n; i++) {
-            clip_boxes[i].x1 = lcg_rand_n(src_width);
-            clip_boxes[i].y1 = lcg_rand_n(src_height);
-            clip_boxes[i].x2 = clip_boxes[i].x1 + lcg_rand_n(src_width - clip_boxes[i].x1);
-            clip_boxes[i].y2 = clip_boxes[i].y1 + lcg_rand_n(src_height - clip_boxes[i].y1);
-            if (verbose) {
-                printf("source clip box: [%d,%d-%d,%d]\n",
-                    clip_boxes[i].x1, clip_boxes[i].y1,
-                    clip_boxes[i].x2, clip_boxes[i].y2);
-            }
-        }
-        pixman_region_init_rects(&clip, clip_boxes, n);
-        pixman_image_set_clip_region(src_img, &clip);
-        pixman_image_set_source_clipping(src_img, 1);
-        pixman_region_fini(&clip);
+    if (lcg_rand_n (8) == 0)
+    {
+	pixman_box16_t clip_boxes[2];
+	int            n = lcg_rand_n (2) + 1;
+
+	for (i = 0; i < n; i++)
+	{
+	    clip_boxes[i].x1 = lcg_rand_n (src_width);
+	    clip_boxes[i].y1 = lcg_rand_n (src_height);
+	    clip_boxes[i].x2 =
+		clip_boxes[i].x1 + lcg_rand_n (src_width - clip_boxes[i].x1);
+	    clip_boxes[i].y2 =
+		clip_boxes[i].y1 + lcg_rand_n (src_height - clip_boxes[i].y1);
+
+	    if (verbose)
+	    {
+		printf ("source clip box: [%d,%d-%d,%d]\n",
+		        clip_boxes[i].x1, clip_boxes[i].y1,
+		        clip_boxes[i].x2, clip_boxes[i].y2);
+	    }
+	}
+	
+	pixman_region_init_rects (&clip, clip_boxes, n);
+	pixman_image_set_clip_region (src_img, &clip);
+	pixman_image_set_source_clipping (src_img, 1);
+	pixman_region_fini (&clip);
     }
 
-    if (lcg_rand_n(8) == 0) {
-        pixman_box16_t clip_boxes[2];
-        int n = lcg_rand_n(2) + 1;
-        for (i = 0; i < n; i++) {
-            clip_boxes[i].x1 = lcg_rand_n(dst_width);
-            clip_boxes[i].y1 = lcg_rand_n(dst_height);
-            clip_boxes[i].x2 = clip_boxes[i].x1 + lcg_rand_n(dst_width - clip_boxes[i].x1);
-            clip_boxes[i].y2 = clip_boxes[i].y1 + lcg_rand_n(dst_height - clip_boxes[i].y1);
-            if (verbose) {
-                printf("destination clip box: [%d,%d-%d,%d]\n",
-                    clip_boxes[i].x1, clip_boxes[i].y1,
-                    clip_boxes[i].x2, clip_boxes[i].y2);
-            }
-        }
-        pixman_region_init_rects(&clip, clip_boxes, n);
-        pixman_image_set_clip_region(dst_img, &clip);
-        pixman_region_fini(&clip);
+    if (lcg_rand_n (8) == 0)
+    {
+	pixman_box16_t clip_boxes[2];
+	int            n = lcg_rand_n (2) + 1;
+	for (i = 0; i < n; i++)
+	{
+	    clip_boxes[i].x1 = lcg_rand_n (dst_width);
+	    clip_boxes[i].y1 = lcg_rand_n (dst_height);
+	    clip_boxes[i].x2 =
+		clip_boxes[i].x1 + lcg_rand_n (dst_width - clip_boxes[i].x1);
+	    clip_boxes[i].y2 =
+		clip_boxes[i].y1 + lcg_rand_n (dst_height - clip_boxes[i].y1);
+
+	    if (verbose)
+	    {
+		printf ("destination clip box: [%d,%d-%d,%d]\n",
+		        clip_boxes[i].x1, clip_boxes[i].y1,
+		        clip_boxes[i].x2, clip_boxes[i].y2);
+	    }
+	}
+	pixman_region_init_rects (&clip, clip_boxes, n);
+	pixman_image_set_clip_region (dst_img, &clip);
+	pixman_region_fini (&clip);
     }
 
     pixman_image_composite (op, src_img, NULL, dst_img,
                             src_x, src_y, 0, 0, dst_x, dst_y, w, h);
 
-    if (dst_fmt == PIXMAN_x8r8g8b8) {
-        /* ignore unused part */
-        for (i = 0; i < dst_stride * dst_height / 4; i++)
-            dstbuf[i] &= 0xFFFFFF;
+    if (dst_fmt == PIXMAN_x8r8g8b8)
+    {
+	/* ignore unused part */
+	for (i = 0; i < dst_stride * dst_height / 4; i++)
+	    dstbuf[i] &= 0xFFFFFF;
     }
 
-    image_endian_swap(dst_img, dst_bpp * 8);
+    image_endian_swap (dst_img, dst_bpp * 8);
+
+    if (verbose)
+    {
+	int j;
+	
+	for (i = 0; i < dst_height; i++)
+	{
+	    for (j = 0; j < dst_stride; j++)
+		printf ("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j));
 
-    if (verbose) {
-        int j;
-        for (i = 0; i < dst_height; i++) {
-            for (j = 0; j < dst_stride; j++) {
-                printf("%02X ", *((uint8_t *)dstbuf + i * dst_stride + j));
-            }
-            printf("\n");
-        }
+	    printf ("\n");
+	}
     }
 
     pixman_image_unref (src_img);
     pixman_image_unref (dst_img);
 
-    crc32 = Crc32_ComputeBuf(initcrc, dstbuf, dst_stride * dst_height);
-    free(srcbuf);
-    free(dstbuf);
+    crc32 = compute_crc32 (initcrc, dstbuf, dst_stride * dst_height);
+    free (srcbuf);
+    free (dstbuf);
     return crc32;
 }
 
-int main(int argc, char *argv[])
+int
+main (int   argc, char *argv[])
 {
-    int i, n = 0;
+    int      i, n = 0;
     uint32_t crc = 0;
 
-    pixman_disable_out_of_bounds_workaround();
-    
+    pixman_disable_out_of_bounds_workaround ();
+
     if (argc >= 2)
-        n = atoi(argv[1]);
+	n = atoi (argv[1]);
 
     if (n == 0) n = 3000000;
 
-    if (n < 0) {
-        crc = test_composite(0, -n, 1);
-        printf("crc32=%08X\n", crc);
+    if (n < 0)
+    {
+	crc = test_composite (0, -n, 1);
+	printf ("crc32=%08X\n", crc);
     }
-    else {
-        for (i = 1; i <= n; i++)
-        {
-            crc = test_composite(crc, i, 0);
-        }
-        printf("crc32=%08X\n", crc);
-
-        if (n == 3000000) {
-            /* predefined value for running with all the fastpath functions disabled  */
-            /* it needs to be updated every time changes are introduced to this program! */
-
-	    if (crc == 0x0B633CF4) {
-                printf("scaling test passed\n");
-            } else {
-                printf("scaling test failed!\n");
-                return 1;
-            }
-        }
+    else
+    {
+	for (i = 1; i <= n; i++)
+	    crc = test_composite (crc, i, 0);
+
+	printf ("crc32=%08X\n", crc);
+
+	if (n == 3000000)
+	{
+	    /* predefined value for running with all the fastpath functions disabled  */
+	    /* it needs to be updated every time changes are introduced to this program! */
+
+	    if (crc == 0x0B633CF4)
+	    {
+		printf ("scaling test passed\n");
+	    }
+	    else
+	    {
+		printf ("scaling test failed!\n");
+		return 1;
+	    }
+	}
     }
+
     return 0;
 }
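
The test driver above is fully deterministic: each test case reseeds the
LCG from its test number, and every composited destination buffer is folded
into a single running CRC-32 by feeding the previous value back in as
'in_crc32'. A standalone sketch of that seed, generate, accumulate pattern
follows, using the same LCG constants but a bitwise CRC-32 in place of the
table (crc32_update is a hypothetical stand-in for the table-driven
compute_crc32 above, and the buffers here are illustrative, not pixman
output):

#include <stdint.h>
#include <stdio.h>

static uint32_t lcg_seed;

/* same constants as scaling-test.c: deterministic, seedable, 15-bit output */
static uint32_t
lcg_rand (void)
{
    lcg_seed = lcg_seed * 1103515245 + 12345;
    return ((uint32_t)(lcg_seed / 65536) % 32768);
}

static void
lcg_srand (uint32_t seed)
{
    lcg_seed = seed;
}

/* bitwise CRC-32 (polynomial 0xEDB88320, reflected), computing the same
 * function as the table-driven compute_crc32; it chains the same way
 * through its first argument, which must be 0 on the first call */
static uint32_t
crc32_update (uint32_t crc, const void *buf, size_t len)
{
    const unsigned char *p = buf;
    size_t i;
    int k;

    crc ^= 0xFFFFFFFF;
    for (i = 0; i < len; i++)
    {
	crc ^= p[i];
	for (k = 0; k < 8; k++)
	    crc = (crc >> 1) ^ (0xEDB88320 & -(crc & 1));
    }
    return crc ^ 0xFFFFFFFF;
}

int
main (void)
{
    uint32_t crc = 0;
    int test;

    for (test = 1; test <= 3; test++)
    {
	uint8_t buf[16];
	size_t i;

	/* reproducible from the test number alone, as in test_composite */
	lcg_srand (test);
	for (i = 0; i < sizeof buf; i++)
	    buf[i] = lcg_rand () % 256;

	/* accumulate across all tests into one checksum */
	crc = crc32_update (crc, buf, sizeof buf);
    }

    printf ("crc32=%08X\n", crc);
    return 0;
}

Because both the pseudorandom inputs and the checksum chaining are
deterministic, a single final crc32 value pins down the output of millions
of composite operations, which is what lets the test compare against the
predefined 0x0B633CF4 constant.
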
commit 9a6ad34810421a30250ef331fb75b2a48ce9e564
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Mon Jul 13 02:37:19 2009 -0400

    Reformat pixman.h

diff --git a/pixman/pixman.h b/pixman/pixman.h
index 0a85e56..969d427 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -166,147 +166,98 @@ struct pixman_transform
 /* forward declaration (sorry) */
 struct pixman_box16;
 
-void
-pixman_transform_init_identity(struct pixman_transform *matrix);
-
-pixman_bool_t
-pixman_transform_point_3d (const struct pixman_transform *transform,
-			   struct pixman_vector *vector);
-
-pixman_bool_t
-pixman_transform_point(const struct pixman_transform *transform,
-		       struct pixman_vector *vector);
-
-pixman_bool_t
-pixman_transform_multiply (struct pixman_transform *dst,
-			   const struct pixman_transform *l,
-			   const struct pixman_transform *r);
-
-void
-pixman_transform_init_scale (struct pixman_transform *t,
-			     pixman_fixed_t sx,
-			     pixman_fixed_t sy);
-
-pixman_bool_t
-pixman_transform_scale(struct pixman_transform *forward,
-		       struct pixman_transform *reverse,
-		       pixman_fixed_t sx, pixman_fixed_t sy);
-
-void
-pixman_transform_init_rotate(struct pixman_transform *t,
-			     pixman_fixed_t cos,
-			     pixman_fixed_t sin);
-
-pixman_bool_t
-pixman_transform_rotate(struct pixman_transform *forward,
-			struct pixman_transform *reverse,
-			pixman_fixed_t c, pixman_fixed_t s);
-
-void
-pixman_transform_init_translate(struct pixman_transform *t,
-				pixman_fixed_t tx, pixman_fixed_t ty);
-
-
-pixman_bool_t
-pixman_transform_translate(struct pixman_transform *forward,
-			   struct pixman_transform *reverse,
-			   pixman_fixed_t tx, pixman_fixed_t ty);
-
-pixman_bool_t
-pixman_transform_bounds(const struct pixman_transform *matrix,
-			struct pixman_box16 *b);
-
-
-pixman_bool_t
-pixman_transform_invert (struct pixman_transform *dst,
-			 const struct pixman_transform *src);
-
-pixman_bool_t
-pixman_transform_is_identity(const struct pixman_transform *t);
-
-pixman_bool_t
-pixman_transform_is_scale(const struct pixman_transform *t);
-
-pixman_bool_t
-pixman_transform_is_int_translate(const struct pixman_transform *t);
-
-pixman_bool_t
-pixman_transform_is_inverse (const struct pixman_transform *a,
-			     const struct pixman_transform *b);
-
+void          pixman_transform_init_identity    (struct pixman_transform       *matrix);
+pixman_bool_t pixman_transform_point_3d         (const struct pixman_transform *transform,
+						 struct pixman_vector          *vector);
+pixman_bool_t pixman_transform_point            (const struct pixman_transform *transform,
+						 struct pixman_vector          *vector);
+pixman_bool_t pixman_transform_multiply         (struct pixman_transform       *dst,
+						 const struct pixman_transform *l,
+						 const struct pixman_transform *r);
+void          pixman_transform_init_scale       (struct pixman_transform       *t,
+						 pixman_fixed_t                 sx,
+						 pixman_fixed_t                 sy);
+pixman_bool_t pixman_transform_scale            (struct pixman_transform       *forward,
+						 struct pixman_transform       *reverse,
+						 pixman_fixed_t                 sx,
+						 pixman_fixed_t                 sy);
+void          pixman_transform_init_rotate      (struct pixman_transform       *t,
+						 pixman_fixed_t                 cos,
+						 pixman_fixed_t                 sin);
+pixman_bool_t pixman_transform_rotate           (struct pixman_transform       *forward,
+						 struct pixman_transform       *reverse,
+						 pixman_fixed_t                 c,
+						 pixman_fixed_t                 s);
+void          pixman_transform_init_translate   (struct pixman_transform       *t,
+						 pixman_fixed_t                 tx,
+						 pixman_fixed_t                 ty);
+pixman_bool_t pixman_transform_translate        (struct pixman_transform       *forward,
+						 struct pixman_transform       *reverse,
+						 pixman_fixed_t                 tx,
+						 pixman_fixed_t                 ty);
+pixman_bool_t pixman_transform_bounds           (const struct pixman_transform *matrix,
+						 struct pixman_box16           *b);
+pixman_bool_t pixman_transform_invert           (struct pixman_transform       *dst,
+						 const struct pixman_transform *src);
+pixman_bool_t pixman_transform_is_identity      (const struct pixman_transform *t);
+pixman_bool_t pixman_transform_is_scale         (const struct pixman_transform *t);
+pixman_bool_t pixman_transform_is_int_translate (const struct pixman_transform *t);
+pixman_bool_t pixman_transform_is_inverse       (const struct pixman_transform *a,
+						 const struct pixman_transform *b);
 
 /*
  * Floating point matrices
  */
-struct pixman_f_vector {
+struct pixman_f_vector
+{
     double  v[3];
 };
 
-struct pixman_f_transform {
+struct pixman_f_transform
+{
     double  m[3][3];
 };
 
-pixman_bool_t
-pixman_transform_from_pixman_f_transform (struct pixman_transform *t,
-					  const struct pixman_f_transform *ft);
-
-void
-pixman_f_transform_from_pixman_transform (struct pixman_f_transform *ft,
-					  const struct pixman_transform *t);
-
-pixman_bool_t
-pixman_transform_from_pixman_f_transform (struct pixman_transform *t,
-					  const struct pixman_f_transform *ft);
-
-pixman_bool_t
-pixman_f_transform_invert (struct pixman_f_transform *dst,
-			   const struct pixman_f_transform *src);
+pixman_bool_t pixman_transform_from_pixman_f_transform (struct pixman_transform         *t,
+							const struct pixman_f_transform *ft);
+void          pixman_f_transform_from_pixman_transform (struct pixman_f_transform       *ft,
+							const struct pixman_transform   *t);
+pixman_bool_t pixman_transform_from_pixman_f_transform (struct pixman_transform         *t,
+							const struct pixman_f_transform *ft);
+pixman_bool_t pixman_f_transform_invert                (struct pixman_f_transform       *dst,
+							const struct pixman_f_transform *src);
+pixman_bool_t pixman_f_transform_point                 (const struct pixman_f_transform *t,
+							struct pixman_f_vector          *v);
+void          pixman_f_transform_point_3d              (const struct pixman_f_transform *t,
+							struct pixman_f_vector          *v);
+void          pixman_f_transform_multiply              (struct pixman_f_transform       *dst,
+							const struct pixman_f_transform *l,
+							const struct pixman_f_transform *r);
+void          pixman_f_transform_init_scale            (struct pixman_f_transform       *t,
+							double                           sx,
+							double                           sy);
+pixman_bool_t pixman_f_transform_scale                 (struct pixman_f_transform       *forward,
+							struct pixman_f_transform       *reverse,
+							double                           sx,
+							double                           sy);
+void          pixman_f_transform_init_rotate           (struct pixman_f_transform       *t,
+							double                           cos,
+							double                           sin);
+pixman_bool_t pixman_f_transform_rotate                (struct pixman_f_transform       *forward,
+							struct pixman_f_transform       *reverse,
+							double                           c,
+							double                           s);
+void          pixman_f_transform_init_translate        (struct pixman_f_transform       *t,
+							double                           tx,
+							double                           ty);
+pixman_bool_t pixman_f_transform_translate             (struct pixman_f_transform       *forward,
+							struct pixman_f_transform       *reverse,
+							double                           tx,
+							double                           ty);
+pixman_bool_t pixman_f_transform_bounds                (const struct pixman_f_transform *t,
+							struct pixman_box16             *b);
+void          pixman_f_transform_init_identity         (struct pixman_f_transform       *t);
 
-pixman_bool_t
-pixman_f_transform_point (const struct pixman_f_transform *t,
-			  struct pixman_f_vector *v);
-
-void
-pixman_f_transform_point_3d (const struct pixman_f_transform *t,
-			     struct pixman_f_vector	*v);
-
-
-void
-pixman_f_transform_multiply (struct pixman_f_transform *dst,
-			     const struct pixman_f_transform *l,
-			     const struct pixman_f_transform *r);
-
-void
-pixman_f_transform_init_scale (struct pixman_f_transform *t, double sx, double sy);
-
-pixman_bool_t
-pixman_f_transform_scale (struct pixman_f_transform *forward,
-			  struct pixman_f_transform *reverse,
-			  double sx, double sy);
-
-void
-pixman_f_transform_init_rotate (struct pixman_f_transform *t, double cos, double sin);
-
-pixman_bool_t
-pixman_f_transform_rotate (struct pixman_f_transform *forward,
-			   struct pixman_f_transform *reverse,
-			   double c, double s);
-
-void
-pixman_f_transform_init_translate (struct pixman_f_transform *t, double tx, double ty);
-
-pixman_bool_t
-pixman_f_transform_translate (struct pixman_f_transform *forward,
-			      struct pixman_f_transform *reverse,
-			      double tx, double ty);
-
-pixman_bool_t
-pixman_f_transform_bounds (const struct pixman_f_transform *t, struct pixman_box16 *b);
-
-void
-pixman_f_transform_init_identity (struct pixman_f_transform *t);
-
-/* Don't blame me, blame XRender */
 typedef enum
 {
     PIXMAN_REPEAT_NONE,
@@ -404,8 +355,8 @@ struct pixman_region16_data {
 
 struct pixman_rectangle16
 {
-    int16_t x, y;
-    uint16_t width, height;
+    int16_t	x, y;
+    uint16_t	width, height;
 };
 
 struct pixman_box16
@@ -416,7 +367,7 @@ struct pixman_box16
 struct pixman_region16
 {
     pixman_box16_t          extents;
-    pixman_region16_data_t  *data;
+    pixman_region16_data_t *data;
 };
 
 typedef enum
@@ -426,73 +377,69 @@ typedef enum
     PIXMAN_REGION_PART
 } pixman_region_overlap_t;
 
-/* This function exists only to make it possible to preserve the X ABI - it should
- * go away at first opportunity.
+/* This function exists only to make it possible to preserve
+ * the X ABI - it should go away at first opportunity.
  */
-void                    pixman_region_set_static_pointers (pixman_box16_t         *empty_box,
-							   pixman_region16_data_t *empty_data,
-							   pixman_region16_data_t *broken_data);
-
-
-
-void           pixman_disable_out_of_bounds_workaround (void);
+void pixman_region_set_static_pointers (pixman_box16_t         *empty_box,
+					pixman_region16_data_t *empty_data,
+					pixman_region16_data_t *broken_data);
 
 /* creation/destruction */
-void                    pixman_region_init                (pixman_region16_t      *region);
-void                    pixman_region_init_rect           (pixman_region16_t      *region,
-							   int                     x,
-							   int                     y,
-							   unsigned int            width,
-							   unsigned int            height);
-pixman_bool_t           pixman_region_init_rects          (pixman_region16_t      *region,
-							   pixman_box16_t         *boxes,
-							   int                     count);
-void                    pixman_region_init_with_extents   (pixman_region16_t      *region,
-							   pixman_box16_t         *extents);
-void                    pixman_region_fini                (pixman_region16_t      *region);
+void                    pixman_region_init               (pixman_region16_t *region);
+void                    pixman_region_init_rect          (pixman_region16_t *region,
+							  int                x,
+							  int                y,
+							  unsigned int       width,
+							  unsigned int       height);
+pixman_bool_t           pixman_region_init_rects         (pixman_region16_t *region,
+							  pixman_box16_t    *boxes,
+							  int                count);
+void                    pixman_region_init_with_extents  (pixman_region16_t *region,
+							  pixman_box16_t    *extents);
+void                    pixman_region_fini               (pixman_region16_t *region);
 
 
-/* manipulation */
-void                    pixman_region_translate           (pixman_region16_t      *region,
-							   int                     x,
-							   int                     y);
-pixman_bool_t           pixman_region_copy                (pixman_region16_t      *dest,
-							   pixman_region16_t      *source);
-pixman_bool_t           pixman_region_intersect           (pixman_region16_t      *new_reg,
-							   pixman_region16_t      *reg1,
-							   pixman_region16_t      *reg2);
-pixman_bool_t           pixman_region_union               (pixman_region16_t      *new_reg,
-							   pixman_region16_t      *reg1,
-							   pixman_region16_t      *reg2);
-pixman_bool_t           pixman_region_union_rect          (pixman_region16_t      *dest,
-							   pixman_region16_t      *source,
-							   int                     x,
-							   int                     y,
-							   unsigned int            width,
-							   unsigned int            height);
-pixman_bool_t           pixman_region_subtract            (pixman_region16_t      *reg_d,
-							   pixman_region16_t      *reg_m,
-							   pixman_region16_t      *reg_s);
-pixman_bool_t           pixman_region_inverse             (pixman_region16_t      *new_reg,
-							   pixman_region16_t      *reg1,
-							   pixman_box16_t         *inv_rect);
-pixman_bool_t           pixman_region_contains_point      (pixman_region16_t      *region,
-							   int                     x,
-							   int                     y,
-							   pixman_box16_t         *box);
-pixman_region_overlap_t pixman_region_contains_rectangle  (pixman_region16_t      *pixman_region16_t,
-							   pixman_box16_t         *prect);
-pixman_bool_t           pixman_region_not_empty           (pixman_region16_t      *region);
-pixman_box16_t *        pixman_region_extents             (pixman_region16_t      *region);
-int                     pixman_region_n_rects             (pixman_region16_t      *region);
-pixman_box16_t *        pixman_region_rectangles          (pixman_region16_t      *region,
-							   int                    *n_rects);
-pixman_bool_t           pixman_region_equal               (pixman_region16_t      *region1,
-							   pixman_region16_t      *region2);
-pixman_bool_t           pixman_region_selfcheck           (pixman_region16_t      *region);
-void                    pixman_region_reset               (pixman_region16_t      *region,
-							   pixman_box16_t         *box);
 
+/* manipulation */
+void                    pixman_region_translate          (pixman_region16_t *region,
+							  int                x,
+							  int                y);
+pixman_bool_t           pixman_region_copy               (pixman_region16_t *dest,
+							  pixman_region16_t *source);
+pixman_bool_t           pixman_region_intersect          (pixman_region16_t *new_reg,
+							  pixman_region16_t *reg1,
+							  pixman_region16_t *reg2);
+pixman_bool_t           pixman_region_union              (pixman_region16_t *new_reg,
+							  pixman_region16_t *reg1,
+							  pixman_region16_t *reg2);
+pixman_bool_t           pixman_region_union_rect         (pixman_region16_t *dest,
+							  pixman_region16_t *source,
+							  int                x,
+							  int                y,
+							  unsigned int       width,
+							  unsigned int       height);
+pixman_bool_t           pixman_region_subtract           (pixman_region16_t *reg_d,
+							  pixman_region16_t *reg_m,
+							  pixman_region16_t *reg_s);
+pixman_bool_t           pixman_region_inverse            (pixman_region16_t *new_reg,
+							  pixman_region16_t *reg1,
+							  pixman_box16_t    *inv_rect);
+pixman_bool_t           pixman_region_contains_point     (pixman_region16_t *region,
+							  int                x,
+							  int                y,
+							  pixman_box16_t    *box);
+pixman_region_overlap_t pixman_region_contains_rectangle (pixman_region16_t *pixman_region16_t,
+							  pixman_box16_t    *prect);
+pixman_bool_t           pixman_region_not_empty          (pixman_region16_t *region);
+pixman_box16_t *        pixman_region_extents            (pixman_region16_t *region);
+int                     pixman_region_n_rects            (pixman_region16_t *region);
+pixman_box16_t *        pixman_region_rectangles         (pixman_region16_t *region,
+							  int               *n_rects);
+pixman_bool_t           pixman_region_equal              (pixman_region16_t *region1,
+							  pixman_region16_t *region2);
+pixman_bool_t           pixman_region_selfcheck          (pixman_region16_t *region);
+void                    pixman_region_reset              (pixman_region16_t *region,
+							  pixman_box16_t    *box);
 /*
  * 32 bit regions
  */
@@ -678,67 +625,67 @@ struct pixman_indexed
 
 /* 32bpp formats */
 typedef enum {
-    PIXMAN_a8r8g8b8 =	PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8),
-    PIXMAN_x8r8g8b8 =	PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8),
-    PIXMAN_a8b8g8r8 =	PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8),
-    PIXMAN_x8b8g8r8 =	PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8),
-    PIXMAN_b8g8r8a8 =	PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8),
-    PIXMAN_b8g8r8x8 =	PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8),
+    PIXMAN_a8r8g8b8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,8,8,8,8),
+    PIXMAN_x8r8g8b8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,8,8,8),
+    PIXMAN_a8b8g8r8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,8,8,8,8),
+    PIXMAN_x8b8g8r8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,8,8,8),
+    PIXMAN_b8g8r8a8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,8,8,8,8),
+    PIXMAN_b8g8r8x8 =	 PIXMAN_FORMAT(32,PIXMAN_TYPE_BGRA,0,8,8,8),
     PIXMAN_x2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,0,10,10,10),
     PIXMAN_a2r10g10b10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ARGB,2,10,10,10),
     PIXMAN_x2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,0,10,10,10),
     PIXMAN_a2b10g10r10 = PIXMAN_FORMAT(32,PIXMAN_TYPE_ABGR,2,10,10,10),
 
 /* 24bpp formats */
-    PIXMAN_r8g8b8 =	PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8),
-    PIXMAN_b8g8r8 =	PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8),
+    PIXMAN_r8g8b8 =	 PIXMAN_FORMAT(24,PIXMAN_TYPE_ARGB,0,8,8,8),
+    PIXMAN_b8g8r8 =	 PIXMAN_FORMAT(24,PIXMAN_TYPE_ABGR,0,8,8,8),
     
 /* 16bpp formats */
-    PIXMAN_r5g6b5 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5),
-    PIXMAN_b5g6r5 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5),
+    PIXMAN_r5g6b5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,6,5),
+    PIXMAN_b5g6r5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,6,5),
     
-    PIXMAN_a1r5g5b5 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5),
-    PIXMAN_x1r5g5b5 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5),
-    PIXMAN_a1b5g5r5 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5),
-    PIXMAN_x1b5g5r5 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5),
-    PIXMAN_a4r4g4b4 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4),
-    PIXMAN_x4r4g4b4 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4),
-    PIXMAN_a4b4g4r4 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4),
-    PIXMAN_x4b4g4r4 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4),
+    PIXMAN_a1r5g5b5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,1,5,5,5),
+    PIXMAN_x1r5g5b5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,5,5,5),
+    PIXMAN_a1b5g5r5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,1,5,5,5),
+    PIXMAN_x1b5g5r5 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,5,5,5),
+    PIXMAN_a4r4g4b4 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,4,4,4,4),
+    PIXMAN_x4r4g4b4 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ARGB,0,4,4,4),
+    PIXMAN_a4b4g4r4 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,4,4,4,4),
+    PIXMAN_x4b4g4r4 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_ABGR,0,4,4,4),
     
 /* 8bpp formats */
-    PIXMAN_a8 =		PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0),
-    PIXMAN_r3g3b2 =	PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2),
-    PIXMAN_b2g3r3 =	PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2),
-    PIXMAN_a2r2g2b2 =	PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2),
-    PIXMAN_a2b2g2r2 =	PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2),
+    PIXMAN_a8 =		 PIXMAN_FORMAT(8,PIXMAN_TYPE_A,8,0,0,0),
+    PIXMAN_r3g3b2 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,0,3,3,2),
+    PIXMAN_b2g3r3 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,0,3,3,2),
+    PIXMAN_a2r2g2b2 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_ARGB,2,2,2,2),
+    PIXMAN_a2b2g2r2 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_ABGR,2,2,2,2),
     
-    PIXMAN_c8 =		PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
-    PIXMAN_g8 =		PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
+    PIXMAN_c8 =		 PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
+    PIXMAN_g8 =		 PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
     
-    PIXMAN_x4a4 =	PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0),
+    PIXMAN_x4a4 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_A,4,0,0,0),
     
-    PIXMAN_x4c4 =	PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
-    PIXMAN_x4g4 =	PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
+    PIXMAN_x4c4 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_COLOR,0,0,0,0),
+    PIXMAN_x4g4 =	 PIXMAN_FORMAT(8,PIXMAN_TYPE_GRAY,0,0,0,0),
     
 /* 4bpp formats */
-    PIXMAN_a4 =		PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0),
-    PIXMAN_r1g2b1 =	PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1),
-    PIXMAN_b1g2r1 =	PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1),
-    PIXMAN_a1r1g1b1 =	PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1),
-    PIXMAN_a1b1g1r1 =	PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1),
+    PIXMAN_a4 =		 PIXMAN_FORMAT(4,PIXMAN_TYPE_A,4,0,0,0),
+    PIXMAN_r1g2b1 =	 PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,0,1,2,1),
+    PIXMAN_b1g2r1 =	 PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,0,1,2,1),
+    PIXMAN_a1r1g1b1 =	 PIXMAN_FORMAT(4,PIXMAN_TYPE_ARGB,1,1,1,1),
+    PIXMAN_a1b1g1r1 =	 PIXMAN_FORMAT(4,PIXMAN_TYPE_ABGR,1,1,1,1),
     
-    PIXMAN_c4 =		PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0),
-    PIXMAN_g4 =		PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0),
+    PIXMAN_c4 =		 PIXMAN_FORMAT(4,PIXMAN_TYPE_COLOR,0,0,0,0),
+    PIXMAN_g4 =		 PIXMAN_FORMAT(4,PIXMAN_TYPE_GRAY,0,0,0,0),
     
 /* 1bpp formats */
-    PIXMAN_a1 =		PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0),
+    PIXMAN_a1 =		 PIXMAN_FORMAT(1,PIXMAN_TYPE_A,1,0,0,0),
     
-    PIXMAN_g1 =		PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),
+    PIXMAN_g1 =		 PIXMAN_FORMAT(1,PIXMAN_TYPE_GRAY,0,0,0,0),
 
 /* YUV formats */
-    PIXMAN_yuy2 =	PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0),
-    PIXMAN_yv12 =	PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0)
+    PIXMAN_yuy2 =	 PIXMAN_FORMAT(16,PIXMAN_TYPE_YUY2,0,0,0,0),
+    PIXMAN_yv12 =	 PIXMAN_FORMAT(12,PIXMAN_TYPE_YV12,0,0,0,0)
 } pixman_format_code_t;
 
 /* Querying supported format values. */
@@ -840,6 +787,20 @@ void          pixman_image_composite          (pixman_op_t        op,
 					       uint16_t           width,
 					       uint16_t           height);
 
+/* Old X servers rely on out-of-bounds accesses when they are asked
+ * to composite with a window as the source. They create a pixman image
+ * pointing to some bogus position in memory, but then they set a clip
+ * region to the position where the actual bits are.
+ *
+ * Due to a bug in old versions of pixman, where it would not clip
+ * against the image bounds when a clip region was set, this would
+ * actually work. So by default we allow certain out-of-bound access
+ * to happen unless explicitly disabled.
+ *
+ * Fixed X servers should call this function to disable the workaround.
+ */
+void          pixman_disable_out_of_bounds_workaround (void);
+
 /*
  * Trapezoids
  */
@@ -857,26 +818,26 @@ struct pixman_edge
 {
     pixman_fixed_t	x;
     pixman_fixed_t	e;
-    pixman_fixed_t   stepx;
-    pixman_fixed_t   signdx;
-    pixman_fixed_t   dy;
-    pixman_fixed_t   dx;
-
-    pixman_fixed_t   stepx_small;
-    pixman_fixed_t   stepx_big;
-    pixman_fixed_t   dx_small;
-    pixman_fixed_t   dx_big;
+    pixman_fixed_t	stepx;
+    pixman_fixed_t	signdx;
+    pixman_fixed_t	dy;
+    pixman_fixed_t	dx;
+
+    pixman_fixed_t	stepx_small;
+    pixman_fixed_t	stepx_big;
+    pixman_fixed_t	dx_small;
+    pixman_fixed_t	dx_big;
 };
 
 struct pixman_trapezoid
 {
-    pixman_fixed_t  top, bottom;
+    pixman_fixed_t	top, bottom;
     pixman_line_fixed_t	left, right;
 };
 
 
 /* whether 't' is a well defined not obviously empty trapezoid */
-#define pixman_trapezoid_valid(t)				\
+#define pixman_trapezoid_valid(t)				   \
     ((t)->left.p1.y != (t)->left.p2.y &&			   \
      (t)->right.p1.y != (t)->right.p2.y &&			   \
      (int) ((t)->bottom - (t)->top) > 0)
@@ -930,5 +891,4 @@ void           pixman_rasterize_trapezoid  (pixman_image_t            *image,
 					    int                        x_off,
 					    int                        y_off);
 
-
 #endif /* PIXMAN_H__ */
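
The reformatted header also makes the calling sequence for transformed
compositing easy to read off: initialize a pixman_transform, attach it to
the source image, pick a repeat mode, then composite. A minimal sketch using
only API declared above; the sizes, formats, and scale factor are arbitrary
choices for illustration, and note that the transform maps compositing
coordinates into source coordinates, so a matrix scale of 0.5 samples the
source at half rate and thereby magnifies it:

#include <stdint.h>
#include <pixman.h>

int
main (void)
{
    static uint32_t src_bits[16 * 16];   /* a8r8g8b8, stride = 16 * 4 bytes */
    static uint32_t dst_bits[32 * 32];
    pixman_transform_t t;

    pixman_image_t *src = pixman_image_create_bits (
	PIXMAN_a8r8g8b8, 16, 16, src_bits, 16 * 4);
    pixman_image_t *dst = pixman_image_create_bits (
	PIXMAN_a8r8g8b8, 32, 32, dst_bits, 32 * 4);

    /* scale the 16x16 source up to cover the 32x32 destination */
    pixman_transform_init_scale (&t,
				 pixman_double_to_fixed (0.5),
				 pixman_double_to_fixed (0.5));
    pixman_image_set_transform (src, &t);
    pixman_image_set_repeat (src, PIXMAN_REPEAT_PAD);

    pixman_image_composite (PIXMAN_OP_OVER, src, NULL, dst,
			    0, 0, 0, 0, 0, 0, 32, 32);

    pixman_image_unref (src);
    pixman_image_unref (dst);
    return 0;
}

This is the same shape of call sequence that test_composite in
scaling-test.c exercises with randomized parameters.
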
commit 22f322fe246155d40465d4e14d65051a204f27f6
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Mon Jul 13 01:35:15 2009 -0400

    Reindent and reformat pixman-private.h

diff --git a/pixman/pixman-compiler.h b/pixman/pixman-compiler.h
index 466199d..9647dbb 100644
--- a/pixman/pixman-compiler.h
+++ b/pixman/pixman-compiler.h
@@ -54,18 +54,18 @@
 #   define inline __inline__
 #   define force_inline __inline__ __attribute__ ((__always_inline__))
 #else
-# ifndef force_inline
-#  define force_inline inline
-# endif
+#   ifndef force_inline
+#      define force_inline inline
+#   endif
 #endif
 
 /* GCC visibility */
 #if defined(__GNUC__) && __GNUC__ >= 4
-#define PIXMAN_EXPORT __attribute__ ((visibility("default")))
+#   define PIXMAN_EXPORT __attribute__ ((visibility("default")))
 /* Sun Studio 8 visibility */
 #elif defined(__SUNPRO_C) && (__SUNPRO_C >= 0x550)
-#define PIXMAN_EXPORT __global
+#   define PIXMAN_EXPORT __global
 #else
-#define PIXMAN_EXPORT
+#   define PIXMAN_EXPORT
 #endif
 
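These two macros carry the portability policy for the library: force_inline
requests unconditional inlining where the compiler supports it, and
PIXMAN_EXPORT marks public entry points so that everything else can be
hidden. A hypothetical sketch of how they appear at a definition site; both
function names below are invented for illustration only:

#include "pixman-private.h"

/* inlined unconditionally under GCC, plain inline elsewhere */
static force_inline uint32_t
clamp_to_byte (int v)
{
    return v < 0 ? 0 : (v > 255 ? 255 : (uint32_t)v);
}

/* given default visibility even when the library is built with
 * -fvisibility=hidden, so it remains callable from outside libpixman */
PIXMAN_EXPORT pixman_bool_t
pixman_example_is_opaque (uint32_t argb)
{
    return (argb >> 24) == 0xFF;
}
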
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 081d35c..07259a0 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -26,16 +26,23 @@ typedef struct radial_gradient radial_gradient_t;
 typedef struct bits_image bits_image_t;
 typedef struct circle circle_t;
 
-typedef void     (*fetch_scanline_t)  (pixman_image_t *pict,
-				       int x, int y, int width,
-				       uint32_t *buffer,
-				       const uint32_t *mask,
-				       uint32_t mask_bits);
-typedef void     (*fetch_pixels_t)    (bits_image_t *image,
-				       uint32_t *buffer, int n_pixels);
-typedef void     (*store_scanline_t)  (bits_image_t *image,
-				       int x, int y, int width,
-				       const uint32_t *values);
+typedef void (*fetch_scanline_t) (pixman_image_t *pict,
+				  int             x,
+				  int             y,
+				  int             width,
+				  uint32_t       *buffer,
+				  const uint32_t *mask,
+				  uint32_t        mask_bits);
+
+typedef void (*fetch_pixels_t)   (bits_image_t *  image,
+				  uint32_t *      buffer,
+				  int             n_pixels);
+
+typedef void (*store_scanline_t) (bits_image_t *  image,
+				  int             x,
+				  int             y,
+				  int             width,
+				  const uint32_t *values);
 
 typedef enum
 {
@@ -53,72 +60,72 @@ typedef enum
     SOURCE_IMAGE_CLASS_VERTICAL,
 } source_pict_class_t;
 
-typedef source_pict_class_t (* classify_func_t) (pixman_image_t *image,
-						 int             x,
-						 int             y,
-						 int             width,
-						 int             height);
-typedef void (* property_changed_func_t)        (pixman_image_t *image);
+typedef source_pict_class_t (*classify_func_t) (pixman_image_t *image,
+						int             x,
+						int             y,
+						int             width,
+						int             height);
+typedef void (*property_changed_func_t) (pixman_image_t *image);
 
 struct image_common
 {
-    image_type_t		type;
-    int32_t			ref_count;
-    pixman_region32_t		clip_region;
-    pixman_bool_t		have_clip_region;   /* FALSE if there is no clip */
-    pixman_bool_t		client_clip;	    /* Whether the source clip was
+    image_type_t                type;
+    int32_t                     ref_count;
+    pixman_region32_t           clip_region;
+    pixman_bool_t               have_clip_region;   /* FALSE if there is no clip */
+    pixman_bool_t               client_clip;        /* Whether the source clip was
 						       set by a client */
-    pixman_bool_t		clip_sources;	    /* Whether the clip applies when
+    pixman_bool_t               clip_sources;       /* Whether the clip applies when
 						     * the image is used as a source
 						     */
-    pixman_bool_t		need_workaround;
-    pixman_transform_t	       *transform;
-    pixman_repeat_t		repeat;
-    pixman_filter_t		filter;
-    pixman_fixed_t	       *filter_params;
-    int				n_filter_params;
-    bits_image_t	       *alpha_map;
-    int				alpha_origin_x;
-    int				alpha_origin_y;
-    pixman_bool_t		component_alpha;
-    pixman_read_memory_func_t	read_func;
-    pixman_write_memory_func_t	write_func;
-    classify_func_t		classify;
-    property_changed_func_t	property_changed;
-    fetch_scanline_t		get_scanline_32;
-    fetch_scanline_t		get_scanline_64;
+    pixman_bool_t               need_workaround;
+    pixman_transform_t *        transform;
+    pixman_repeat_t             repeat;
+    pixman_filter_t             filter;
+    pixman_fixed_t *            filter_params;
+    int                         n_filter_params;
+    bits_image_t *              alpha_map;
+    int                         alpha_origin_x;
+    int                         alpha_origin_y;
+    pixman_bool_t               component_alpha;
+    pixman_read_memory_func_t   read_func;
+    pixman_write_memory_func_t  write_func;
+    classify_func_t             classify;
+    property_changed_func_t     property_changed;
+    fetch_scanline_t            get_scanline_32;
+    fetch_scanline_t            get_scanline_64;
 
     pixman_image_destroy_func_t destroy_func;
-    void *			destroy_data;
+    void *                      destroy_data;
 };
 
 struct source_image
 {
-    image_common_t	common;
+    image_common_t common;
     source_pict_class_t class;
 };
 
 struct solid_fill
 {
-    source_image_t	common;
-    uint32_t		color;		/* FIXME: shouldn't this be a pixman_color_t? */
+    source_image_t common;
+    uint32_t       color;    /* FIXME: shouldn't this be a pixman_color_t? */
 };
 
 struct gradient
 {
-    source_image_t		common;
-    int				n_stops;
-    pixman_gradient_stop_t *	stops;
-    int				stop_range;
-    uint32_t *			color_table;
-    int				color_table_size;
+    source_image_t          common;
+    int                     n_stops;
+    pixman_gradient_stop_t *stops;
+    int                     stop_range;
+    uint32_t *              color_table;
+    int                     color_table_size;
 };
 
 struct linear_gradient
 {
-    gradient_t			common;
-    pixman_point_fixed_t	p1;
-    pixman_point_fixed_t	p2;
+    gradient_t           common;
+    pixman_point_fixed_t p1;
+    pixman_point_fixed_t p2;
 };
 
 struct circle
@@ -130,142 +137,162 @@ struct circle
 
 struct radial_gradient
 {
-    gradient_t	common;
-
-    circle_t	c1;
-    circle_t	c2;
-    double	cdx;
-    double	cdy;
-    double	dr;
-    double	A;
+    gradient_t common;
+
+    circle_t   c1;
+    circle_t   c2;
+    double     cdx;
+    double     cdy;
+    double     dr;
+    double     A;
 };
 
 struct conical_gradient
 {
-    gradient_t			common;
-    pixman_point_fixed_t	center;
-    pixman_fixed_t		angle;
+    gradient_t           common;
+    pixman_point_fixed_t center;
+    pixman_fixed_t       angle;
 };
 
 struct bits_image
 {
-    image_common_t		common;
-    pixman_format_code_t	format;
-    const pixman_indexed_t     *indexed;
-    int				width;
-    int				height;
-    uint32_t *			bits;
-    uint32_t *			free_me;
-    int				rowstride; /* in number of uint32_t's */
+    image_common_t             common;
+    pixman_format_code_t       format;
+    const pixman_indexed_t *   indexed;
+    int                        width;
+    int                        height;
+    uint32_t *                 bits;
+    uint32_t *                 free_me;
+    int                        rowstride;  /* in number of uint32_t's */
 
     /* Fetch raw pixels, with no regard for transformations, alpha map etc. */
-    fetch_pixels_t		fetch_pixels_raw_32;
-    fetch_pixels_t		fetch_pixels_raw_64;
+    fetch_pixels_t             fetch_pixels_raw_32;
+    fetch_pixels_t             fetch_pixels_raw_64;
 
     /* Fetch raw scanlines, with no regard for transformations, alpha maps etc. */
-    fetch_scanline_t			fetch_scanline_raw_32;
-    fetch_scanline_t			fetch_scanline_raw_64;
+    fetch_scanline_t           fetch_scanline_raw_32;
+    fetch_scanline_t           fetch_scanline_raw_64;
 
     /* Store scanlines with no regard for alpha maps */
-    store_scanline_t		store_scanline_raw_32;
-    store_scanline_t		store_scanline_raw_64;
+    store_scanline_t           store_scanline_raw_32;
+    store_scanline_t           store_scanline_raw_64;
 
     /* Store a scanline, taking alpha maps into account */
-    store_scanline_t		store_scanline_32;
-    store_scanline_t		store_scanline_64;
+    store_scanline_t           store_scanline_32;
+    store_scanline_t           store_scanline_64;
 
     /* Used for indirect access to the bits */
-    pixman_read_memory_func_t	read_func;
-    pixman_write_memory_func_t	write_func;
+    pixman_read_memory_func_t  read_func;
+    pixman_write_memory_func_t write_func;
 };
 
 union pixman_image
 {
-    image_type_t		type;
-    image_common_t		common;
-    bits_image_t		bits;
-    source_image_t		source;
-    gradient_t			gradient;
-    linear_gradient_t		linear;
-    conical_gradient_t		conical;
-    radial_gradient_t		radial;
-    solid_fill_t		solid;
+    image_type_t       type;
+    image_common_t     common;
+    bits_image_t       bits;
+    source_image_t     source;
+    gradient_t         gradient;
+    linear_gradient_t  linear;
+    conical_gradient_t conical;
+    radial_gradient_t  radial;
+    solid_fill_t       solid;
 };
 
 
 void
-_pixman_bits_image_setup_raw_accessors (bits_image_t   *image);
+_pixman_bits_image_setup_raw_accessors (bits_image_t *image);
 
 void
 _pixman_image_get_scanline_generic_64  (pixman_image_t *pict,
-					int             x,
-					int             y,
-					int             width,
-					uint32_t       *buffer,
-					const uint32_t *mask,
-					uint32_t        mask_bits);
+                                        int             x,
+                                        int             y,
+                                        int             width,
+                                        uint32_t *      buffer,
+                                        const uint32_t *mask,
+                                        uint32_t        mask_bits);
 
 source_pict_class_t
 _pixman_image_classify (pixman_image_t *image,
-			int             x,
-			int             y,
-			int             width,
-			int             height);
+                        int             x,
+                        int             y,
+                        int             width,
+                        int             height);
 
 void
-_pixman_image_get_scanline_32 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-			       const uint32_t *mask, uint32_t mask_bits);
+_pixman_image_get_scanline_32 (pixman_image_t *image,
+                               int             x,
+                               int             y,
+                               int             width,
+                               uint32_t *      buffer,
+                               const uint32_t *mask,
+                               uint32_t        mask_bits);
 
 /* Even though the type of buffer is uint32_t *, the function actually expects
  * a uint64_t *buffer.
  */
 void
-_pixman_image_get_scanline_64 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-			       const uint32_t *unused, uint32_t unused2);
+_pixman_image_get_scanline_64 (pixman_image_t *image,
+                               int             x,
+                               int             y,
+                               int             width,
+                               uint32_t *      buffer,
+                               const uint32_t *unused,
+                               uint32_t        unused2);
 
 void
-_pixman_image_store_scanline_32 (bits_image_t *image, int x, int y, int width,
-				 const uint32_t *buffer);
+_pixman_image_store_scanline_32 (bits_image_t *  image,
+                                 int             x,
+                                 int             y,
+                                 int             width,
+                                 const uint32_t *buffer);
 void
-_pixman_image_fetch_pixels (bits_image_t *image, uint32_t *buffer,
-			    int n_pixels);
+_pixman_image_fetch_pixels (bits_image_t *image,
+                            uint32_t *    buffer,
+                            int           n_pixels);
 
-/* Even thought the type of buffer is uint32_t *, the function actually expects
- * a uint64_t *buffer.
+/* Even though the type of buffer is uint32_t *, the function
+ * actually expects a uint64_t *buffer.
  */
 void
-_pixman_image_store_scanline_64 (bits_image_t *image, int x, int y, int width,
-				 const uint32_t *buffer);
+_pixman_image_store_scanline_64 (bits_image_t *  image,
+                                 int             x,
+                                 int             y,
+                                 int             width,
+                                 const uint32_t *buffer);
 
 pixman_image_t *
 _pixman_image_allocate (void);
 
 pixman_bool_t
-_pixman_init_gradient (gradient_t                   *gradient,
-		       const pixman_gradient_stop_t *stops,
-		       int	                     n_stops);
+_pixman_init_gradient (gradient_t *                  gradient,
+                       const pixman_gradient_stop_t *stops,
+                       int                           n_stops);
 void
 _pixman_image_reset_clip_region (pixman_image_t *image);
 
 pixman_bool_t
-_pixman_image_is_opaque(pixman_image_t *image);
+_pixman_image_is_opaque (pixman_image_t *image);
 
 pixman_bool_t
 _pixman_image_is_solid (pixman_image_t *image);
 
 uint32_t
-_pixman_image_get_solid (pixman_image_t *image,
-			pixman_format_code_t format);
+_pixman_image_get_solid (pixman_image_t *     image,
+                         pixman_format_code_t format);
 
-#define PIXMAN_IMAGE_GET_LINE(pict,x,y,type,out_stride,line,mul) do {	\
-	uint32_t	*__bits__;					\
-	int		__stride__;					\
-									\
-	__bits__ = pict->bits.bits;					\
-	__stride__ = pict->bits.rowstride;				\
-	(out_stride) = __stride__ * (int) sizeof (uint32_t) / (int) sizeof (type); \
-	(line) = ((type *) __bits__) +					\
-	    (out_stride) * (y) + (mul) * (x);				\
+#define PIXMAN_IMAGE_GET_LINE(pict, x, y, type, out_stride, line, mul)	\
+    do									\
+    {									\
+	uint32_t *__bits__;						\
+	int       __stride__;						\
+        								\
+	__bits__ = pict->bits.bits;                                     \
+	__stride__ = pict->bits.rowstride;                              \
+	(out_stride) =							\
+	    __stride__ * (int) sizeof (uint32_t) / (int) sizeof (type);	\
+	(line) =							\
+	    ((type *) __bits__) + (out_stride) * (y) + (mul) * (x);	\
     } while (0)
 
 /*
@@ -273,62 +300,64 @@ _pixman_image_get_solid (pixman_image_t *image,
  */
 typedef struct
 {
-    uint32_t        left_ag;
-    uint32_t        left_rb;
-    uint32_t        right_ag;
-    uint32_t        right_rb;
-    int32_t       left_x;
-    int32_t       right_x;
-    int32_t       stepper;
-
-    pixman_gradient_stop_t	*stops;
-    int                      num_stops;
-    unsigned int             spread;
-
-    int		  need_reset;
+    uint32_t                left_ag;
+    uint32_t                left_rb;
+    uint32_t                right_ag;
+    uint32_t                right_rb;
+    int32_t                 left_x;
+    int32_t                 right_x;
+    int32_t                 stepper;
+
+    pixman_gradient_stop_t *stops;
+    int                     num_stops;
+    unsigned int            spread;
+
+    int                     need_reset;
 } pixman_gradient_walker_t;
 
 void
-_pixman_gradient_walker_init (pixman_gradient_walker_t  *walker,
-			      gradient_t      *gradient,
-			      unsigned int     spread);
+_pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
+                              gradient_t *              gradient,
+                              unsigned int              spread);
 
 void
-_pixman_gradient_walker_reset (pixman_gradient_walker_t       *walker,
-			       pixman_fixed_32_32_t  pos);
+_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker,
+                               pixman_fixed_32_32_t      pos);
 
 uint32_t
-_pixman_gradient_walker_pixel (pixman_gradient_walker_t       *walker,
-			       pixman_fixed_32_32_t  x);
+_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
+                               pixman_fixed_32_32_t      x);
 
 /*
  * Edges
  */
 
-#define MAX_ALPHA(n)	((1 << (n)) - 1)
-#define N_Y_FRAC(n)	((n) == 1 ? 1 : (1 << ((n)/2)) - 1)
-#define N_X_FRAC(n)	((n) == 1 ? 1 : (1 << ((n)/2)) + 1)
+#define MAX_ALPHA(n)    ((1 << (n)) - 1)
+#define N_Y_FRAC(n)     ((n) == 1 ? 1 : (1 << ((n) / 2)) - 1)
+#define N_X_FRAC(n)     ((n) == 1 ? 1 : (1 << ((n) / 2)) + 1)
 
-#define STEP_Y_SMALL(n)	(pixman_fixed_1 / N_Y_FRAC(n))
-#define STEP_Y_BIG(n)	(pixman_fixed_1 - (N_Y_FRAC(n) - 1) * STEP_Y_SMALL(n))
+#define STEP_Y_SMALL(n) (pixman_fixed_1 / N_Y_FRAC (n))
+#define STEP_Y_BIG(n)   (pixman_fixed_1 - (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
 
-#define Y_FRAC_FIRST(n)	(STEP_Y_SMALL(n) / 2)
-#define Y_FRAC_LAST(n)	(Y_FRAC_FIRST(n) + (N_Y_FRAC(n) - 1) * STEP_Y_SMALL(n))
+#define Y_FRAC_FIRST(n) (STEP_Y_SMALL (n) / 2)
+#define Y_FRAC_LAST(n)  (Y_FRAC_FIRST (n) + (N_Y_FRAC (n) - 1) * STEP_Y_SMALL (n))
 
-#define STEP_X_SMALL(n)	(pixman_fixed_1 / N_X_FRAC(n))
-#define STEP_X_BIG(n)	(pixman_fixed_1 - (N_X_FRAC(n) - 1) * STEP_X_SMALL(n))
+#define STEP_X_SMALL(n) (pixman_fixed_1 / N_X_FRAC (n))
+#define STEP_X_BIG(n)   (pixman_fixed_1 - (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
 
-#define X_FRAC_FIRST(n)	(STEP_X_SMALL(n) / 2)
-#define X_FRAC_LAST(n)	(X_FRAC_FIRST(n) + (N_X_FRAC(n) - 1) * STEP_X_SMALL(n))
+#define X_FRAC_FIRST(n) (STEP_X_SMALL (n) / 2)
+#define X_FRAC_LAST(n)  (X_FRAC_FIRST (n) + (N_X_FRAC (n) - 1) * STEP_X_SMALL (n))
 
-#define RENDER_SAMPLES_X(x,n)	((n) == 1 ? 0 : (pixman_fixed_frac (x) + X_FRAC_FIRST(n)) / STEP_X_SMALL(n))
+#define RENDER_SAMPLES_X(x, n)						\
+    ((n) == 1? 0 : (pixman_fixed_frac (x) +				\
+		    X_FRAC_FIRST (n)) / STEP_X_SMALL (n))
 
 void
 pixman_rasterize_edges_accessors (pixman_image_t *image,
-				  pixman_edge_t	*l,
-				  pixman_edge_t	*r,
-				  pixman_fixed_t	t,
-				  pixman_fixed_t	b);
+                                  pixman_edge_t * l,
+                                  pixman_edge_t * r,
+                                  pixman_fixed_t  t,
+                                  pixman_fixed_t  b);
 
 /*
  * Implementations
@@ -336,166 +365,173 @@ pixman_rasterize_edges_accessors (pixman_image_t *image,
 
 typedef struct pixman_implementation_t pixman_implementation_t;
 
-typedef void (* pixman_combine_32_func_t) (pixman_implementation_t *	imp,
-					   pixman_op_t			op,
-					   uint32_t *			dest,
-					   const uint32_t *		src,
-					   const uint32_t *		mask,
-					   int				width);
-
-typedef void (* pixman_combine_64_func_t) (pixman_implementation_t *	imp,
-					   pixman_op_t			op,
-					   uint64_t *			dest,
-					   const uint64_t *		src,
-					   const uint64_t *		mask,
-					   int				width);
-
-typedef void (* pixman_composite_func_t)  (pixman_implementation_t *	imp,
-					   pixman_op_t			op,
-					   pixman_image_t *		src,
-					   pixman_image_t *		mask,
-					   pixman_image_t *		dest,
-					   int32_t			src_x,
-					   int32_t			src_y,
-					   int32_t			mask_x,
-					   int32_t			mask_y,
-					   int32_t			dest_x,
-					   int32_t			dest_y,
-					   int32_t			width,
-					   int32_t			height);
-typedef pixman_bool_t (* pixman_blt_func_t) (pixman_implementation_t *	imp,
-					     uint32_t *			src_bits,
-					     uint32_t *			dst_bits,
-					     int			src_stride,
-					     int			dst_stride,
-					     int			src_bpp,
-					     int			dst_bpp,
-					     int			src_x,
-					     int			src_y,
-					     int			dst_x,
-					     int			dst_y,
-					     int			width,
-					     int			height);
-typedef pixman_bool_t (* pixman_fill_func_t) (pixman_implementation_t *imp,
-					      uint32_t *bits,
-					      int stride,
-					      int bpp,
-					      int x,
-					      int y,
-					      int width,
-					      int height,
-					      uint32_t xor);
+typedef void (*pixman_combine_32_func_t) (pixman_implementation_t *imp,
+					  pixman_op_t              op,
+					  uint32_t *               dest,
+					  const uint32_t *         src,
+					  const uint32_t *         mask,
+					  int                      width);
+
+typedef void (*pixman_combine_64_func_t) (pixman_implementation_t *imp,
+					  pixman_op_t              op,
+					  uint64_t *               dest,
+					  const uint64_t *         src,
+					  const uint64_t *         mask,
+					  int                      width);
+
+typedef void (*pixman_composite_func_t) (pixman_implementation_t *imp,
+					 pixman_op_t              op,
+					 pixman_image_t *         src,
+					 pixman_image_t *         mask,
+					 pixman_image_t *         dest,
+					 int32_t                  src_x,
+					 int32_t                  src_y,
+					 int32_t                  mask_x,
+					 int32_t                  mask_y,
+					 int32_t                  dest_x,
+					 int32_t                  dest_y,
+					 int32_t                  width,
+					 int32_t                  height);
+typedef pixman_bool_t (*pixman_blt_func_t) (pixman_implementation_t *imp,
+					    uint32_t *               src_bits,
+					    uint32_t *               dst_bits,
+					    int                      src_stride,
+					    int                      dst_stride,
+					    int                      src_bpp,
+					    int                      dst_bpp,
+					    int                      src_x,
+					    int                      src_y,
+					    int                      dst_x,
+					    int                      dst_y,
+					    int                      width,
+					    int                      height);
+typedef pixman_bool_t (*pixman_fill_func_t) (pixman_implementation_t *imp,
+					     uint32_t *               bits,
+					     int                      stride,
+					     int                      bpp,
+					     int                      x,
+					     int                      y,
+					     int                      width,
+					     int                      height,
+					     uint32_t                 xor);
 
 void _pixman_setup_combiner_functions_32 (pixman_implementation_t *imp);
 void _pixman_setup_combiner_functions_64 (pixman_implementation_t *imp);
 
 struct pixman_implementation_t
 {
-    pixman_implementation_t *	toplevel;
-    pixman_implementation_t *	delegate;
-
-    pixman_composite_func_t	composite;
-    pixman_blt_func_t		blt;
-    pixman_fill_func_t		fill;
-    
-    pixman_combine_32_func_t	combine_32[PIXMAN_OP_LAST];
-    pixman_combine_32_func_t	combine_32_ca[PIXMAN_OP_LAST];
-    pixman_combine_64_func_t	combine_64[PIXMAN_OP_LAST];
-    pixman_combine_64_func_t	combine_64_ca[PIXMAN_OP_LAST];
+    pixman_implementation_t *toplevel;
+    pixman_implementation_t *delegate;
+
+    pixman_composite_func_t  composite;
+    pixman_blt_func_t        blt;
+    pixman_fill_func_t       fill;
+
+    pixman_combine_32_func_t combine_32[PIXMAN_OP_LAST];
+    pixman_combine_32_func_t combine_32_ca[PIXMAN_OP_LAST];
+    pixman_combine_64_func_t combine_64[PIXMAN_OP_LAST];
+    pixman_combine_64_func_t combine_64_ca[PIXMAN_OP_LAST];
 };
 
 pixman_implementation_t *
 _pixman_implementation_create (pixman_implementation_t *delegate);
 
 void
-_pixman_implementation_combine_32 (pixman_implementation_t *	imp,
-				   pixman_op_t			op,
-				   uint32_t *			dest,
-				   const uint32_t *		src,
-				   const uint32_t *		mask,
-				   int				width);
+_pixman_implementation_combine_32 (pixman_implementation_t *imp,
+                                   pixman_op_t              op,
+                                   uint32_t *               dest,
+                                   const uint32_t *         src,
+                                   const uint32_t *         mask,
+                                   int                      width);
 void
-_pixman_implementation_combine_64 (pixman_implementation_t *	imp,
-				   pixman_op_t			op,
-				   uint64_t *			dest,
-				   const uint64_t *		src,
-				   const uint64_t *		mask,
-				   int				width);
+_pixman_implementation_combine_64 (pixman_implementation_t *imp,
+                                   pixman_op_t              op,
+                                   uint64_t *               dest,
+                                   const uint64_t *         src,
+                                   const uint64_t *         mask,
+                                   int                      width);
 void
-_pixman_implementation_combine_32_ca (pixman_implementation_t *	imp,
-				      pixman_op_t		op,
-				      uint32_t *		dest,
-				      const uint32_t *		src,
-				      const uint32_t *		mask,
-				      int			width);
+_pixman_implementation_combine_32_ca (pixman_implementation_t *imp,
+                                      pixman_op_t              op,
+                                      uint32_t *               dest,
+                                      const uint32_t *         src,
+                                      const uint32_t *         mask,
+                                      int                      width);
 void
-_pixman_implementation_combine_64_ca (pixman_implementation_t *	imp,
-				      pixman_op_t		op,
-				      uint64_t *		dest,
-				      const uint64_t *		src,
-				      const uint64_t *		mask,
-				      int			width);
+_pixman_implementation_combine_64_ca (pixman_implementation_t *imp,
+                                      pixman_op_t              op,
+                                      uint64_t *               dest,
+                                      const uint64_t *         src,
+                                      const uint64_t *         mask,
+                                      int                      width);
 void
-_pixman_implementation_composite (pixman_implementation_t *	imp,
-				  pixman_op_t			op,
-				  pixman_image_t *		src,
-				  pixman_image_t *		mask,
-				  pixman_image_t *		dest,
-				  int32_t			src_x,
-				  int32_t			src_y,
-				  int32_t			mask_x,
-				  int32_t			mask_y,
-				  int32_t			dest_x,
-				  int32_t			dest_y,
-				  int32_t			width,
-				  int32_t			height);
+_pixman_implementation_composite (pixman_implementation_t *imp,
+                                  pixman_op_t              op,
+                                  pixman_image_t *         src,
+                                  pixman_image_t *         mask,
+                                  pixman_image_t *         dest,
+                                  int32_t                  src_x,
+                                  int32_t                  src_y,
+                                  int32_t                  mask_x,
+                                  int32_t                  mask_y,
+                                  int32_t                  dest_x,
+                                  int32_t                  dest_y,
+                                  int32_t                  width,
+                                  int32_t                  height);
 
 pixman_bool_t
-_pixman_implementation_blt (pixman_implementation_t *	imp,
-			    uint32_t *			src_bits,
-			    uint32_t *			dst_bits,
-			    int				src_stride,
-			    int				dst_stride,
-			    int				src_bpp,
-			    int				dst_bpp,
-			    int				src_x,
-			    int				src_y,
-			    int				dst_x,
-			    int				dst_y,
-			    int				width,
-			    int				height);
+_pixman_implementation_blt (pixman_implementation_t *imp,
+                            uint32_t *               src_bits,
+                            uint32_t *               dst_bits,
+                            int                      src_stride,
+                            int                      dst_stride,
+                            int                      src_bpp,
+                            int                      dst_bpp,
+                            int                      src_x,
+                            int                      src_y,
+                            int                      dst_x,
+                            int                      dst_y,
+                            int                      width,
+                            int                      height);
+
 pixman_bool_t
-_pixman_implementation_fill (pixman_implementation_t *   imp,
-			     uint32_t *bits,
-			     int stride,
-			     int bpp,
-			     int x,
-			     int y,
-			     int width,
-			     int height,
-			     uint32_t xor);
-    
+_pixman_implementation_fill (pixman_implementation_t *imp,
+                             uint32_t *               bits,
+                             int                      stride,
+                             int                      bpp,
+                             int                      x,
+                             int                      y,
+                             int                      width,
+                             int                      height,
+                             uint32_t                 xor);
+
 /* Specific implementations */
 pixman_implementation_t *
 _pixman_implementation_create_general (void);
+
 pixman_implementation_t *
 _pixman_implementation_create_fast_path (void);
+
 #ifdef USE_MMX
 pixman_implementation_t *
 _pixman_implementation_create_mmx (void);
 #endif
+
 #ifdef USE_SSE2
 pixman_implementation_t *
 _pixman_implementation_create_sse2 (void);
 #endif
+
 #ifdef USE_ARM_SIMD
 pixman_implementation_t *
 _pixman_implementation_create_arm_simd (void);
 #endif
+
 #ifdef USE_ARM_NEON
 pixman_implementation_t *
 _pixman_implementation_create_arm_neon (void);
 #endif
+
 #ifdef USE_VMX
 pixman_implementation_t *
 _pixman_implementation_create_vmx (void);
@@ -513,21 +549,21 @@ _pixman_choose_implementation (void);
 /* These "formats" both have depth 0, so they
  * will never clash with any real ones
  */
-#define PIXMAN_null		PIXMAN_FORMAT(0,0,0,0,0,0)
-#define PIXMAN_solid		PIXMAN_FORMAT(0,1,0,0,0,0)
+#define PIXMAN_null             PIXMAN_FORMAT (0, 0, 0, 0, 0, 0)
+#define PIXMAN_solid            PIXMAN_FORMAT (0, 1, 0, 0, 0, 0)
 
-#define NEED_COMPONENT_ALPHA		(1 << 0)
-#define NEED_PIXBUF			(1 << 1)
-#define NEED_SOLID_MASK		        (1 << 2)
+#define NEED_COMPONENT_ALPHA            (1 << 0)
+#define NEED_PIXBUF                     (1 << 1)
+#define NEED_SOLID_MASK                 (1 << 2)
 
 typedef struct
 {
-    pixman_op_t			op;
-    pixman_format_code_t	src_format;
-    pixman_format_code_t	mask_format;
-    pixman_format_code_t	dest_format;
-    pixman_composite_func_t	func;
-    uint32_t			flags;
+    pixman_op_t             op;
+    pixman_format_code_t    src_format;
+    pixman_format_code_t    mask_format;
+    pixman_format_code_t    dest_format;
+    pixman_composite_func_t func;
+    uint32_t                flags;
 } pixman_fast_path_t;
 
 /* Memory allocation helpers */
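
As an aside, the struct above is easier to read next to an instance:
implementations fill tables of pixman_fast_path_t and hand them to the
_pixman_run_fast_path () helper reformatted in the next hunk, with
PIXMAN_null standing in for "no mask" and PIXMAN_solid matching a solid
source or mask. A made-up entry (the function name is hypothetical) would
look like:

static const pixman_fast_path_t example_fast_paths[] =
{
    /* op, src_format, mask_format, dest_format, func, flags */
    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8,
      example_composite_over_8888_x888, 0 },
};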
@@ -546,51 +582,56 @@ pixman_addition_overflows_int (unsigned int a, unsigned int b);
 /* Compositing utilities */
 pixman_bool_t
 _pixman_run_fast_path (const pixman_fast_path_t *paths,
-		       pixman_implementation_t *imp,
-		       pixman_op_t op,
-		       pixman_image_t *src,
-		       pixman_image_t *mask,
-		       pixman_image_t *dest,
-		       int32_t src_x,
-		       int32_t src_y,
-		       int32_t mask_x,
-		       int32_t mask_y,
-		       int32_t dest_x,
-		       int32_t dest_y,
-		       int32_t width,
-		       int32_t height);
-    
+                       pixman_implementation_t * imp,
+                       pixman_op_t               op,
+                       pixman_image_t *          src,
+                       pixman_image_t *          mask,
+                       pixman_image_t *          dest,
+                       int32_t                   src_x,
+                       int32_t                   src_y,
+                       int32_t                   mask_x,
+                       int32_t                   mask_y,
+                       int32_t                   dest_x,
+                       int32_t                   dest_y,
+                       int32_t                   width,
+                       int32_t                   height);
+
 void
 _pixman_walk_composite_region (pixman_implementation_t *imp,
-			       pixman_op_t op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int16_t src_x,
-			       int16_t src_y,
-			       int16_t mask_x,
-			       int16_t mask_y,
-			       int16_t dest_x,
-			       int16_t dest_y,
-			       uint16_t width,
-			       uint16_t height,
-			       pixman_composite_func_t composite_rect);
+                               pixman_op_t              op,
+                               pixman_image_t *         src_image,
+                               pixman_image_t *         mask_image,
+                               pixman_image_t *         dst_image,
+                               int16_t                  src_x,
+                               int16_t                  src_y,
+                               int16_t                  mask_x,
+                               int16_t                  mask_y,
+                               int16_t                  dest_x,
+                               int16_t                  dest_y,
+                               uint16_t                 width,
+                               uint16_t                 height,
+                               pixman_composite_func_t  composite_rect);
 
 void
-pixman_expand (uint64_t *dst, const uint32_t *src, pixman_format_code_t, int width);
+pixman_expand (uint64_t *           dst,
+               const uint32_t *     src,
+               pixman_format_code_t format,
+               int                  width);
 
 void
-pixman_contract (uint32_t *dst, const uint64_t *src, int width);
+pixman_contract (uint32_t *      dst,
+                 const uint64_t *src,
+                 int             width);
 
 
 /* Region Helpers */
 pixman_bool_t
 pixman_region32_copy_from_region16 (pixman_region32_t *dst,
-				    pixman_region16_t *src);
+                                    pixman_region16_t *src);
 
 pixman_bool_t
 pixman_region16_copy_from_region32 (pixman_region16_t *dst,
-				    pixman_region32_t *src);
+                                    pixman_region32_t *src);
 
 
 /* Misc macros */
@@ -604,35 +645,40 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
 #endif
 
 #ifndef MIN
-#  define MIN(a,b) ((a < b)? a : b)
+#  define MIN(a, b) ((a < b) ? a : b)
 #endif
 
 #ifndef MAX
-#  define MAX(a,b) ((a > b)? a : b)
+#  define MAX(a, b) ((a > b) ? a : b)
 #endif
 
 /* Integer division that rounds towards -infinity */
-#define DIV(a,b) ((((a) < 0) == ((b) < 0)) ? (a) / (b) :		\
-		  ((a) - (b) + 1 - (((b) < 0) << 1)) / (b))
+#define DIV(a, b)					   \
+    ((((a) < 0) == ((b) < 0)) ? (a) / (b) :                \
+     ((a) - (b) + 1 - (((b) < 0) << 1)) / (b))
 
 /* Modulus that produces the remainder wrt. DIV */
-#define MOD(a,b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b))
+#define MOD(a, b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b))
 
-#define CLIP(v,low,high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v)))
+#define CLIP(v, low, high) ((v) < (low) ? (low) : ((v) > (high) ? (high) : (v)))
 
 /* Conversion between 8888 and 0565 */
 
-#define CONVERT_8888_TO_0565(s)    ((((s) >> 3) & 0x001f) | \
-			     (((s) >> 5) & 0x07e0) | \
-			     (((s) >> 8) & 0xf800))
-#define CONVERT_0565_TO_0888(s)    (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) | \
-			     ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) | \
-			     ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000)))
+#define CONVERT_8888_TO_0565(s)						\
+    ((((s) >> 3) & 0x001f) |						\
+     (((s) >> 5) & 0x07e0) |						\
+     (((s) >> 8) & 0xf800))
+
+#define CONVERT_0565_TO_0888(s)						\
+    (((((s) << 3) & 0xf8) | (((s) >> 2) & 0x7)) |			\
+     ((((s) << 5) & 0xfc00) | (((s) >> 1) & 0x300)) |			\
+     ((((s) << 8) & 0xf80000) | (((s) << 3) & 0x70000)))
 
-#define PIXMAN_FORMAT_IS_WIDE(f)	(PIXMAN_FORMAT_A(f) > 8 || \
-					 PIXMAN_FORMAT_R(f) > 8 || \
-					 PIXMAN_FORMAT_G(f) > 8 || \
-					 PIXMAN_FORMAT_B(f) > 8)
+#define PIXMAN_FORMAT_IS_WIDE(f)					\
+    (PIXMAN_FORMAT_A (f) > 8 ||						\
+     PIXMAN_FORMAT_R (f) > 8 ||						\
+     PIXMAN_FORMAT_G (f) > 8 ||						\
+     PIXMAN_FORMAT_B (f) > 8)
 
 /*
  * Various debugging code
@@ -643,44 +689,44 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
 
 #if DEBUG
 
-#define return_if_fail(expr)						\
-    do									\
-    {									\
-	if (!(expr))							\
-	{								\
-	    fprintf(stderr, "In %s: %s failed\n", FUNC, #expr);		\
-	    return;							\
-	}								\
-    }									\
+#define return_if_fail(expr)                                            \
+    do                                                                  \
+    {                                                                   \
+	if (!(expr))                                                    \
+	{                                                               \
+	    fprintf (stderr, "In %s: %s failed\n", FUNC, # expr);	\
+	    return;                                                     \
+	}                                                               \
+    }                                                                   \
     while (0)
 
-#define return_val_if_fail(expr, retval) 				\
-    do									\
-    {									\
-	if (!(expr))							\
-	{								\
-	    fprintf(stderr, "In %s: %s failed\n", FUNC, #expr);		\
-	    return (retval);						\
-	}								\
-    }									\
+#define return_val_if_fail(expr, retval)                                \
+    do                                                                  \
+    {                                                                   \
+	if (!(expr))                                                    \
+	{                                                               \
+	    fprintf (stderr, "In %s: %s failed\n", FUNC, # expr);	\
+	    return (retval);                                            \
+	}                                                               \
+    }                                                                   \
     while (0)
 
 #else
 
-#define return_if_fail(expr)						\
-    do									\
-    {									\
+#define return_if_fail(expr)                                            \
+    do                                                                  \
+    {                                                                   \
 	if (!(expr))							\
 	    return;							\
-    }									\
+    }                                                                   \
     while (0)
 
-#define return_val_if_fail(expr, retval)				\
-    do									\
-    {									\
+#define return_val_if_fail(expr, retval)                                \
+    do                                                                  \
+    {                                                                   \
 	if (!(expr))							\
 	    return (retval);						\
-    }									\
+    }                                                                   \
     while (0)
 
 #endif
@@ -695,42 +741,45 @@ static inline uint64_t
 oil_profile_stamp_rdtsc (void)
 {
     uint64_t ts;
-    __asm__ __volatile__("rdtsc\n" : "=A" (ts));
+
+    __asm__ __volatile__ ("rdtsc\n" : "=A" (ts));
     return ts;
 }
+
 #define OIL_STAMP oil_profile_stamp_rdtsc
 
 typedef struct pixman_timer_t pixman_timer_t;
 
 struct pixman_timer_t
 {
-    int initialized;
-    const char *name;
-    uint64_t n_times;
-    uint64_t total;
+    int             initialized;
+    const char *    name;
+    uint64_t        n_times;
+    uint64_t        total;
     pixman_timer_t *next;
 };
 
 extern int timer_defined;
+
 void pixman_timer_register (pixman_timer_t *timer);
 
-#define TIMER_BEGIN(tname)						\
-    {									\
-	static pixman_timer_t	timer##tname;				\
-	uint64_t		begin##tname;				\
-									\
-	if (!timer##tname.initialized)					\
-	{								\
-	    timer##tname.initialized = 1;				\
-	    timer##tname.name = #tname;					\
-	    pixman_timer_register (&timer##tname);			\
-	}								\
+#define TIMER_BEGIN(tname)                                              \
+    {                                                                   \
+	static pixman_timer_t timer ## tname;                           \
+	uint64_t              begin ## tname;                           \
+        								\
+	if (!timer ## tname.initialized)				\
+	{                                                               \
+	    timer ## tname.initialized = 1;				\
+	    timer ## tname.name = # tname;				\
+	    pixman_timer_register (&timer ## tname);			\
+	}                                                               \
 									\
-	timer##tname.n_times++;						\
-	begin##tname = OIL_STAMP();
+	timer ## tname.n_times++;					\
+	begin ## tname = OIL_STAMP ();
 
-#define TIMER_END(tname)						\
-        timer##tname.total += OIL_STAMP() - begin##tname;		\
+#define TIMER_END(tname)                                                \
+    timer ## tname.total += OIL_STAMP () - begin ## tname;		\
     }
 
 #endif /* PIXMAN_TIMERS */
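
One detail worth calling out from the reformatted header: DIV and MOD
implement floored division, so DIV rounds toward negative infinity where
C's / truncates toward zero, and for positive b MOD always lands in
[0, b), which is the behavior tiled repeats rely on once coordinates go
negative. A self-contained check (the example values are illustrative,
not from the patch):

#include <assert.h>

/* The two macros, copied out of pixman-private.h for demonstration. */
#define DIV(a, b)                                          \
    ((((a) < 0) == ((b) < 0)) ? (a) / (b) :                \
     ((a) - (b) + 1 - (((b) < 0) << 1)) / (b))

#define MOD(a, b) ((a) < 0 ? ((b) - ((-(a) - 1) % (b))) - 1 : (a) % (b))

int
main (void)
{
    /* C truncation gives -7 / 4 == -1; floored division gives -2,
     * and MOD supplies the matching non-negative remainder. */
    assert (-7 / 4 == -1);
    assert (DIV (-7, 4) == -2);
    assert (MOD (-7, 4) == 1);
    assert (4 * DIV (-7, 4) + MOD (-7, 4) == -7);
    return 0;
}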
commit b4d196009881a4121b49996bdc87f7770bfa5c1b
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Mon Jul 13 01:17:53 2009 -0400

    Reindent and reformat pixman-combine.h.template

diff --git a/pixman/pixman-combine.h.template b/pixman/pixman-combine.h.template
index 302d36a..2b4bb60 100644
--- a/pixman/pixman-combine.h.template
+++ b/pixman/pixman-combine.h.template
@@ -24,188 +24,206 @@
  * Helper macros.
  */
 
-#define MUL_UNc(a,b,t) ( (t) = (a) * (b) + ONE_HALF, ( ( ( (t)>>G_SHIFT ) + (t) )>>G_SHIFT ) )
-#define DIV_UNc(a,b)    (((comp2_t) (a) * MASK) / (b))
-#define ADD_UNc(x,y,t) (							\
-	(t) = x + y,                                                    \
-	(comp4_t) (comp1_t) ((t) | (0 - ((t) >> G_SHIFT))))
+#define MUL_UNc(a, b, t)						\
+    ((t) = (a) * (b) + ONE_HALF, ((((t) >> G_SHIFT ) + (t) ) >> G_SHIFT ))
 
-#define DIV_ONE_UNc(x)      (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
+#define DIV_UNc(a, b)							\
+    (((comp2_t) (a) * MASK) / (b))
+
+#define ADD_UNc(x, y, t)				     \
+    ((t) = x + y,					     \
+     (comp4_t) (comp1_t) ((t) | (0 - ((t) >> G_SHIFT))))
+
+#define DIV_ONE_UNc(x)							\
+    (((x) + ONE_HALF + (((x) + ONE_HALF) >> G_SHIFT)) >> G_SHIFT)
 
 /*
-  The methods below use some tricks to be able to do two color
-  components at the same time.
-*/
+ * The methods below use some tricks to be able to do two color
+ * components at the same time.
+ */
 
 /*
-  x_c = (x_c * a) / 255
-*/
-#define UNcx4_MUL_UNc(x, a) do {                                            \
-        comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF;                  \
-        t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
-        t &= RB_MASK;                                                   \
-                                                                        \
-        x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF;      \
-        x = (x + ((x >> COMPONENT_SIZE) & RB_MASK));                    \
-        x &= RB_MASK << COMPONENT_SIZE;                                 \
-        x += t;                                                         \
+ * x_c = (x_c * a) / 255
+ */
+#define UNcx4_MUL_UNc(x, a)						\
+    do									\
+    {									\
+	comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF;                  \
+	t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
+	t &= RB_MASK;                                                   \
+                                                                        \
+	x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF;      \
+	x = (x + ((x >> COMPONENT_SIZE) & RB_MASK));                    \
+	x &= RB_MASK << COMPONENT_SIZE;                                 \
+	x += t;                                                         \
     } while (0)
 
 /*
-  x_c = (x_c * a) / 255 + y_c
-*/
-#define UNcx4_MUL_UNc_ADD_UNcx4(x, a, y) do {                                      \
-        /* multiply and divide: trunc((i + 128)*257/65536) */           \
-        comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF;                  \
-        t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
-        t &= RB_MASK;                                                   \
-                                                                        \
-        /* add */                                                       \
-        t += y & RB_MASK;                                               \
-                                                                        \
-        /* saturate */                                                  \
-        t |= RB_MASK_PLUS_ONE - ((t >> COMPONENT_SIZE) & RB_MASK);      \
-        t &= RB_MASK;                                                   \
-                                                                        \
-        /* multiply and divide */                                       \
-        x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF;      \
-        x = (x + ((x >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
-        x &= RB_MASK;                                                   \
-                                                                        \
-        /* add */                                                       \
-        x += (y >> COMPONENT_SIZE) & RB_MASK;                           \
-                                                                        \
-        /* saturate */                                                  \
-        x |= RB_MASK_PLUS_ONE - ((x >> COMPONENT_SIZE) & RB_MASK);      \
-        x &= RB_MASK;                                                   \
-                                                                        \
-        /* recombine */                                                 \
-        x <<= COMPONENT_SIZE;                                           \
-        x += t;                                                         \
+ * x_c = (x_c * a) / 255 + y_c
+ */
+#define UNcx4_MUL_UNc_ADD_UNcx4(x, a, y)				\
+    do									\
+    {									\
+	/* multiply and divide: trunc((i + 128)*257/65536) */           \
+	comp4_t t = ((x & RB_MASK) * a) + RB_ONE_HALF;                  \
+	t = (t + ((t >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
+	t &= RB_MASK;                                                   \
+                                                                        \
+	/* add */                                                       \
+	t += y & RB_MASK;                                               \
+                                                                        \
+	/* saturate */                                                  \
+	t |= RB_MASK_PLUS_ONE - ((t >> COMPONENT_SIZE) & RB_MASK);      \
+	t &= RB_MASK;                                                   \
+                                                                        \
+	/* multiply and divide */                                       \
+	x = (((x >> COMPONENT_SIZE) & RB_MASK) * a) + RB_ONE_HALF;      \
+	x = (x + ((x >> COMPONENT_SIZE) & RB_MASK)) >> COMPONENT_SIZE;  \
+	x &= RB_MASK;                                                   \
+                                                                        \
+	/* add */                                                       \
+	x += (y >> COMPONENT_SIZE) & RB_MASK;                           \
+                                                                        \
+	/* saturate */                                                  \
+	x |= RB_MASK_PLUS_ONE - ((x >> COMPONENT_SIZE) & RB_MASK);      \
+	x &= RB_MASK;                                                   \
+                                                                        \
+	/* recombine */                                                 \
+	x <<= COMPONENT_SIZE;                                           \
+	x += t;                                                         \
     } while (0)
 
 /*
-  x_c = (x_c * a + y_c * b) / 255
-*/
-#define UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(x, a, y, b) do {                                   \
-        comp4_t t;                                                      \
-        comp4_t r = (x >> A_SHIFT) * a + (y >> A_SHIFT) * b + ONE_HALF; \
-        r += (r >> G_SHIFT);                                            \
-        r >>= G_SHIFT;                                                  \
-                                                                        \
-        t = (x & G_MASK) * a + (y & G_MASK) * b;                        \
-        t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT);                    \
-        t >>= R_SHIFT;                                                  \
-                                                                        \
-        t |= r << R_SHIFT;                                              \
-        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
-        t &= RB_MASK;                                                   \
-        t <<= G_SHIFT;                                                  \
-                                                                        \
-        r = ((x >> R_SHIFT) & MASK) * a +                               \
-            ((y >> R_SHIFT) & MASK) * b + ONE_HALF;                     \
-        r += (r >> G_SHIFT);                                            \
-        r >>= G_SHIFT;                                                  \
-                                                                        \
-        x = (x & MASK) * a + (y & MASK) * b + ONE_HALF;                 \
-        x += (x >> G_SHIFT);                                            \
-        x >>= G_SHIFT;                                                  \
-        x |= r << R_SHIFT;                                              \
-        x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK);             \
-        x &= RB_MASK;                                                   \
-        x |= t;                                                         \
+ * x_c = (x_c * a + y_c * b) / 255
+ */
+#define UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(x, a, y, b)			\
+    do									\
+    {									\
+	comp4_t t;                                                      \
+	comp4_t r = (x >> A_SHIFT) * a + (y >> A_SHIFT) * b + ONE_HALF; \
+	r += (r >> G_SHIFT);                                            \
+	r >>= G_SHIFT;                                                  \
+                                                                        \
+	t = (x & G_MASK) * a + (y & G_MASK) * b;                        \
+	t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT);                    \
+	t >>= R_SHIFT;                                                  \
+                                                                        \
+	t |= r << R_SHIFT;                                              \
+	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+	t &= RB_MASK;                                                   \
+	t <<= G_SHIFT;                                                  \
+                                                                        \
+	r = ((x >> R_SHIFT) & MASK) * a +                               \
+	    ((y >> R_SHIFT) & MASK) * b + ONE_HALF;                     \
+	r += (r >> G_SHIFT);                                            \
+	r >>= G_SHIFT;                                                  \
+                                                                        \
+	x = (x & MASK) * a + (y & MASK) * b + ONE_HALF;                 \
+	x += (x >> G_SHIFT);                                            \
+	x >>= G_SHIFT;                                                  \
+	x |= r << R_SHIFT;                                              \
+	x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK);             \
+	x &= RB_MASK;                                                   \
+	x |= t;                                                         \
     } while (0)
 
 /*
-  x_c = (x_c * a_c) / 255
-*/
-#define UNcx4_MUL_UNcx4(x, a) do {                                           \
-        comp4_t t;                                                      \
-        comp4_t r = (x & MASK) * (a & MASK);                            \
-        r |= (x & R_MASK) * ((a >> R_SHIFT) & MASK);                    \
-        r += RB_ONE_HALF;                                               \
-        r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
-        r &= RB_MASK;                                                   \
-                                                                        \
-        x >>= G_SHIFT;                                                  \
-        t = (x & MASK) * ((a >> G_SHIFT) & MASK);                       \
-        t |= (x & R_MASK) * (a >> A_SHIFT);                             \
-        t += RB_ONE_HALF;                                               \
-        t = t + ((t >> G_SHIFT) & RB_MASK);                             \
-        x = r | (t & AG_MASK);                                          \
+ * x_c = (x_c * a_c) / 255
+ */
+#define UNcx4_MUL_UNcx4(x, a)						\
+    do									\
+    {									\
+	comp4_t t;                                                      \
+	comp4_t r = (x & MASK) * (a & MASK);                            \
+	r |= (x & R_MASK) * ((a >> R_SHIFT) & MASK);                    \
+	r += RB_ONE_HALF;                                               \
+	r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
+	r &= RB_MASK;                                                   \
+                                                                        \
+	x >>= G_SHIFT;                                                  \
+	t = (x & MASK) * ((a >> G_SHIFT) & MASK);                       \
+	t |= (x & R_MASK) * (a >> A_SHIFT);                             \
+	t += RB_ONE_HALF;                                               \
+	t = t + ((t >> G_SHIFT) & RB_MASK);                             \
+	x = r | (t & AG_MASK);                                          \
     } while (0)
 
 /*
-  x_c = (x_c * a_c) / 255 + y_c
-*/
-#define UNcx4_MUL_UNcx4_ADD_UNcx4(x, a, y) do {                                     \
-        comp4_t t;                                                      \
-        comp4_t r = (x & MASK) * (a & MASK);                            \
-        r |= (x & R_MASK) * ((a >> R_SHIFT) & MASK);                    \
-        r += RB_ONE_HALF;                                               \
-        r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
-        r &= RB_MASK;                                                   \
-        r += y & RB_MASK;                                               \
-        r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK);             \
-        r &= RB_MASK;                                                   \
-                                                                        \
-        x >>= G_SHIFT;                                                  \
-        t = (x & MASK) * ((a >> G_SHIFT) & MASK);                       \
-        t |= (x & R_MASK) * (a >> A_SHIFT);                             \
-        t += RB_ONE_HALF;                                               \
-        t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
-        t &= RB_MASK;                                                   \
-        t += (y >> G_SHIFT) & RB_MASK;                                  \
-        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
-        t &= RB_MASK;                                                   \
-        x = r | (t << G_SHIFT);                                         \
+ * x_c = (x_c * a_c) / 255 + y_c
+ */
+#define UNcx4_MUL_UNcx4_ADD_UNcx4(x, a, y)				\
+    do									\
+    {									\
+	comp4_t t;                                                      \
+	comp4_t r = (x & MASK) * (a & MASK);                            \
+	r |= (x & R_MASK) * ((a >> R_SHIFT) & MASK);                    \
+	r += RB_ONE_HALF;                                               \
+	r = (r + ((r >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
+	r &= RB_MASK;                                                   \
+	r += y & RB_MASK;                                               \
+	r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK);             \
+	r &= RB_MASK;                                                   \
+                                                                        \
+	x >>= G_SHIFT;                                                  \
+	t = (x & MASK) * ((a >> G_SHIFT) & MASK);                       \
+	t |= (x & R_MASK) * (a >> A_SHIFT);                             \
+	t += RB_ONE_HALF;                                               \
+	t = (t + ((t >> G_SHIFT) & RB_MASK)) >> G_SHIFT;                \
+	t &= RB_MASK;                                                   \
+	t += (y >> G_SHIFT) & RB_MASK;                                  \
+	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+	t &= RB_MASK;                                                   \
+	x = r | (t << G_SHIFT);                                         \
     } while (0)
 
 /*
-  x_c = (x_c * a_c + y_c * b) / 255
-*/
-#define UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc(x, a, y, b) do {                                  \
-        comp4_t t;                                                      \
-        comp4_t r = (x >> A_SHIFT) * (a >> A_SHIFT) +                   \
-                     (y >> A_SHIFT) * b;                                \
-        r += (r >> G_SHIFT) + ONE_HALF;                                 \
-        r >>= G_SHIFT;                                                  \
-                                                                        \
-        t = (x & G_MASK) * ((a >> G_SHIFT) & MASK) + (y & G_MASK) * b;  \
-        t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT);                    \
-        t >>= R_SHIFT;                                                  \
-                                                                        \
-        t |= r << R_SHIFT;                                              \
-        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
-        t &= RB_MASK;                                                   \
-        t <<= G_SHIFT;                                                  \
-                                                                        \
-        r = ((x >> R_SHIFT) & MASK) * ((a >> R_SHIFT) & MASK) +         \
-            ((y >> R_SHIFT) & MASK) * b + ONE_HALF;                     \
-        r += (r >> G_SHIFT);                                            \
-        r >>= G_SHIFT;                                                  \
-                                                                        \
-        x = (x & MASK) * (a & MASK) + (y & MASK) * b + ONE_HALF;        \
-        x += (x >> G_SHIFT);                                            \
-        x >>= G_SHIFT;                                                  \
-        x |= r << R_SHIFT;                                              \
-        x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK);             \
-        x &= RB_MASK;                                                   \
-        x |= t;                                                         \
+ * x_c = (x_c * a_c + y_c * b) / 255
+ */
+#define UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc(x, a, y, b)			\
+    do									\
+    {									\
+	comp4_t t;                                                      \
+	comp4_t r = (x >> A_SHIFT) * (a >> A_SHIFT) +                   \
+	    (y >> A_SHIFT) * b;						\
+	r += (r >> G_SHIFT) + ONE_HALF;                                 \
+	r >>= G_SHIFT;                                                  \
+        								\
+	t = (x & G_MASK) * ((a >> G_SHIFT) & MASK) + (y & G_MASK) * b;  \
+	t += (t >> G_SHIFT) + (ONE_HALF << G_SHIFT);                    \
+	t >>= R_SHIFT;                                                  \
+        								\
+	t |= r << R_SHIFT;                                              \
+	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+	t &= RB_MASK;                                                   \
+	t <<= G_SHIFT;                                                  \
+									\
+	r = ((x >> R_SHIFT) & MASK) * ((a >> R_SHIFT) & MASK) +         \
+	    ((y >> R_SHIFT) & MASK) * b + ONE_HALF;                     \
+	r += (r >> G_SHIFT);                                            \
+	r >>= G_SHIFT;                                                  \
+        								\
+	x = (x & MASK) * (a & MASK) + (y & MASK) * b + ONE_HALF;        \
+	x += (x >> G_SHIFT);                                            \
+	x >>= G_SHIFT;                                                  \
+	x |= r << R_SHIFT;                                              \
+	x |= RB_MASK_PLUS_ONE - ((x >> G_SHIFT) & RB_MASK);             \
+	x &= RB_MASK;                                                   \
+	x |= t;                                                         \
     } while (0)
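
The rounded division by 255 that these UNcx4_* macros build on can be checked in isolation. A minimal standalone sketch, assuming the 8-bit instantiation of the template (comp4_t == uint32_t, MASK == 0xff, G_SHIFT == 8, ONE_HALF == 0x80):

    #include <assert.h>
    #include <stdint.h>

    /* Exact rounded t / 255 for t in [0, 255 * 255], using the same
     * add-ONE_HALF-then-fold idiom as the macros above. */
    static uint32_t
    div_255 (uint32_t t)
    {
        t += 0x80;                      /* ONE_HALF, for rounding */
        return (t + (t >> 8)) >> 8;
    }

    int
    main (void)
    {
        uint32_t t;

        for (t = 0; t <= 255 * 255; t++)
            assert (div_255 (t) == (t + 127) / 255);

        return 0;
    }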
 
 /*
-  x_c = min(x_c + y_c, 255)
-*/
-#define UNcx4_ADD_UNcx4(x, y) do {                                            \
-        comp4_t t;                                                      \
-        comp4_t r = (x & RB_MASK) + (y & RB_MASK);                      \
-        r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK);             \
-        r &= RB_MASK;                                                   \
-                                                                        \
-        t = ((x >> G_SHIFT) & RB_MASK) + ((y >> G_SHIFT) & RB_MASK);    \
-        t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
-        r |= (t & RB_MASK) << G_SHIFT;                                  \
-        x = r;                                                          \
+   x_c = min(x_c + y_c, 255)
+ */
+#define UNcx4_ADD_UNcx4(x, y)						\
+    do									\
+    {									\
+	comp4_t t;                                                      \
+	comp4_t r = (x & RB_MASK) + (y & RB_MASK);                      \
+	r |= RB_MASK_PLUS_ONE - ((r >> G_SHIFT) & RB_MASK);             \
+	r &= RB_MASK;                                                   \
+        								\
+	t = ((x >> G_SHIFT) & RB_MASK) + ((y >> G_SHIFT) & RB_MASK);    \
+	t |= RB_MASK_PLUS_ONE - ((t >> G_SHIFT) & RB_MASK);             \
+	r |= (t & RB_MASK) << G_SHIFT;                                  \
+	x = r;                                                          \
     } while (0)
-
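
The saturation trick in UNcx4_ADD_UNcx4 deserves a note: two channels are added per masked word, bit 8 of each 16-bit lane ends up holding that lane's carry, and RB_MASK_PLUS_ONE minus the carries gives 0xff in overflowed lanes and 0x100 everywhere else, which the final mask strips. A standalone sketch, assuming the 8-bit instantiation (RB_MASK == 0x00ff00ff, RB_MASK_PLUS_ONE == 0x01000100, G_SHIFT == 8):

    #include <stdint.h>

    static uint32_t
    packed_saturating_add (uint32_t x, uint32_t y)
    {
        uint32_t rb = (x & 0x00ff00ff) + (y & 0x00ff00ff);
        uint32_t ag = ((x >> 8) & 0x00ff00ff) + ((y >> 8) & 0x00ff00ff);

        /* Per 16-bit lane: carry ? 0xff : 0x100, then mask. */
        rb |= 0x01000100 - ((rb >> 8) & 0x00ff00ff);
        rb &= 0x00ff00ff;

        ag |= 0x01000100 - ((ag >> 8) & 0x00ff00ff);
        ag &= 0x00ff00ff;

        return rb | (ag << 8);
    }

For example, packed_saturating_add (0x80ff4020, 0x80ff4020) yields 0xffff8040: alpha and red clamp at 0xff while green and blue simply double.
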
commit f54c776e75a44a095304dd7374384a2a0c96d479
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Mon Jul 13 01:13:47 2009 -0400

    Reindent and reformat pixman-combine.c.template

diff --git a/pixman/pixman-combine.c.template b/pixman/pixman-combine.c.template
index a937bf7..54aa291 100644
--- a/pixman/pixman-combine.c.template
+++ b/pixman/pixman-combine.c.template
@@ -16,8 +16,8 @@ combine_mask_ca (comp4_t *src, comp4_t *mask)
 {
     comp4_t a = *mask;
 
-    comp4_t	x;
-    comp2_t	xa;
+    comp4_t x;
+    comp2_t xa;
 
     if (!a)
     {
@@ -36,9 +36,10 @@ combine_mask_ca (comp4_t *src, comp4_t *mask)
     }
 
     xa = x >> A_SHIFT;
-    UNcx4_MUL_UNcx4(x, a);
+    UNcx4_MUL_UNcx4 (x, a);
     *(src) = x;
-    UNcx4_MUL_UNc(a, xa);
+    
+    UNcx4_MUL_UNc (a, xa);
     *(mask) = a;
 }
 
@@ -46,7 +47,7 @@ static void
 combine_mask_value_ca (comp4_t *src, const comp4_t *mask)
 {
     comp4_t a = *mask;
-    comp4_t	x;
+    comp4_t x;
 
     if (!a)
     {
@@ -58,15 +59,15 @@ combine_mask_value_ca (comp4_t *src, const comp4_t *mask)
 	return;
 
     x = *(src);
-    UNcx4_MUL_UNcx4(x, a);
-    *(src) =x;
+    UNcx4_MUL_UNcx4 (x, a);
+    *(src) = x;
 }
 
 static void
 combine_mask_alpha_ca (const comp4_t *src, comp4_t *mask)
 {
     comp4_t a = *(mask);
-    comp4_t	x;
+    comp4_t x;
 
     if (!a)
 	return;
@@ -74,6 +75,7 @@ combine_mask_alpha_ca (const comp4_t *src, comp4_t *mask)
     x = *(src) >> A_SHIFT;
     if (x == MASK)
 	return;
+
     if (a == ~0)
     {
 	x = x >> A_SHIFT;
@@ -83,12 +85,10 @@ combine_mask_alpha_ca (const comp4_t *src, comp4_t *mask)
 	return;
     }
 
-    UNcx4_MUL_UNc(a, x);
+    UNcx4_MUL_UNc (a, x);
     *(mask) = a;
 }
 
-
-
 /*
  * There are two ways of handling alpha -- either as a single unified value or
  * a separate value for each component, hence each macro must have two
@@ -123,15 +123,23 @@ combine_mask (const comp4_t *src, const comp4_t *mask, int i)
 }
 
 static void
-combine_clear (pixman_implementation_t *imp, pixman_op_t op,
-		comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_clear (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               comp4_t *                dest,
+               const comp4_t *          src,
+               const comp4_t *          mask,
+               int                      width)
 {
-    memset(dest, 0, width*sizeof(comp4_t));
+    memset (dest, 0, width * sizeof(comp4_t));
 }
 
 static void
-combine_src_u (pixman_implementation_t *imp, pixman_op_t op,
-	       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_src_u (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               comp4_t *                dest,
+               const comp4_t *          src,
+               const comp4_t *          mask,
+               int                      width)
 {
     int i;
 
@@ -142,7 +150,7 @@ combine_src_u (pixman_implementation_t *imp, pixman_op_t op,
 	for (i = 0; i < width; ++i)
 	{
 	    comp4_t s = combine_mask (src, mask, i);
-	    
+
 	    *(dest + i) = s;
 	}
     }
@@ -150,89 +158,125 @@ combine_src_u (pixman_implementation_t *imp, pixman_op_t op,
 
 /* if the Src is opaque, call combine_src_u */
 static void
-combine_over_u (pixman_implementation_t *imp, pixman_op_t op,
-		comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_over_u (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                comp4_t *                dest,
+                const comp4_t *          src,
+                const comp4_t *          mask,
+                int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t d = *(dest + i);
-        comp4_t ia = ALPHA_c(~s);
 
-        UNcx4_MUL_UNc_ADD_UNcx4(d, ia, s);
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t d = *(dest + i);
+	comp4_t ia = ALPHA_c (~s);
+
+	UNcx4_MUL_UNc_ADD_UNcx4 (d, ia, s);
 	*(dest + i) = d;
     }
 }
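
Per pixel this is the usual premultiplied OVER: every channel becomes src + dest * (255 - src_alpha) / 255. A scalar reference sketch, assuming 8-bit premultiplied ARGB; the clamp mirrors the saturation in UNcx4_MUL_UNc_ADD_UNcx4, although valid premultiplied inputs never trigger it:

    #include <stdint.h>

    static uint32_t
    over_reference (uint32_t s, uint32_t d)
    {
        uint32_t ia = 255 - (s >> 24);  /* inverse source alpha */
        uint32_t result = 0;
        int shift;

        for (shift = 0; shift < 32; shift += 8)
        {
            /* rounded (dest_channel * ia) / 255 */
            uint32_t t = ((d >> shift) & 0xff) * ia + 0x80;
            uint32_t c = ((s >> shift) & 0xff) + ((t + (t >> 8)) >> 8);

            if (c > 0xff)
                c = 0xff;

            result |= c << shift;
        }

        return result;
    }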
 
 /* if the Dst is opaque, this is a noop */
 static void
-combine_over_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_over_reverse_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        comp4_t *                dest,
+                        const comp4_t *          src,
+                        const comp4_t *          mask,
+                        int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t d = *(dest + i);
-        comp4_t ia = ALPHA_c(~*(dest + i));
-        UNcx4_MUL_UNc_ADD_UNcx4(s, ia, d);
+
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t d = *(dest + i);
+	comp4_t ia = ALPHA_c (~*(dest + i));
+	UNcx4_MUL_UNc_ADD_UNcx4 (s, ia, d);
 	*(dest + i) = s;
     }
 }
 
 /* if the Dst is opaque, call combine_src_u */
 static void
-combine_in_u (pixman_implementation_t *imp, pixman_op_t op,
-	      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_in_u (pixman_implementation_t *imp,
+              pixman_op_t              op,
+              comp4_t *                dest,
+              const comp4_t *          src,
+              const comp4_t *          mask,
+              int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t a = ALPHA_c(*(dest + i));
-        UNcx4_MUL_UNc(s, a);
+
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t a = ALPHA_c (*(dest + i));
+	UNcx4_MUL_UNc (s, a);
 	*(dest + i) = s;
     }
 }
 
 /* if the Src is opaque, this is a noop */
 static void
-combine_in_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-		     comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_in_reverse_u (pixman_implementation_t *imp,
+                      pixman_op_t              op,
+                      comp4_t *                dest,
+                      const comp4_t *          src,
+                      const comp4_t *          mask,
+                      int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
+
+    for (i = 0; i < width; ++i)
+    {
 	comp4_t s = combine_mask (src, mask, i);
 	comp4_t d = *(dest + i);
-        comp4_t a = ALPHA_c(s);
-        UNcx4_MUL_UNc(d, a);
+	comp4_t a = ALPHA_c (s);
+	UNcx4_MUL_UNc (d, a);
 	*(dest + i) = d;
     }
 }
 
 /* if the Dst is opaque, call combine_clear */
 static void
-combine_out_u (pixman_implementation_t *imp, pixman_op_t op,
-	       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_out_u (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               comp4_t *                dest,
+               const comp4_t *          src,
+               const comp4_t *          mask,
+               int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t a = ALPHA_c(~*(dest + i));
-        UNcx4_MUL_UNc(s, a);
+
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t a = ALPHA_c (~*(dest + i));
+	UNcx4_MUL_UNc (s, a);
 	*(dest + i) = s;
     }
 }
 
 /* if the Src is opaque, call combine_clear */
 static void
-combine_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-		      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_out_reverse_u (pixman_implementation_t *imp,
+                       pixman_op_t              op,
+                       comp4_t *                dest,
+                       const comp4_t *          src,
+                       const comp4_t *          mask,
+                       int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
+
+    for (i = 0; i < width; ++i)
+    {
 	comp4_t s = combine_mask (src, mask, i);
-        comp4_t d = *(dest + i);
-        comp4_t a = ALPHA_c(~s);
-        UNcx4_MUL_UNc(d, a);
+	comp4_t d = *(dest + i);
+	comp4_t a = ALPHA_c (~s);
+	UNcx4_MUL_UNc (d, a);
 	*(dest + i) = d;
     }
 }
@@ -241,17 +285,23 @@ combine_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
 /* if the Dst is opaque, call combine_over_u */
 /* if both the Src and Dst are opaque, call combine_src_u */
 static void
-combine_atop_u (pixman_implementation_t *imp, pixman_op_t op,
-		comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_atop_u (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                comp4_t *                dest,
+                const comp4_t *          src,
+                const comp4_t *          mask,
+                int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t d = *(dest + i);
-        comp4_t dest_a = ALPHA_c(d);
-        comp4_t src_ia = ALPHA_c(~s);
 
-        UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(s, dest_a, d, src_ia);
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t d = *(dest + i);
+	comp4_t dest_a = ALPHA_c (d);
+	comp4_t src_ia = ALPHA_c (~s);
+
+	UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_a, d, src_ia);
 	*(dest + i) = s;
     }
 }
@@ -260,17 +310,23 @@ combine_atop_u (pixman_implementation_t *imp, pixman_op_t op,
 /* if the Dst is opaque, call combine_in_reverse_u */
 /* if both the Src and Dst are opaque, call combine_dst_u */
 static void
-combine_atop_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_atop_reverse_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        comp4_t *                dest,
+                        const comp4_t *          src,
+                        const comp4_t *          mask,
+                        int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t d = *(dest + i);
-        comp4_t src_a = ALPHA_c(s);
-        comp4_t dest_ia = ALPHA_c(~d);
 
-        UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(s, dest_ia, d, src_a);
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t d = *(dest + i);
+	comp4_t src_a = ALPHA_c (s);
+	comp4_t dest_ia = ALPHA_c (~d);
+
+	UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_ia, d, src_a);
 	*(dest + i) = s;
     }
 }
@@ -279,30 +335,42 @@ combine_atop_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
 /* if the Dst is opaque, call combine_over_reverse_u */
 /* if both the Src and Dst are opaque, call combine_clear */
 static void
-combine_xor_u (pixman_implementation_t *imp, pixman_op_t op,
-	       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_xor_u (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               comp4_t *                dest,
+               const comp4_t *          src,
+               const comp4_t *          mask,
+               int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t d = *(dest + i);
-        comp4_t src_ia = ALPHA_c(~s);
-        comp4_t dest_ia = ALPHA_c(~d);
 
-        UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(s, dest_ia, d, src_ia);
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t d = *(dest + i);
+	comp4_t src_ia = ALPHA_c (~s);
+	comp4_t dest_ia = ALPHA_c (~d);
+
+	UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (s, dest_ia, d, src_ia);
 	*(dest + i) = s;
     }
 }
 
 static void
-combine_add_u (pixman_implementation_t *imp, pixman_op_t op,
-	       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_add_u (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               comp4_t *                dest,
+               const comp4_t *          src,
+               const comp4_t *          mask,
+               int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t d = *(dest + i);
-        UNcx4_ADD_UNcx4(d, s);
+
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t d = *(dest + i);
+	UNcx4_ADD_UNcx4 (d, s);
 	*(dest + i) = d;
     }
 }
@@ -311,28 +379,35 @@ combine_add_u (pixman_implementation_t *imp, pixman_op_t op,
 /* if the Dst is opaque, call combine_add_u */
 /* if both the Src and Dst are opaque, call combine_add_u */
 static void
-combine_saturate_u (pixman_implementation_t *imp, pixman_op_t op,
-		    comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_saturate_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    comp4_t *                dest,
+                    const comp4_t *          src,
+                    const comp4_t *          mask,
+                    int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t d = *(dest + i);
-        comp2_t sa, da;
-
-        sa = s >> A_SHIFT;
-        da = ~d >> A_SHIFT;
-        if (sa > da)
-        {
-            sa = DIV_UNc(da, sa);
-            UNcx4_MUL_UNc(s, sa);
-        };
-        UNcx4_ADD_UNcx4(d, s);
+
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t d = *(dest + i);
+	comp2_t sa, da;
+
+	sa = s >> A_SHIFT;
+	da = ~d >> A_SHIFT;
+	if (sa > da)
+	{
+	    sa = DIV_UNc (da, sa);
+	    UNcx4_MUL_UNc (s, sa);
+	}
+	UNcx4_ADD_UNcx4 (d, s);
 	*(dest + i) = d;
     }
 }
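
The saturate rule only scales the source when its alpha exceeds the headroom left in the destination (~d >> A_SHIFT), so the saturating add cannot overflow the alpha channel. A scalar sketch of the scale factor, assuming 8-bit alphas and that DIV_UNc (a, b) computes a * MASK / b (truncating):

    #include <stdint.h>

    static uint8_t
    saturate_factor (uint8_t sa, uint8_t dest_a)
    {
        uint8_t da = 255 - dest_a;      /* headroom left in the dest */

        if (sa > da)
            return (uint8_t) ((uint32_t) da * 255 / sa);

        return 255;                     /* source fits unscaled */
    }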
 
-/* 
+/*
  * PDF blend modes:
  * The following blend modes have been taken from the PDF ISO 32000
  * specification, which at this point in time is available from
@@ -343,50 +418,63 @@ combine_saturate_u (pixman_implementation_t *imp, pixman_op_t op,
  * with B() being the blend function.
  * Note that OVER is a special case of this operation, using B(Cb, Cs) = Cs
  *
- * These blend modes should match the SVG filter draft specification, as 
+ * These blend modes should match the SVG filter draft specification, as
  * it has been designed to mirror ISO 32000. Note that at the current point
  * no released draft exists that shows this, as the formulas have not been
  * updated yet after the release of ISO 32000.
  *
- * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and 
- * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an 
+ * The default implementation here uses the PDF_SEPARABLE_BLEND_MODE and
+ * PDF_NON_SEPARABLE_BLEND_MODE macros, which take the blend function as an
  * argument. Note that this implementation operates on premultiplied colors,
  * while the PDF specification does not. Therefore the code uses the formula
  * ar.Cra = (1 – as) . Dca + (1 – ad) . Sca + B(Dca, ad, Sca, as)
  */
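
Concretely, every separable mode below reduces per channel to two pass-through terms plus the blend term. A minimal per-channel sketch, taking Multiply's B(Dca, ad, Sca, as) = Dca.Sca (defined next) as the example and assuming 8-bit premultiplied channels; div_one here stands in for the template's DIV_ONE_UNc:

    #include <stdint.h>

    static uint32_t
    div_one (uint32_t t)                /* rounded t / 255 */
    {
        t += 0x80;
        return (t + (t >> 8)) >> 8;
    }

    /* ar.Cra = (1 - as).Dca + (1 - ad).Sca + B(Dca, ad, Sca, as) */
    static uint32_t
    multiply_channel (uint32_t dc, uint32_t da,
                      uint32_t sc, uint32_t sa)
    {
        return div_one ((255 - sa) * dc) +  /* dest where src is absent */
               div_one ((255 - da) * sc) +  /* src where dest is absent */
               div_one (dc * sc);           /* blended overlap term */
    }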
 
-/* 
+/*
  * Multiply
  * B(Dca, ad, Sca, as) = Dca.Sca
  */
 
 static void
-combine_multiply_u (pixman_implementation_t *imp, pixman_op_t op,
-		    comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_multiply_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    comp4_t *                dest,
+                    const comp4_t *          src,
+                    const comp4_t *          mask,
+                    int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t d = *(dest + i);
+
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t d = *(dest + i);
 	comp4_t ss = s;
-        comp4_t src_ia = ALPHA_c (~s);
+	comp4_t src_ia = ALPHA_c (~s);
 	comp4_t dest_ia = ALPHA_c (~d);
 
 	UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (ss, dest_ia, d, src_ia);
 	UNcx4_MUL_UNcx4 (d, s);
-	UNcx4_ADD_UNcx4 (d, ss); 	
+	UNcx4_ADD_UNcx4 (d, ss);
+
 	*(dest + i) = d;
     }
 }
 
 static void
-combine_multiply_ca (pixman_implementation_t *imp, pixman_op_t op,
-                    comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_multiply_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     comp4_t *                dest,
+                     const comp4_t *          src,
+                     const comp4_t *          mask,
+                     int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t m = *(mask + i);
-        comp4_t s = *(src + i);
+
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t m = *(mask + i);
+	comp4_t s = *(src + i);
 	comp4_t d = *(dest + i);
 	comp4_t r = d;
 	comp4_t dest_ia = ALPHA_c (~d);
@@ -401,82 +489,88 @@ combine_multiply_ca (pixman_implementation_t *imp, pixman_op_t op,
     }
 }
 
-#define PDF_SEPARABLE_BLEND_MODE(name)		    \
-static void					    \
-combine_ ## name ## _u (pixman_implementation_t *imp, pixman_op_t op, \
-                        comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) \
-{						    \
-    int i;					    \
-    for (i = 0; i < width; ++i) {		    \
-        comp4_t s = combine_mask (src, mask, i);     \
-        comp4_t d = *(dest + i);		    \
-        comp1_t sa = ALPHA_c(s);			    \
-        comp1_t isa = ~sa;			    \
-        comp1_t da = ALPHA_c(d);	  		    \
-        comp1_t ida = ~da;			    \
-	comp4_t	result;				    \
-						    \
-	result = d;				    \
-        UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(result, isa, s, ida);	    \
-						    \
-	*(dest + i) = result +			    \
-	    (DIV_ONE_UNc (sa * da) << A_SHIFT) +	    \
-	    (blend_ ## name (RED_c (d), da, RED_c (s), sa) << R_SHIFT) + \
-	    (blend_ ## name (GREEN_c (d), da, GREEN_c (s), sa) << G_SHIFT) + \
-	    (blend_ ## name (BLUE_c (d), da, BLUE_c (s), sa)); \
-    }						    \
-}						    \
-						    \
-static void				    \
-combine_ ## name ## _ca (pixman_implementation_t *imp, pixman_op_t op, \
-			comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) \
-{						    \
-    int i;					    \
-    for (i = 0; i < width; ++i) {		    \
-	comp4_t m = *(mask + i);		    \
-	comp4_t s = *(src + i);                     \
-	comp4_t d = *(dest + i);		    \
-	comp1_t da = ALPHA_c(d);  		    \
-	comp1_t ida = ~da;			    \
-	comp4_t result;				    \
-						    \
-	combine_mask_value_ca (&s, &m);		    \
-						    \
-	result = d;				    \
-	UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (result, ~m, s, ida);	    \
-						    \
-	result +=				    \
-	    (DIV_ONE_UNc (ALPHA_c (m) * da) << A_SHIFT) +				\
-	    (blend_ ## name (RED_c (d), da, RED_c (s), RED_c (m)) << R_SHIFT) +	\
-	    (blend_ ## name (GREEN_c (d), da, GREEN_c (s), GREEN_c (m)) << G_SHIFT) +	\
-	    (blend_ ## name (BLUE_c (d), da, BLUE_c (s), BLUE_c (m)));			\
-						    \
-	*(dest + i) = result;			    \
-    }						    \
-}
+#define PDF_SEPARABLE_BLEND_MODE(name)					\
+    static void								\
+    combine_ ## name ## _u (pixman_implementation_t *imp,		\
+			    pixman_op_t              op,		\
+                            comp4_t *                dest,		\
+			    const comp4_t *          src,		\
+			    const comp4_t *          mask,		\
+			    int                      width)		\
+    {									\
+	int i;								\
+	for (i = 0; i < width; ++i) {					\
+	    comp4_t s = combine_mask (src, mask, i);			\
+	    comp4_t d = *(dest + i);					\
+	    comp1_t sa = ALPHA_c (s);					\
+	    comp1_t isa = ~sa;						\
+	    comp1_t da = ALPHA_c (d);					\
+	    comp1_t ida = ~da;						\
+	    comp4_t result;						\
+									\
+	    result = d;							\
+	    UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida);	\
+	    								\
+	    *(dest + i) = result +					\
+		(DIV_ONE_UNc (sa * da) << A_SHIFT) +			\
+		(blend_ ## name (RED_c (d), da, RED_c (s), sa) << R_SHIFT) + \
+		(blend_ ## name (GREEN_c (d), da, GREEN_c (s), sa) << G_SHIFT) + \
+		(blend_ ## name (BLUE_c (d), da, BLUE_c (s), sa));	\
+	}								\
+    }									\
+    									\
+    static void								\
+    combine_ ## name ## _ca (pixman_implementation_t *imp,		\
+			     pixman_op_t              op,		\
+                             comp4_t *                dest,		\
+			     const comp4_t *          src,		\
+			     const comp4_t *          mask,		\
+			     int                     width)		\
+    {									\
+	int i;								\
+	for (i = 0; i < width; ++i) {					\
+	    comp4_t m = *(mask + i);					\
+	    comp4_t s = *(src + i);					\
+	    comp4_t d = *(dest + i);					\
+	    comp1_t da = ALPHA_c (d);					\
+	    comp1_t ida = ~da;						\
+	    comp4_t result;						\
+            								\
+	    combine_mask_value_ca (&s, &m);				\
+            								\
+	    result = d;							\
+	    UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (result, ~m, s, ida);     \
+            								\
+	    result +=							\
+	        (DIV_ONE_UNc (ALPHA_c (m) * da) << A_SHIFT) +		\
+	        (blend_ ## name (RED_c (d), da, RED_c (s), RED_c (m)) << R_SHIFT) + \
+	        (blend_ ## name (GREEN_c (d), da, GREEN_c (s), GREEN_c (m)) << G_SHIFT) + \
+	        (blend_ ## name (BLUE_c (d), da, BLUE_c (s), BLUE_c (m))); \
+	    								\
+	    *(dest + i) = result;					\
+	}								\
+    }
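
Each PDF_SEPARABLE_BLEND_MODE (name) use further down expands into a combine_<name>_u / combine_<name>_ca pair built around the matching blend_<name> helper. A self-contained analogue of the token-pasting pattern (blend_demo and combine_demo are made-up names for illustration):

    #include <stdio.h>

    static int
    blend_demo (int x)
    {
        return 2 * x;
    }

    #define DEFINE_COMBINER(name)               \
        static int                              \
        combine_ ## name (int x)                \
        {                                       \
            return blend_ ## name (x);          \
        }

    DEFINE_COMBINER (demo)

    int
    main (void)
    {
        printf ("%d\n", combine_demo (21));     /* prints 42 */
        return 0;
    }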
 
 /*
  * Screen
  * B(Dca, ad, Sca, as) = Dca.sa + Sca.da - Dca.Sca
  */
-
 static inline comp4_t
 blend_screen (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
 {
-  return DIV_ONE_UNc (sca * da + dca * sa - sca * dca);
+    return DIV_ONE_UNc (sca * da + dca * sa - sca * dca);
 }
 
 PDF_SEPARABLE_BLEND_MODE (screen)
 
 /*
  * Overlay
- * B(Dca, ab, Sca, as) = 
+ * B(Dca, ab, Sca, as) =
  *   if 2.Dca < Da
  *     2.Sca.Dca
  *   otherwise
  *     Sa.Da - 2.(Da - Dca).(Sa - Sca)
  */
-
 static inline comp4_t
 blend_overlay (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
 {
@@ -495,12 +589,11 @@ PDF_SEPARABLE_BLEND_MODE (overlay)
  * Darken
  * B(Dca, ab, Sca, as) = min (Sca.Da, Dca.Sa)
  */
-
 static inline comp4_t
 blend_darken (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
 {
     comp4_t s, d;
-    
+
     s = sca * da;
     d = dca * sa;
     return DIV_ONE_UNc (s > d ? d : s);
@@ -512,12 +605,11 @@ PDF_SEPARABLE_BLEND_MODE (darken)
  * Lighten
  * B(Dca, ab, Sca, as) = max (Sca.Da, Dca.Sa)
  */
-
 static inline comp4_t
 blend_lighten (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
 {
     comp4_t s, d;
-    
+
     s = sca * da;
     d = dca * sa;
     return DIV_ONE_UNc (s > d ? s : d);
@@ -527,19 +619,21 @@ PDF_SEPARABLE_BLEND_MODE (lighten)
 
 /*
  * Color dodge
- * B(Dca, ab, Sca, as) = 
+ * B(Dca, ab, Sca, as) =
  *   if Sca == Sa
  *     (Dca != 0).Sa.Da
  *   otherwise
 *     Da.Sa. min (1, Dca / Da / (1 - Sca/Sa))
- */ 
-
+ */
 static inline comp4_t
 blend_color_dodge (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
 {
-    if (sca >= sa) {
+    if (sca >= sa)
+    {
 	return dca == 0 ? 0 : DIV_ONE_UNc (sa * da);
-    } else {
+    }
+    else
+    {
 	comp4_t rca = dca * sa * sa / (sa - sca);
 	return DIV_ONE_UNc (rca > sa * da ? sa * da : rca);
     }
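
A double-precision reference sketch for the dodge branch above, assuming channel values scaled to [0, 1]; the integer rca = dca * sa * sa / (sa - sca) is the same quantity carried at fixed point:

    /* Sa.Da. min (1, Dca / Da / (1 - Sca/Sa)) */
    static double
    color_dodge_ref (double dca, double da, double sca, double sa)
    {
        double rca;

        if (sca >= sa)
            return dca == 0 ? 0.0 : sa * da;

        rca = dca * sa / (1.0 - sca / sa);

        return rca < sa * da ? rca : sa * da;
    }
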
@@ -549,19 +643,21 @@ PDF_SEPARABLE_BLEND_MODE (color_dodge)
 
 /*
  * Color burn
- * B(Dca, ab, Sca, as) = 
+ * B(Dca, ab, Sca, as) =
 *   if Sca == 0
  *     (Da == Dca).Sa.Da
  *   otherwise
  *     Sa.Da.(1 - min (1, (1 - Dca/Da).Sa / Sca))
  */
-
 static inline comp4_t
 blend_color_burn (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
 {
-    if (sca == 0) {
+    if (sca == 0)
+    {
 	return dca < da ? 0 : DIV_ONE_UNc (sa * da);
-    } else {
+    }
+    else
+    {
 	comp4_t sada = sa * da;
 	comp4_t rca = (da - dca) * sa * sa / sca;
 	return DIV_ONE_UNc (rca > sada ? 0 : sada - rca);
@@ -572,7 +668,7 @@ PDF_SEPARABLE_BLEND_MODE (color_burn)
 
 /*
  * Hard light
- * B(Dca, ab, Sca, as) = 
+ * B(Dca, ab, Sca, as) =
  *   if 2.Sca < Sa
  *     2.Sca.Dca
  *   otherwise
@@ -591,7 +687,7 @@ PDF_SEPARABLE_BLEND_MODE (hard_light)
 
 /*
  * Soft light
- * B(Dca, ab, Sca, as) = 
+ * B(Dca, ab, Sca, as) =
  *   if (2.Sca <= Sa)
  *     Dca.(Sa - (1 - Dca/Da).(2.Sca - Sa))
  *   otherwise if Dca.4 <= Da
@@ -599,9 +695,11 @@ PDF_SEPARABLE_BLEND_MODE (hard_light)
  *   otherwise
  *     (Dca.Sa + (SQRT (Dca/Da).Da - Dca).(2.Sca - Sa))
  */
-
 static inline comp4_t
-blend_soft_light (comp4_t dca_org, comp4_t da_org, comp4_t sca_org, comp4_t sa_org)
+blend_soft_light (comp4_t dca_org,
+		  comp4_t da_org,
+		  comp4_t sca_org,
+		  comp4_t sa_org)
 {
     double dca = dca_org * (1.0 / MASK);
     double da = da_org * (1.0 / MASK);
@@ -609,16 +707,24 @@ blend_soft_light (comp4_t dca_org, comp4_t da_org, comp4_t sca_org, comp4_t sa_o
     double sa = sa_org * (1.0 / MASK);
     double rca;
 
-    if (2 * sca < sa) {
+    if (2 * sca < sa)
+    {
 	if (da == 0)
 	    rca = dca * sa;
 	else
 	    rca = dca * sa - dca * (da - dca) * (sa - 2 * sca) / da;
-    } else if (da == 0) {
+    }
+    else if (da == 0)
+    {
 	rca = 0;
-    } else if (4 * dca <= da) {
-	rca = dca * sa + (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
-    } else {
+    }
+    else if (4 * dca <= da)
+    {
+	rca = dca * sa +
+	    (2 * sca - sa) * dca * ((16 * dca / da - 12) * dca / da + 3);
+    }
+    else
+    {
 	rca = dca * sa + (sqrt (dca * da) - dca) * (2 * sca - sa);
     }
     return rca * MASK + 0.5;
@@ -630,7 +736,6 @@ PDF_SEPARABLE_BLEND_MODE (soft_light)
  * Difference
  * B(Dca, ab, Sca, as) = abs (Dca.Sa - Sca.Da)
  */
-
 static inline comp4_t
 blend_difference (comp4_t dca, comp4_t da, comp4_t sca, comp4_t sa)
 {
@@ -665,9 +770,9 @@ PDF_SEPARABLE_BLEND_MODE (exclusion)
 
 /*
 * PDF nonseparable blend modes are implemented using the following functions
 * to operate in HSL space, with Cmax, Cmid, Cmin referring to the max, mid
+ * to operate in Hsl space, with Cmax, Cmid, Cmin referring to the max, mid
  * and min value of the red, green and blue components.
- * 
+ *
  * LUM (C) = 0.3 × Cred + 0.59 × Cgreen + 0.11 × Cblue
  *
  * clip_color (C):
@@ -735,11 +840,11 @@ PDF_SEPARABLE_BLEND_MODE (exclusion)
  * C_mid cancel out. Specifically, it holds for x = r:
  *
  *    r * set_sat (C, s) = set_sat (r_c, rs)
- *  
+ *
  */
 
-/* So, for the non-separable PDF blend modes, we have (using s, d for non-premultiplied
- * colors, and S, D for premultiplied:
+/* So, for the non-separable PDF blend modes, we have (using s, d for
+ * non-premultiplied colors, and S, D for premultiplied):
  *
  *   Color:
  *
@@ -759,138 +864,169 @@ PDF_SEPARABLE_BLEND_MODE (exclusion)
  *
  *     a_s * a_d * B(s, d)
  *   = a_s * a_d * set_lum (set_sat (D/a_d, SAT (S/a_s)), LUM (D/a_d), 1)
- *   = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)), a_s * LUM (D), a_s * a_d)
+ *   = set_lum (a_s * a_d * set_sat (D/a_d, SAT (S/a_s)),
+ *                                        a_s * LUM (D), a_s * a_d)
  *   = set_lum (set_sat (a_s * D, a_d * SAT (S), a_s * LUM (D), a_s * a_d))
  *
  *   Hue:
  *
  *     a_s * a_d * B(s, d)
  *   = a_s * a_d * set_lum (set_sat (S/a_s, SAT (D/a_d)), LUM (D/a_d), 1)
- *   = a_s * a_d * set_lum (set_sat (a_d * S, a_s * SAT (D)), a_s * LUM (D), a_s * a_d)
+ *   = a_s * a_d * set_lum (set_sat (a_d * S, a_s * SAT (D)),
+ *                                        a_s * LUM (D), a_s * a_d)
  *
  */
-    
+
 #define CH_MIN(c) (c[0] < c[1] ? (c[0] < c[2] ? c[0] : c[2]) : (c[1] < c[2] ? c[1] : c[2]))
 #define CH_MAX(c) (c[0] > c[1] ? (c[0] > c[2] ? c[0] : c[2]) : (c[1] > c[2] ? c[1] : c[2]))
 #define LUM(c) ((c[0] * 30 + c[1] * 59 + c[2] * 11) / 100)
 #define SAT(c) (CH_MAX (c) - CH_MIN (c))
 
-#define PDF_NON_SEPARABLE_BLEND_MODE(name)					\
-static void								\
-combine_ ## name ## _u (pixman_implementation_t *imp, pixman_op_t op,	\
-			comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width) \
-{									\
-    int i;								\
-    for (i = 0; i < width; ++i) {					\
-        comp4_t s = combine_mask (src, mask, i);				\
-        comp4_t d = *(dest + i);					\
-        comp1_t sa = ALPHA_c(s);						\
-        comp1_t isa = ~sa;						\
-        comp1_t da = ALPHA_c(d);						\
-        comp1_t ida = ~da;						\
-	comp4_t	result;							\
-	comp4_t sc[3], dc[3], c[3];					\
-									\
-	result = d;							\
-        UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc(result, isa, s, ida);				\
-	dc[0] = RED_c (d);						\
-	sc[0] = RED_c (s);						\
-	dc[1] = GREEN_c (d);						\
-	sc[1] = GREEN_c (s);						\
-	dc[2] = BLUE_c (d);						\
-	sc[2] = BLUE_c (s);						\
-	blend_ ## name (c, dc, da, sc, sa);				\
-									\
-	*(dest + i) = result +						\
-	    (DIV_ONE_UNc (sa * da) << A_SHIFT) +				\
-	    (DIV_ONE_UNc (c[0]) << R_SHIFT) +				\
-	    (DIV_ONE_UNc (c[1]) << G_SHIFT) +				\
-	    (DIV_ONE_UNc (c[2]));						\
-    }									\
-}									
+#define PDF_NON_SEPARABLE_BLEND_MODE(name)				\
+    static void								\
+    combine_ ## name ## _u (pixman_implementation_t *imp,		\
+			    pixman_op_t op,				\
+                            comp4_t *dest,				\
+			    const comp4_t *src,				\
+			    const comp4_t *mask,			\
+			    int width)					\
+    {									\
+	int i;								\
+	for (i = 0; i < width; ++i)					\
+	{								\
+	    comp4_t s = combine_mask (src, mask, i);			\
+	    comp4_t d = *(dest + i);					\
+	    comp1_t sa = ALPHA_c (s);					\
+	    comp1_t isa = ~sa;						\
+	    comp1_t da = ALPHA_c (d);					\
+	    comp1_t ida = ~da;						\
+	    comp4_t result;						\
+	    comp4_t sc[3], dc[3], c[3];					\
+            								\
+	    result = d;							\
+	    UNcx4_MUL_UNc_ADD_UNcx4_MUL_UNc (result, isa, s, ida);	\
+	    dc[0] = RED_c (d);						\
+	    sc[0] = RED_c (s);						\
+	    dc[1] = GREEN_c (d);					\
+	    sc[1] = GREEN_c (s);					\
+	    dc[2] = BLUE_c (d);						\
+	    sc[2] = BLUE_c (s);						\
+	    blend_ ## name (c, dc, da, sc, sa);				\
+            								\
+	    *(dest + i) = result +					\
+		(DIV_ONE_UNc (sa * da) << A_SHIFT) +			\
+		(DIV_ONE_UNc (c[0]) << R_SHIFT) +			\
+		(DIV_ONE_UNc (c[1]) << G_SHIFT) +			\
+		(DIV_ONE_UNc (c[2]));					\
+	}								\
+    }
 
 static void
 set_lum (comp4_t dest[3], comp4_t src[3], comp4_t sa, comp4_t lum)
 {
-  double a, l, min, max;
-  double tmp[3];
-  
-  a = sa * (1.0 / MASK);
-  l = lum * (1.0 / MASK);
-  tmp[0] = src[0] * (1.0 / MASK);
-  tmp[1] = src[1] * (1.0 / MASK);
-  tmp[2] = src[2] * (1.0 / MASK);
-  l = l - LUM (tmp);
-  tmp[0] += l;
-  tmp[1] += l;
-  tmp[2] += l;
-
-  /* clip_color */
-  l = LUM (tmp);
-  min = CH_MIN (tmp);
-  max = CH_MAX (tmp);
-
-  if (min < 0) {
-    tmp[0] = l + (tmp[0] - l) * l / (l - min);
-    tmp[1] = l + (tmp[1] - l) * l / (l - min);
-    tmp[2] = l + (tmp[2] - l) * l / (l - min);
-  }
-  if (max > a) {
-    tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
-    tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
-    tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
-  }
-  dest[0] = tmp[0] * MASK + 0.5;
-  dest[1] = tmp[1] * MASK + 0.5;
-  dest[2] = tmp[2] * MASK + 0.5;
+    double a, l, min, max;
+    double tmp[3];
+
+    a = sa * (1.0 / MASK);
+
+    l = lum * (1.0 / MASK);
+    tmp[0] = src[0] * (1.0 / MASK);
+    tmp[1] = src[1] * (1.0 / MASK);
+    tmp[2] = src[2] * (1.0 / MASK);
+
+    l = l - LUM (tmp);
+    tmp[0] += l;
+    tmp[1] += l;
+    tmp[2] += l;
+
+    /* clip_color */
+    l = LUM (tmp);
+    min = CH_MIN (tmp);
+    max = CH_MAX (tmp);
+
+    if (min < 0)
+    {
+	tmp[0] = l + (tmp[0] - l) * l / (l - min);
+	tmp[1] = l + (tmp[1] - l) * l / (l - min);
+	tmp[2] = l + (tmp[2] - l) * l / (l - min);
+    }
+    if (max > a)
+    {
+	tmp[0] = l + (tmp[0] - l) * (a - l) / (max - l);
+	tmp[1] = l + (tmp[1] - l) * (a - l) / (max - l);
+	tmp[2] = l + (tmp[2] - l) * (a - l) / (max - l);
+    }
+
+    dest[0] = tmp[0] * MASK + 0.5;
+    dest[1] = tmp[1] * MASK + 0.5;
+    dest[2] = tmp[2] * MASK + 0.5;
 }
 
 static void
 set_sat (comp4_t dest[3], comp4_t src[3], comp4_t sat)
 {
-  int id[3];
-  comp4_t min, max;
-
-  if (src[0] > src[1]) {
-    if (src[0] > src[2]) {
-      id[0] = 0;
-      if (src[1] > src[2]) {
-	id[1] = 1;
-	id[2] = 2;
-      } else {
-	id[1] = 2;
-	id[2] = 1;
-      }
-    } else {
-      id[0] = 2;
-      id[1] = 0;
-      id[2] = 1;
+    int id[3];
+    comp4_t min, max;
+
+    if (src[0] > src[1])
+    {
+	if (src[0] > src[2])
+	{
+	    id[0] = 0;
+	    if (src[1] > src[2])
+	    {
+		id[1] = 1;
+		id[2] = 2;
+	    }
+	    else
+	    {
+		id[1] = 2;
+		id[2] = 1;
+	    }
+	}
+	else
+	{
+	    id[0] = 2;
+	    id[1] = 0;
+	    id[2] = 1;
+	}
     }
-  } else {
-    if (src[0] > src[2]) {
-      id[0] = 1;
-      id[1] = 0;
-      id[2] = 2;
-    } else {
-      id[2] = 0;
-      if (src[1] > src[2]) {
-	id[0] = 1;
-	id[1] = 2;
-      } else {
-	id[0] = 2;
-	id[1] = 1;
-      }
+    else
+    {
+	if (src[0] > src[2])
+	{
+	    id[0] = 1;
+	    id[1] = 0;
+	    id[2] = 2;
+	}
+	else
+	{
+	    id[2] = 0;
+	    if (src[1] > src[2])
+	    {
+		id[0] = 1;
+		id[1] = 2;
+	    }
+	    else
+	    {
+		id[0] = 2;
+		id[1] = 1;
+	    }
+	}
+    }
+
+    max = dest[id[0]];
+    min = dest[id[2]];
+    if (max > min)
+    {
+	dest[id[1]] = (dest[id[1]] - min) * sat / (max - min);
+	dest[id[0]] = sat;
+	dest[id[2]] = 0;
+    }
+    else
+    {
+	dest[0] = dest[1] = dest[2] = 0;
     }
-  }
-  max = dest[id[0]];
-  min = dest[id[2]];
-  if (max > min) {
-    dest[id[1]] = (dest[id[1]] - min) * sat / (max - min);
-    dest[id[0]] = sat;
-    dest[id[2]] = 0;
-  } else {
-    dest[0] = dest[1] = dest[2] = 0;
-  }
 }
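
The index ladder above is a hand-unrolled three-element sort. A compact reference sketch with the same rescaling, assuming only 32-bit values (ties between equal src channels may pick a different index order than the ladder):

    #include <stdint.h>

    static void
    set_sat_ref (uint32_t dest[3], const uint32_t src[3], uint32_t sat)
    {
        int id[3] = { 0, 1, 2 };
        int i, j, t;

        /* Sort indices so src[id[0]] >= src[id[1]] >= src[id[2]]. */
        for (i = 0; i < 2; i++)
        {
            for (j = i + 1; j < 3; j++)
            {
                if (src[id[j]] > src[id[i]])
                {
                    t = id[i]; id[i] = id[j]; id[j] = t;
                }
            }
        }

        if (dest[id[0]] > dest[id[2]])
        {
            dest[id[1]] = (dest[id[1]] - dest[id[2]]) * sat
                / (dest[id[0]] - dest[id[2]]);
            dest[id[0]] = sat;
            dest[id[2]] = 0;
        }
        else
        {
            dest[0] = dest[1] = dest[2] = 0;
        }
    }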
 
 /*
@@ -898,7 +1034,11 @@ set_sat (comp4_t dest[3], comp4_t src[3], comp4_t sat)
  * B(Cb, Cs) = set_lum (set_sat (Cs, SAT (Cb)), LUM (Cb))
  */
 static inline void
-blend_hsl_hue (comp4_t c[3], comp4_t dc[3], comp4_t da, comp4_t sc[3], comp4_t sa)
+blend_hsl_hue (comp4_t c[3],
+               comp4_t dc[3],
+               comp4_t da,
+               comp4_t sc[3],
+               comp4_t sa)
 {
     c[0] = sc[0] * da;
     c[1] = sc[1] * da;
@@ -914,7 +1054,11 @@ PDF_NON_SEPARABLE_BLEND_MODE (hsl_hue)
  * B(Cb, Cs) = set_lum (set_sat (Cb, SAT (Cs)), LUM (Cb))
  */
 static inline void
-blend_hsl_saturation (comp4_t c[3], comp4_t dc[3], comp4_t da, comp4_t sc[3], comp4_t sa)
+blend_hsl_saturation (comp4_t c[3],
+                      comp4_t dc[3],
+                      comp4_t da,
+                      comp4_t sc[3],
+                      comp4_t sa)
 {
     c[0] = dc[0] * sa;
     c[1] = dc[1] * sa;
@@ -930,7 +1074,11 @@ PDF_NON_SEPARABLE_BLEND_MODE (hsl_saturation)
  * B(Cb, Cs) = set_lum (Cs, LUM (Cb))
  */
 static inline void
-blend_hsl_color (comp4_t c[3], comp4_t dc[3], comp4_t da, comp4_t sc[3], comp4_t sa)
+blend_hsl_color (comp4_t c[3],
+                 comp4_t dc[3],
+                 comp4_t da,
+                 comp4_t sc[3],
+                 comp4_t sa)
 {
     c[0] = sc[0] * da;
     c[1] = sc[1] * da;
@@ -945,7 +1093,11 @@ PDF_NON_SEPARABLE_BLEND_MODE (hsl_color)
  * B(Cb, Cs) = set_lum (Cb, LUM (Cs))
  */
 static inline void
-blend_hsl_luminosity (comp4_t c[3], comp4_t dc[3], comp4_t da, comp4_t sc[3], comp4_t sa)
+blend_hsl_luminosity (comp4_t c[3],
+                      comp4_t dc[3],
+                      comp4_t da,
+                      comp4_t sc[3],
+                      comp4_t sa)
 {
     c[0] = dc[0] * sa;
     c[1] = dc[1] * sa;
@@ -964,42 +1116,41 @@ PDF_NON_SEPARABLE_BLEND_MODE (hsl_luminosity)
 /* Overlay
  *
  * All of the disjoint composing functions
-
- The four entries in the first column indicate what source contributions
- come from each of the four areas of the picture -- areas covered by neither
- A nor B, areas covered only by A, areas covered only by B and finally
- areas covered by both A and B.
-
- Disjoint			Conjoint
- Fa		Fb		Fa		Fb
- (0,0,0,0)	0		0		0		0
- (0,A,0,A)	1		0		1		0
- (0,0,B,B)	0		1		0		1
- (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
- (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
- (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
- (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
- (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
- (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
- (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
- (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
- (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
-
-*/
+ *
+ * The four entries in the first column indicate what source contributions
+ * come from each of the four areas of the picture -- areas covered by neither
+ * A nor B, areas covered only by A, areas covered only by B and finally
+ * areas covered by both A and B.
+ * 
+ * Disjoint			Conjoint
+ * Fa		Fb		Fa		Fb
+ * (0,0,0,0)	0		0		0		0
+ * (0,A,0,A)	1		0		1		0
+ * (0,0,B,B)	0		1		0		1
+ * (0,A,B,A)	1		min((1-a)/b,1)	1		max(1-a/b,0)
+ * (0,A,B,B)	min((1-b)/a,1)	1		max(1-b/a,0)	1
+ * (0,0,0,A)	max(1-(1-b)/a,0) 0		min(1,b/a)	0
+ * (0,0,0,B)	0		max(1-(1-a)/b,0) 0		min(a/b,1)
+ * (0,A,0,0)	min(1,(1-b)/a)	0		max(1-b/a,0)	0
+ * (0,0,B,0)	0		min(1,(1-a)/b)	0		max(1-a/b,0)
+ * (0,0,B,A)	max(1-(1-b)/a,0) min(1,(1-a)/b)	 min(1,b/a)	max(1-a/b,0)
+ * (0,A,0,B)	min(1,(1-b)/a)	max(1-(1-a)/b,0) max(1-b/a,0)	min(1,a/b)
+ * (0,A,B,0)	min(1,(1-b)/a)	min(1,(1-a)/b)	max(1-b/a,0)	max(1-a/b,0)
+ */
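
The COMBINE_* flags defined next encode which of those areas contribute: OVER, for instance, keeps src-outside-dest, src-inside-dest and dest-outside-src, so COMBINE_A_OVER is A_OUT | B_OUT | A_IN, matching the (0,A,B,A) row. A sketch of that row's disjoint Fb, with alphas as doubles in [0, 1]:

    /* Disjoint OVER: Fa = 1, Fb = min ((1 - a) / b, 1).  E.g. a
     * half-opaque source over a half-opaque dest keeps all of the
     * dest: (1 - 0.5) / 0.5 == 1. */
    static double
    disjoint_over_fb (double a, double b)
    {
        if (b <= 0.0 || (1.0 - a) >= b)
            return 1.0;

        return (1.0 - a) / b;
    }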
 
 #define COMBINE_A_OUT 1
 #define COMBINE_A_IN  2
 #define COMBINE_B_OUT 4
 #define COMBINE_B_IN  8
 
-#define COMBINE_CLEAR	0
-#define COMBINE_A	(COMBINE_A_OUT|COMBINE_A_IN)
-#define COMBINE_B	(COMBINE_B_OUT|COMBINE_B_IN)
-#define COMBINE_A_OVER	(COMBINE_A_OUT|COMBINE_B_OUT|COMBINE_A_IN)
-#define COMBINE_B_OVER	(COMBINE_A_OUT|COMBINE_B_OUT|COMBINE_B_IN)
-#define COMBINE_A_ATOP	(COMBINE_B_OUT|COMBINE_A_IN)
-#define COMBINE_B_ATOP	(COMBINE_A_OUT|COMBINE_B_IN)
-#define COMBINE_XOR	(COMBINE_A_OUT|COMBINE_B_OUT)
+#define COMBINE_CLEAR   0
+#define COMBINE_A       (COMBINE_A_OUT | COMBINE_A_IN)
+#define COMBINE_B       (COMBINE_B_OUT | COMBINE_B_IN)
+#define COMBINE_A_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_A_IN)
+#define COMBINE_B_OVER  (COMBINE_A_OUT | COMBINE_B_OUT | COMBINE_B_IN)
+#define COMBINE_A_ATOP  (COMBINE_B_OUT | COMBINE_A_IN)
+#define COMBINE_B_ATOP  (COMBINE_A_OUT | COMBINE_B_IN)
+#define COMBINE_XOR     (COMBINE_A_OUT | COMBINE_B_OUT)
 
 /* portion covered by a but not b */
 static comp1_t
@@ -1007,10 +1158,10 @@ combine_disjoint_out_part (comp1_t a, comp1_t b)
 {
     /* min (1, (1-b) / a) */
 
-    b = ~b;		    /* 1 - b */
-    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
-	return MASK;	    /* 1 */
-    return DIV_UNc(b,a);     /* (1-b) / a */
+    b = ~b;                 /* 1 - b */
+    if (b >= a)             /* 1 - b >= a -> (1-b)/a >= 1 */
+	return MASK;        /* 1 */
+    return DIV_UNc (b, a);     /* (1-b) / a */
 }
 
 /* portion covered by both a and b */
@@ -1021,10 +1172,10 @@ combine_disjoint_in_part (comp1_t a, comp1_t b)
     /*  = - min ((1-b)/a - 1, 0) */
     /*  = 1 - min (1, (1-b)/a) */
 
-    b = ~b;		    /* 1 - b */
-    if (b >= a)		    /* 1 - b >= a -> (1-b)/a >= 1 */
-	return 0;	    /* 1 - 1 */
-    return ~DIV_UNc(b,a);    /* 1 - (1-b) / a */
+    b = ~b;                 /* 1 - b */
+    if (b >= a)             /* 1 - b >= a -> (1-b)/a >= 1 */
+	return 0;           /* 1 - 1 */
+    return ~DIV_UNc (b, a);    /* 1 - (1-b) / a */
 }
 
 /* portion covered by a but not b */
@@ -1036,9 +1187,9 @@ combine_conjoint_out_part (comp1_t a, comp1_t b)
 
     /* min (1, (1-b) / a) */
 
-    if (b >= a)		    /* b >= a -> b/a >= 1 */
-	return 0x00;	    /* 0 */
-    return ~DIV_UNc(b,a);    /* 1 - b/a */
+    if (b >= a)             /* b >= a -> b/a >= 1 */
+	return 0x00;        /* 0 */
+    return ~DIV_UNc (b, a);    /* 1 - b/a */
 }
 
 /* portion covered by both a and b */
@@ -1047,277 +1198,387 @@ combine_conjoint_in_part (comp1_t a, comp1_t b)
 {
     /* min (1,b/a) */
 
-    if (b >= a)		    /* b >= a -> b/a >= 1 */
-	return MASK;	    /* 1 */
-    return DIV_UNc(b,a);     /* b/a */
+    if (b >= a)             /* b >= a -> b/a >= 1 */
+	return MASK;        /* 1 */
+    return DIV_UNc (b, a);     /* b/a */
 }
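
In 8-bit form each of these part helpers is one complement, one compare and one divide. A scalar sketch of combine_disjoint_out_part, assuming MASK == 0xff and that DIV_UNc (b, a) computes b * MASK / a (truncating):

    #include <stdint.h>

    static uint8_t
    disjoint_out_part (uint8_t a, uint8_t b)
    {
        b = ~b;                         /* 1 - b */

        if (b >= a)                     /* (1 - b) / a >= 1 */
            return 0xff;                /* clamp to 1 */

        return (uint8_t) ((uint32_t) b * 255 / a);
    }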
 
-#define GET_COMP(v,i)   ((comp2_t) (comp1_t) ((v) >> i))
+#define GET_COMP(v, i)   ((comp2_t) (comp1_t) ((v) >> i))
 
-#define ADD(x,y,i,t)   ((t) = GET_COMP(x,i) + GET_COMP(y,i),              \
-                        (comp4_t) ((comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
+#define ADD(x, y, i, t)							\
+    ((t) = GET_COMP (x, i) + GET_COMP (y, i),				\
+     (comp4_t) ((comp1_t) ((t) | (0 - ((t) >> G_SHIFT)))) << (i))
 
-#define GENERIC(x,y,i,ax,ay,t,u,v) ((t) = (MUL_UNc(GET_COMP(y,i),ay,(u)) + \
-					 MUL_UNc(GET_COMP(x,i),ax,(v))), \
-				  	 (comp4_t) ((comp1_t) ((t) |		\
-					 (0 - ((t) >> G_SHIFT)))) << (i))
+#define GENERIC(x, y, i, ax, ay, t, u, v)				\
+    ((t) = (MUL_UNc (GET_COMP (y, i), ay, (u)) +			\
+            MUL_UNc (GET_COMP (x, i), ax, (v))),			\
+     (comp4_t) ((comp1_t) ((t) |					\
+                           (0 - ((t) >> G_SHIFT)))) << (i))
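
The (t | (0 - ((t) >> G_SHIFT))) idiom in ADD () and GENERIC () is a branch-free clamp: any overflow past the low 8 bits makes the subtraction produce all-ones there, so the comp1_t cast saturates to 0xff. In isolation, assuming 8-bit components:

    #include <assert.h>
    #include <stdint.h>

    static uint8_t
    clamp_255 (uint32_t t)              /* t = sum of two 8-bit values */
    {
        return (uint8_t) (t | (0 - (t >> 8)));
    }

    int
    main (void)
    {
        assert (clamp_255 (0x42) == 0x42);
        assert (clamp_255 (0xff + 0xff) == 0xff);
        return 0;
    }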
 
 static void
-combine_disjoint_general_u (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine)
+combine_disjoint_general_u (comp4_t *      dest,
+                            const comp4_t *src,
+                            const comp4_t *mask,
+                            int            width,
+                            comp1_t        combine)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t d = *(dest + i);
-        comp4_t m,n,o,p;
-        comp2_t Fa, Fb, t, u, v;
-        comp1_t sa = s >> A_SHIFT;
-        comp1_t da = d >> A_SHIFT;
-
-        switch (combine & COMBINE_A) {
-        default:
-            Fa = 0;
-            break;
-        case COMBINE_A_OUT:
-            Fa = combine_disjoint_out_part (sa, da);
-            break;
-        case COMBINE_A_IN:
-            Fa = combine_disjoint_in_part (sa, da);
-            break;
-        case COMBINE_A:
-            Fa = MASK;
-            break;
-        }
-
-        switch (combine & COMBINE_B) {
-        default:
-            Fb = 0;
-            break;
-        case COMBINE_B_OUT:
-            Fb = combine_disjoint_out_part (da, sa);
-            break;
-        case COMBINE_B_IN:
-            Fb = combine_disjoint_in_part (da, sa);
-            break;
-        case COMBINE_B:
-            Fb = MASK;
-            break;
-        }
-        m = GENERIC (s,d,0,Fa,Fb,t, u, v);
-        n = GENERIC (s,d,G_SHIFT,Fa,Fb,t, u, v);
-        o = GENERIC (s,d,R_SHIFT,Fa,Fb,t, u, v);
-        p = GENERIC (s,d,A_SHIFT,Fa,Fb,t, u, v);
-        s = m|n|o|p;
+
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t d = *(dest + i);
+	comp4_t m, n, o, p;
+	comp2_t Fa, Fb, t, u, v;
+	comp1_t sa = s >> A_SHIFT;
+	comp1_t da = d >> A_SHIFT;
+
+	switch (combine & COMBINE_A)
+	{
+	default:
+	    Fa = 0;
+	    break;
+
+	case COMBINE_A_OUT:
+	    Fa = combine_disjoint_out_part (sa, da);
+	    break;
+
+	case COMBINE_A_IN:
+	    Fa = combine_disjoint_in_part (sa, da);
+	    break;
+
+	case COMBINE_A:
+	    Fa = MASK;
+	    break;
+	}
+
+	switch (combine & COMBINE_B)
+	{
+	default:
+	    Fb = 0;
+	    break;
+
+	case COMBINE_B_OUT:
+	    Fb = combine_disjoint_out_part (da, sa);
+	    break;
+
+	case COMBINE_B_IN:
+	    Fb = combine_disjoint_in_part (da, sa);
+	    break;
+
+	case COMBINE_B:
+	    Fb = MASK;
+	    break;
+	}
+	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
+	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
+	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
+	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
+	s = m | n | o | p;
 	*(dest + i) = s;
     }
 }
 
 static void
-combine_disjoint_over_u (pixman_implementation_t *imp, pixman_op_t op,
-			comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_over_u (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         comp4_t *                dest,
+                         const comp4_t *          src,
+                         const comp4_t *          mask,
+                         int                      width)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp2_t a = s >> A_SHIFT;
-
-        if (a != 0x00)
-        {
-            if (a != MASK)
-            {
-                comp4_t d = *(dest + i);
-                a = combine_disjoint_out_part (d >> A_SHIFT, a);
-                UNcx4_MUL_UNc_ADD_UNcx4(d, a, s);
-                s = d;
-            }
+
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp2_t a = s >> A_SHIFT;
+
+	if (a != 0x00)
+	{
+	    if (a != MASK)
+	    {
+		comp4_t d = *(dest + i);
+		a = combine_disjoint_out_part (d >> A_SHIFT, a);
+		UNcx4_MUL_UNc_ADD_UNcx4 (d, a, s);
+		s = d;
+	    }
+
 	    *(dest + i) = s;
-        }
+	}
     }
 }
 
 static void
-combine_disjoint_in_u (pixman_implementation_t *imp, pixman_op_t op,
-		      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_in_u (pixman_implementation_t *imp,
+                       pixman_op_t              op,
+                       comp4_t *                dest,
+                       const comp4_t *          src,
+                       const comp4_t *          mask,
+                       int                      width)
 {
     combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
 }
 
 static void
-combine_disjoint_in_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			     comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_in_reverse_u (pixman_implementation_t *imp,
+                               pixman_op_t              op,
+                               comp4_t *                dest,
+                               const comp4_t *          src,
+                               const comp4_t *          mask,
+                               int                      width)
 {
     combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
 }
 
 static void
-combine_disjoint_out_u (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_out_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        comp4_t *                dest,
+                        const comp4_t *          src,
+                        const comp4_t *          mask,
+                        int                      width)
 {
     combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
 }
 
 static void
-combine_disjoint_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_out_reverse_u (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                comp4_t *                dest,
+                                const comp4_t *          src,
+                                const comp4_t *          mask,
+                                int                      width)
 {
     combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
 }
 
 static void
-combine_disjoint_atop_u (pixman_implementation_t *imp, pixman_op_t op,
-			comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_atop_u (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         comp4_t *                dest,
+                         const comp4_t *          src,
+                         const comp4_t *          mask,
+                         int                      width)
 {
     combine_disjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
 }
 
 static void
-combine_disjoint_atop_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_atop_reverse_u (pixman_implementation_t *imp,
+                                 pixman_op_t              op,
+                                 comp4_t *                dest,
+                                 const comp4_t *          src,
+                                 const comp4_t *          mask,
+                                 int                      width)
 {
     combine_disjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
 }
 
 static void
-combine_disjoint_xor_u (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_xor_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        comp4_t *                dest,
+                        const comp4_t *          src,
+                        const comp4_t *          mask,
+                        int                      width)
 {
     combine_disjoint_general_u (dest, src, mask, width, COMBINE_XOR);
 }
 
 static void
-combine_conjoint_general_u (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine)
+combine_conjoint_general_u (comp4_t *      dest,
+                            const comp4_t *src,
+                            const comp4_t *mask,
+                            int            width,
+                            comp1_t        combine)
 {
     int i;
-    for (i = 0; i < width; ++i) {
-        comp4_t s = combine_mask (src, mask, i);
-        comp4_t d = *(dest + i);
-        comp4_t m,n,o,p;
-        comp2_t Fa, Fb, t, u, v;
-        comp1_t sa = s >> A_SHIFT;
-        comp1_t da = d >> A_SHIFT;
-
-        switch (combine & COMBINE_A) {
-        default:
-            Fa = 0;
-            break;
-        case COMBINE_A_OUT:
-            Fa = combine_conjoint_out_part (sa, da);
-            break;
-        case COMBINE_A_IN:
-            Fa = combine_conjoint_in_part (sa, da);
-            break;
-        case COMBINE_A:
-            Fa = MASK;
-            break;
-        }
-
-        switch (combine & COMBINE_B) {
-        default:
-            Fb = 0;
-            break;
-        case COMBINE_B_OUT:
-            Fb = combine_conjoint_out_part (da, sa);
-            break;
-        case COMBINE_B_IN:
-            Fb = combine_conjoint_in_part (da, sa);
-            break;
-        case COMBINE_B:
-            Fb = MASK;
-            break;
-        }
-        m = GENERIC (s,d,0,Fa,Fb,t, u, v);
-        n = GENERIC (s,d,G_SHIFT,Fa,Fb,t, u, v);
-        o = GENERIC (s,d,R_SHIFT,Fa,Fb,t, u, v);
-        p = GENERIC (s,d,A_SHIFT,Fa,Fb,t, u, v);
-        s = m|n|o|p;
+
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = combine_mask (src, mask, i);
+	comp4_t d = *(dest + i);
+	comp4_t m, n, o, p;
+	comp2_t Fa, Fb, t, u, v;
+	comp1_t sa = s >> A_SHIFT;
+	comp1_t da = d >> A_SHIFT;
+
+	switch (combine & COMBINE_A)
+	{
+	default:
+	    Fa = 0;
+	    break;
+
+	case COMBINE_A_OUT:
+	    Fa = combine_conjoint_out_part (sa, da);
+	    break;
+
+	case COMBINE_A_IN:
+	    Fa = combine_conjoint_in_part (sa, da);
+	    break;
+
+	case COMBINE_A:
+	    Fa = MASK;
+	    break;
+	}
+
+	switch (combine & COMBINE_B)
+	{
+	default:
+	    Fb = 0;
+	    break;
+
+	case COMBINE_B_OUT:
+	    Fb = combine_conjoint_out_part (da, sa);
+	    break;
+
+	case COMBINE_B_IN:
+	    Fb = combine_conjoint_in_part (da, sa);
+	    break;
+
+	case COMBINE_B:
+	    Fb = MASK;
+	    break;
+	}
+
+	m = GENERIC (s, d, 0, Fa, Fb, t, u, v);
+	n = GENERIC (s, d, G_SHIFT, Fa, Fb, t, u, v);
+	o = GENERIC (s, d, R_SHIFT, Fa, Fb, t, u, v);
+	p = GENERIC (s, d, A_SHIFT, Fa, Fb, t, u, v);
+
+	s = m | n | o | p;
+
 	*(dest + i) = s;
     }
 }
 
 static void
-combine_conjoint_over_u (pixman_implementation_t *imp, pixman_op_t op,
-			comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_over_u (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         comp4_t *                dest,
+                         const comp4_t *          src,
+                         const comp4_t *          mask,
+                         int                      width)
 {
     combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OVER);
 }
 
-
 static void
-combine_conjoint_over_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_over_reverse_u (pixman_implementation_t *imp,
+                                 pixman_op_t              op,
+                                 comp4_t *                dest,
+                                 const comp4_t *          src,
+                                 const comp4_t *          mask,
+                                 int                      width)
 {
     combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OVER);
 }
 
-
 static void
-combine_conjoint_in_u (pixman_implementation_t *imp, pixman_op_t op,
-		      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_in_u (pixman_implementation_t *imp,
+                       pixman_op_t              op,
+                       comp4_t *                dest,
+                       const comp4_t *          src,
+                       const comp4_t *          mask,
+                       int                      width)
 {
     combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_IN);
 }
 
-
 static void
-combine_conjoint_in_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			     comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_in_reverse_u (pixman_implementation_t *imp,
+                               pixman_op_t              op,
+                               comp4_t *                dest,
+                               const comp4_t *          src,
+                               const comp4_t *          mask,
+                               int                      width)
 {
     combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_IN);
 }
 
 static void
-combine_conjoint_out_u (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_out_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        comp4_t *                dest,
+                        const comp4_t *          src,
+                        const comp4_t *          mask,
+                        int                      width)
 {
     combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_OUT);
 }
 
 static void
-combine_conjoint_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_out_reverse_u (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                comp4_t *                dest,
+                                const comp4_t *          src,
+                                const comp4_t *          mask,
+                                int                      width)
 {
     combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_OUT);
 }
 
 static void
-combine_conjoint_atop_u (pixman_implementation_t *imp, pixman_op_t op,
-			comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_atop_u (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         comp4_t *                dest,
+                         const comp4_t *          src,
+                         const comp4_t *          mask,
+                         int                      width)
 {
     combine_conjoint_general_u (dest, src, mask, width, COMBINE_A_ATOP);
 }
 
 static void
-combine_conjoint_atop_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_atop_reverse_u (pixman_implementation_t *imp,
+                                 pixman_op_t              op,
+                                 comp4_t *                dest,
+                                 const comp4_t *          src,
+                                 const comp4_t *          mask,
+                                 int                      width)
 {
     combine_conjoint_general_u (dest, src, mask, width, COMBINE_B_ATOP);
 }
 
 static void
-combine_conjoint_xor_u (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_xor_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        comp4_t *                dest,
+                        const comp4_t *          src,
+                        const comp4_t *          mask,
+                        int                      width)
 {
     combine_conjoint_general_u (dest, src, mask, width, COMBINE_XOR);
 }
 
-/********************************************************************************/
-/*************************** Per Channel functions ******************************/
-/********************************************************************************/
+/************************************************************************/
+/*********************** Per Channel functions **************************/
+/************************************************************************/
 
 static void
-combine_clear_ca (pixman_implementation_t *imp, pixman_op_t op,
-		 comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_clear_ca (pixman_implementation_t *imp,
+                  pixman_op_t              op,
+                  comp4_t *                dest,
+                  const comp4_t *          src,
+                  const comp4_t *          mask,
+                  int                      width)
 {
-    memset(dest, 0, width*sizeof(comp4_t));
+    memset (dest, 0, width * sizeof(comp4_t));
 }
 
 static void
-combine_src_ca (pixman_implementation_t *imp, pixman_op_t op,
-	       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_src_ca (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                comp4_t *                dest,
+                const comp4_t *          src,
+                const comp4_t *          mask,
+                int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
+    for (i = 0; i < width; ++i)
+    {
 	comp4_t s = *(src + i);
 	comp4_t m = *(mask + i);
 
@@ -1328,12 +1589,17 @@ combine_src_ca (pixman_implementation_t *imp, pixman_op_t op,
 }
 
 static void
-combine_over_ca (pixman_implementation_t *imp, pixman_op_t op,
-		comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_over_ca (pixman_implementation_t *imp,
+                 pixman_op_t              op,
+                 comp4_t *                dest,
+                 const comp4_t *          src,
+                 const comp4_t *          mask,
+                 int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
+    for (i = 0; i < width; ++i)
+    {
 	comp4_t s = *(src + i);
 	comp4_t m = *(mask + i);
 	comp4_t a;
@@ -1341,550 +1607,706 @@ combine_over_ca (pixman_implementation_t *imp, pixman_op_t op,
 	combine_mask_ca (&s, &m);
 
 	a = ~m;
-        if (a != ~0)
-        {
-            if (a)
-            {
-                comp4_t d = *(dest + i);
-                UNcx4_MUL_UNcx4_ADD_UNcx4(d, a, s);
-                s = d;
-            }
+	if (a != ~0)
+	{
+	    if (a)
+	    {
+		comp4_t d = *(dest + i);
+		UNcx4_MUL_UNcx4_ADD_UNcx4 (d, a, s);
+		s = d;
+	    }
+
 	    *(dest + i) = s;
-        }
+	}
     }
 }
 
 static void
-combine_over_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_over_reverse_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         comp4_t *                dest,
+                         const comp4_t *          src,
+                         const comp4_t *          mask,
+                         int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
-        comp4_t d = *(dest + i);
-        comp4_t a = ~d >> A_SHIFT;
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t d = *(dest + i);
+	comp4_t a = ~d >> A_SHIFT;
 
-        if (a)
-        {
-            comp4_t s = *(src + i);
+	if (a)
+	{
+	    comp4_t s = *(src + i);
 	    comp4_t m = *(mask + i);
 
 	    combine_mask_value_ca (&s, &m);
 
-            if (a != MASK)
-            {
-                UNcx4_MUL_UNc_ADD_UNcx4(s, a, d);
-            }
+	    if (a != MASK)
+		UNcx4_MUL_UNc_ADD_UNcx4 (s, a, d);
+
 	    *(dest + i) = s;
-        }
+	}
     }
 }
 
 static void
-combine_in_ca (pixman_implementation_t *imp, pixman_op_t op,
-	      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_in_ca (pixman_implementation_t *imp,
+               pixman_op_t              op,
+               comp4_t *                dest,
+               const comp4_t *          src,
+               const comp4_t *          mask,
+               int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
-        comp4_t d = *(dest + i);
-        comp2_t a = d >> A_SHIFT;
-        comp4_t s = 0;
-        if (a)
-        {
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t d = *(dest + i);
+	comp2_t a = d >> A_SHIFT;
+	comp4_t s = 0;
+
+	if (a)
+	{
 	    comp4_t m = *(mask + i);
 
 	    s = *(src + i);
 	    combine_mask_value_ca (&s, &m);
-            if (a != MASK)
-            {
-                UNcx4_MUL_UNc(s, a);
-            }
-        }
+
+	    if (a != MASK)
+		UNcx4_MUL_UNc (s, a);
+	}
+
 	*(dest + i) = s;
     }
 }
 
 static void
-combine_in_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-		     comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_in_reverse_ca (pixman_implementation_t *imp,
+                       pixman_op_t              op,
+                       comp4_t *                dest,
+                       const comp4_t *          src,
+                       const comp4_t *          mask,
+                       int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
-        comp4_t m = *(mask + i);
-        comp4_t a;
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = *(src + i);
+	comp4_t m = *(mask + i);
+	comp4_t a;
 
 	combine_mask_alpha_ca (&s, &m);
 
 	a = m;
-        if (a != ~0)
-        {
-            comp4_t d = 0;
-            if (a)
-            {
-                d = *(dest + i);
-                UNcx4_MUL_UNcx4(d, a);
-            }
+	if (a != ~0)
+	{
+	    comp4_t d = 0;
+
+	    if (a)
+	    {
+		d = *(dest + i);
+		UNcx4_MUL_UNcx4 (d, a);
+	    }
+
 	    *(dest + i) = d;
-        }
+	}
     }
 }
 
 static void
-combine_out_ca (pixman_implementation_t *imp, pixman_op_t op,
-	       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_out_ca (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                comp4_t *                dest,
+                const comp4_t *          src,
+                const comp4_t *          mask,
+                int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
-        comp4_t d = *(dest + i);
-        comp2_t a = ~d >> A_SHIFT;
-        comp4_t s = 0;
-        if (a)
-        {
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t d = *(dest + i);
+	comp2_t a = ~d >> A_SHIFT;
+	comp4_t s = 0;
+
+	if (a)
+	{
 	    comp4_t m = *(mask + i);
 
 	    s = *(src + i);
 	    combine_mask_value_ca (&s, &m);
 
-            if (a != MASK)
-            {
-                UNcx4_MUL_UNc(s, a);
-            }
-        }
+	    if (a != MASK)
+		UNcx4_MUL_UNc (s, a);
+	}
+
 	*(dest + i) = s;
     }
 }
 
 static void
-combine_out_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-		      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_out_reverse_ca (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        comp4_t *                dest,
+                        const comp4_t *          src,
+                        const comp4_t *          mask,
+                        int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
+    for (i = 0; i < width; ++i)
+    {
 	comp4_t s = *(src + i);
 	comp4_t m = *(mask + i);
 	comp4_t a;
 
 	combine_mask_alpha_ca (&s, &m);
 
-        a = ~m;
-        if (a != ~0)
-        {
-            comp4_t d = 0;
-            if (a)
-            {
-                d = *(dest + i);
-                UNcx4_MUL_UNcx4(d, a);
-            }
+	a = ~m;
+	if (a != ~0)
+	{
+	    comp4_t d = 0;
+
+	    if (a)
+	    {
+		d = *(dest + i);
+		UNcx4_MUL_UNcx4 (d, a);
+	    }
+
 	    *(dest + i) = d;
-        }
+	}
     }
 }
 
 static void
-combine_atop_ca (pixman_implementation_t *imp, pixman_op_t op,
-		comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_atop_ca (pixman_implementation_t *imp,
+                 pixman_op_t              op,
+                 comp4_t *                dest,
+                 const comp4_t *          src,
+                 const comp4_t *          mask,
+                 int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
-        comp4_t d = *(dest + i);
-        comp4_t s = *(src + i);
-        comp4_t m = *(mask + i);
-        comp4_t ad;
-        comp2_t as = d >> A_SHIFT;
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t d = *(dest + i);
+	comp4_t s = *(src + i);
+	comp4_t m = *(mask + i);
+	comp4_t ad;
+	comp2_t as = d >> A_SHIFT;
 
 	combine_mask_ca (&s, &m);
 
-        ad = ~m;
+	ad = ~m;
+
+	UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as);
 
-        UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc(d, ad, s, as);
 	*(dest + i) = d;
     }
 }
 
 static void
-combine_atop_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_atop_reverse_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         comp4_t *                dest,
+                         const comp4_t *          src,
+                         const comp4_t *          mask,
+                         int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
-
-        comp4_t d = *(dest + i);
-        comp4_t s = *(src + i);
-        comp4_t m = *(mask + i);
-        comp4_t ad;
-        comp2_t as = ~d >> A_SHIFT;
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t d = *(dest + i);
+	comp4_t s = *(src + i);
+	comp4_t m = *(mask + i);
+	comp4_t ad;
+	comp2_t as = ~d >> A_SHIFT;
 
 	combine_mask_ca (&s, &m);
 
 	ad = m;
 
-        UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc(d, ad, s, as);
+	UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as);
+
 	*(dest + i) = d;
     }
 }
 
 static void
-combine_xor_ca (pixman_implementation_t *imp, pixman_op_t op,
-	       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_xor_ca (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                comp4_t *                dest,
+                const comp4_t *          src,
+                const comp4_t *          mask,
+                int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
-        comp4_t d = *(dest + i);
-        comp4_t s = *(src + i);
-        comp4_t m = *(mask + i);
-        comp4_t ad;
-        comp2_t as = ~d >> A_SHIFT;
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t d = *(dest + i);
+	comp4_t s = *(src + i);
+	comp4_t m = *(mask + i);
+	comp4_t ad;
+	comp2_t as = ~d >> A_SHIFT;
 
 	combine_mask_ca (&s, &m);
 
 	ad = ~m;
 
-        UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc(d, ad, s, as);
+	UNcx4_MUL_UNcx4_ADD_UNcx4_MUL_UNc (d, ad, s, as);
+
 	*(dest + i) = d;
     }
 }
 
 static void
-combine_add_ca (pixman_implementation_t *imp, pixman_op_t op,
-	       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_add_ca (pixman_implementation_t *imp,
+                pixman_op_t              op,
+                comp4_t *                dest,
+                const comp4_t *          src,
+                const comp4_t *          mask,
+                int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
-        comp4_t s = *(src + i);
-        comp4_t m = *(mask + i);
-        comp4_t d = *(dest + i);
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s = *(src + i);
+	comp4_t m = *(mask + i);
+	comp4_t d = *(dest + i);
 
 	combine_mask_value_ca (&s, &m);
 
-        UNcx4_ADD_UNcx4(d, s);
+	UNcx4_ADD_UNcx4 (d, s);
+
 	*(dest + i) = d;
     }
 }
 
 static void
-combine_saturate_ca (pixman_implementation_t *imp, pixman_op_t op,
-		    comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_saturate_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     comp4_t *                dest,
+                     const comp4_t *          src,
+                     const comp4_t *          mask,
+                     int                      width)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
-        comp4_t s, d;
-        comp2_t sa, sr, sg, sb, da;
-        comp2_t t, u, v;
-        comp4_t m,n,o,p;
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s, d;
+	comp2_t sa, sr, sg, sb, da;
+	comp2_t t, u, v;
+	comp4_t m, n, o, p;
 
-        d = *(dest + i);
-        s = *(src + i);
+	d = *(dest + i);
+	s = *(src + i);
 	m = *(mask + i);
 
 	combine_mask_ca (&s, &m);
 
-        sa = (m >> A_SHIFT);
-        sr = (m >> R_SHIFT) & MASK;
-        sg = (m >> G_SHIFT) & MASK;
-        sb =  m             & MASK;
-        da = ~d >> A_SHIFT;
-
-        if (sb <= da)
-            m = ADD(s,d,0,t);
-        else
-            m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
-
-        if (sg <= da)
-            n = ADD(s,d,G_SHIFT,t);
-        else
-            n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
-
-        if (sr <= da)
-            o = ADD(s,d,R_SHIFT,t);
-        else
-            o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
-
-        if (sa <= da)
-            p = ADD(s,d,A_SHIFT,t);
-        else
-            p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
-
-	*(dest + i) = m|n|o|p;
+	sa = (m >> A_SHIFT);
+	sr = (m >> R_SHIFT) & MASK;
+	sg = (m >> G_SHIFT) & MASK;
+	sb =  m             & MASK;
+	da = ~d >> A_SHIFT;
+
+	if (sb <= da)
+	    m = ADD (s, d, 0, t);
+	else
+	    m = GENERIC (s, d, 0, (da << G_SHIFT) / sb, MASK, t, u, v);
+
+	if (sg <= da)
+	    n = ADD (s, d, G_SHIFT, t);
+	else
+	    n = GENERIC (s, d, G_SHIFT, (da << G_SHIFT) / sg, MASK, t, u, v);
+
+	if (sr <= da)
+	    o = ADD (s, d, R_SHIFT, t);
+	else
+	    o = GENERIC (s, d, R_SHIFT, (da << G_SHIFT) / sr, MASK, t, u, v);
+
+	if (sa <= da)
+	    p = ADD (s, d, A_SHIFT, t);
+	else
+	    p = GENERIC (s, d, A_SHIFT, (da << G_SHIFT) / sa, MASK, t, u, v);
+
+	*(dest + i) = m | n | o | p;
     }
 }
 
 static void
-combine_disjoint_general_ca (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine)
+combine_disjoint_general_ca (comp4_t *      dest,
+                             const comp4_t *src,
+                             const comp4_t *mask,
+                             int            width,
+                             comp1_t        combine)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
-        comp4_t s, d;
-        comp4_t m,n,o,p;
-        comp4_t Fa, Fb;
-        comp2_t t, u, v;
-        comp4_t sa;
-        comp1_t da;
-
-        s = *(src + i);
-        m = *(mask + i);
-        d = *(dest + i);
-        da = d >> A_SHIFT;
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s, d;
+	comp4_t m, n, o, p;
+	comp4_t Fa, Fb;
+	comp2_t t, u, v;
+	comp4_t sa;
+	comp1_t da;
+
+	s = *(src + i);
+	m = *(mask + i);
+	d = *(dest + i);
+	da = d >> A_SHIFT;
 
 	combine_mask_ca (&s, &m);
 
 	sa = m;
 
-        switch (combine & COMBINE_A) {
-        default:
-            Fa = 0;
-            break;
-        case COMBINE_A_OUT:
-            m = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> 0), da);
-            n = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-            o = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
-            p = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
-            Fa = m|n|o|p;
-            break;
-        case COMBINE_A_IN:
-            m = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> 0), da);
-            n = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-            o = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
-            p = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
-            Fa = m|n|o|p;
-            break;
-        case COMBINE_A:
-            Fa = ~0;
-            break;
-        }
-
-        switch (combine & COMBINE_B) {
-        default:
-            Fb = 0;
-            break;
-        case COMBINE_B_OUT:
-            m = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> 0));
-            n = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-            o = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
-            p = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
-            Fb = m|n|o|p;
-            break;
-        case COMBINE_B_IN:
-            m = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> 0));
-            n = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-            o = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
-            p = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
-            Fb = m|n|o|p;
-            break;
-        case COMBINE_B:
-            Fb = ~0;
-            break;
-        }
-        m = GENERIC (s,d,0,GET_COMP(Fa,0),GET_COMP(Fb,0),t, u, v);
-        n = GENERIC (s,d,G_SHIFT,GET_COMP(Fa,G_SHIFT),GET_COMP(Fb,G_SHIFT),t, u, v);
-        o = GENERIC (s,d,R_SHIFT,GET_COMP(Fa,R_SHIFT),GET_COMP(Fb,R_SHIFT),t, u, v);
-        p = GENERIC (s,d,A_SHIFT,GET_COMP(Fa,A_SHIFT),GET_COMP(Fb,A_SHIFT),t, u, v);
-        s = m|n|o|p;
+	switch (combine & COMBINE_A)
+	{
+	default:
+	    Fa = 0;
+	    break;
+
+	case COMBINE_A_OUT:
+	    m = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> 0), da);
+	    n = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+	    o = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
+	    p = (comp4_t)combine_disjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
+	    Fa = m | n | o | p;
+	    break;
+
+	case COMBINE_A_IN:
+	    m = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> 0), da);
+	    n = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+	    o = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
+	    p = (comp4_t)combine_disjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
+	    Fa = m | n | o | p;
+	    break;
+
+	case COMBINE_A:
+	    Fa = ~0;
+	    break;
+	}
+
+	switch (combine & COMBINE_B)
+	{
+	default:
+	    Fb = 0;
+	    break;
+
+	case COMBINE_B_OUT:
+	    m = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> 0));
+	    n = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+	    o = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
+	    p = (comp4_t)combine_disjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
+	    Fb = m | n | o | p;
+	    break;
+
+	case COMBINE_B_IN:
+	    m = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> 0));
+	    n = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+	    o = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
+	    p = (comp4_t)combine_disjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
+	    Fb = m | n | o | p;
+	    break;
+
+	case COMBINE_B:
+	    Fb = ~0;
+	    break;
+	}
+	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
+	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
+	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
+	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
+
+	s = m | n | o | p;
+
 	*(dest + i) = s;
     }
 }
 
 static void
-combine_disjoint_over_ca (pixman_implementation_t *imp, pixman_op_t op,
-			comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_over_ca (pixman_implementation_t *imp,
+                          pixman_op_t              op,
+                          comp4_t *                dest,
+                          const comp4_t *          src,
+                          const comp4_t *          mask,
+                          int                      width)
 {
     combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
 }
 
 static void
-combine_disjoint_in_ca (pixman_implementation_t *imp, pixman_op_t op,
-		      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_in_ca (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        comp4_t *                dest,
+                        const comp4_t *          src,
+                        const comp4_t *          mask,
+                        int                      width)
 {
     combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
 }
 
 static void
-combine_disjoint_in_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			     comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_in_reverse_ca (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                comp4_t *                dest,
+                                const comp4_t *          src,
+                                const comp4_t *          mask,
+                                int                      width)
 {
     combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
 }
 
 static void
-combine_disjoint_out_ca (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_out_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         comp4_t *                dest,
+                         const comp4_t *          src,
+                         const comp4_t *          mask,
+                         int                      width)
 {
     combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
 }
 
 static void
-combine_disjoint_out_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_out_reverse_ca (pixman_implementation_t *imp,
+                                 pixman_op_t              op,
+                                 comp4_t *                dest,
+                                 const comp4_t *          src,
+                                 const comp4_t *          mask,
+                                 int                      width)
 {
     combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
 }
 
 static void
-combine_disjoint_atop_ca (pixman_implementation_t *imp, pixman_op_t op,
-			comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_atop_ca (pixman_implementation_t *imp,
+                          pixman_op_t              op,
+                          comp4_t *                dest,
+                          const comp4_t *          src,
+                          const comp4_t *          mask,
+                          int                      width)
 {
     combine_disjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
 }
 
 static void
-combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_atop_reverse_ca (pixman_implementation_t *imp,
+                                  pixman_op_t              op,
+                                  comp4_t *                dest,
+                                  const comp4_t *          src,
+                                  const comp4_t *          mask,
+                                  int                      width)
 {
     combine_disjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
 }
 
 static void
-combine_disjoint_xor_ca (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_disjoint_xor_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         comp4_t *                dest,
+                         const comp4_t *          src,
+                         const comp4_t *          mask,
+                         int                      width)
 {
     combine_disjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
 }
 
 static void
-combine_conjoint_general_ca (comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width, comp1_t combine)
+combine_conjoint_general_ca (comp4_t *      dest,
+                             const comp4_t *src,
+                             const comp4_t *mask,
+                             int            width,
+                             comp1_t        combine)
 {
     int i;
 
-    for (i = 0; i < width; ++i) {
-        comp4_t s, d;
-        comp4_t m,n,o,p;
-        comp4_t Fa, Fb;
-        comp2_t t, u, v;
-        comp4_t sa;
-        comp1_t da;
-
-        s = *(src + i);
-        m = *(mask + i);
-        d = *(dest + i);
-        da = d >> A_SHIFT;
+    for (i = 0; i < width; ++i)
+    {
+	comp4_t s, d;
+	comp4_t m, n, o, p;
+	comp4_t Fa, Fb;
+	comp2_t t, u, v;
+	comp4_t sa;
+	comp1_t da;
+
+	s = *(src + i);
+	m = *(mask + i);
+	d = *(dest + i);
+	da = d >> A_SHIFT;
 
 	combine_mask_ca (&s, &m);
 
-        sa = m;
-
-        switch (combine & COMBINE_A) {
-        default:
-            Fa = 0;
-            break;
-        case COMBINE_A_OUT:
-            m = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> 0), da);
-            n = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-            o = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
-            p = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
-            Fa = m|n|o|p;
-            break;
-        case COMBINE_A_IN:
-            m = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> 0), da);
-            n = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
-            o = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
-            p = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
-            Fa = m|n|o|p;
-            break;
-        case COMBINE_A:
-            Fa = ~0;
-            break;
-        }
-
-        switch (combine & COMBINE_B) {
-        default:
-            Fb = 0;
-            break;
-        case COMBINE_B_OUT:
-            m = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> 0));
-            n = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-            o = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
-            p = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
-            Fb = m|n|o|p;
-            break;
-        case COMBINE_B_IN:
-            m = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> 0));
-            n = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
-            o = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
-            p = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
-            Fb = m|n|o|p;
-            break;
-        case COMBINE_B:
-            Fb = ~0;
-            break;
-        }
-        m = GENERIC (s,d,0,GET_COMP(Fa,0),GET_COMP(Fb,0),t, u, v);
-        n = GENERIC (s,d,G_SHIFT,GET_COMP(Fa,G_SHIFT),GET_COMP(Fb,G_SHIFT),t, u, v);
-        o = GENERIC (s,d,R_SHIFT,GET_COMP(Fa,R_SHIFT),GET_COMP(Fb,R_SHIFT),t, u, v);
-        p = GENERIC (s,d,A_SHIFT,GET_COMP(Fa,A_SHIFT),GET_COMP(Fb,A_SHIFT),t, u, v);
-        s = m|n|o|p;
+	sa = m;
+
+	switch (combine & COMBINE_A)
+	{
+	default:
+	    Fa = 0;
+	    break;
+
+	case COMBINE_A_OUT:
+	    m = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> 0), da);
+	    n = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+	    o = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
+	    p = (comp4_t)combine_conjoint_out_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
+	    Fa = m | n | o | p;
+	    break;
+
+	case COMBINE_A_IN:
+	    m = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> 0), da);
+	    n = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> G_SHIFT), da) << G_SHIFT;
+	    o = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> R_SHIFT), da) << R_SHIFT;
+	    p = (comp4_t)combine_conjoint_in_part ((comp1_t) (sa >> A_SHIFT), da) << A_SHIFT;
+	    Fa = m | n | o | p;
+	    break;
+
+	case COMBINE_A:
+	    Fa = ~0;
+	    break;
+	}
+
+	switch (combine & COMBINE_B)
+	{
+	default:
+	    Fb = 0;
+	    break;
+
+	case COMBINE_B_OUT:
+	    m = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> 0));
+	    n = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+	    o = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
+	    p = (comp4_t)combine_conjoint_out_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
+	    Fb = m | n | o | p;
+	    break;
+
+	case COMBINE_B_IN:
+	    m = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> 0));
+	    n = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> G_SHIFT)) << G_SHIFT;
+	    o = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> R_SHIFT)) << R_SHIFT;
+	    p = (comp4_t)combine_conjoint_in_part (da, (comp1_t) (sa >> A_SHIFT)) << A_SHIFT;
+	    Fb = m | n | o | p;
+	    break;
+
+	case COMBINE_B:
+	    Fb = ~0;
+	    break;
+	}
+	m = GENERIC (s, d, 0, GET_COMP (Fa, 0), GET_COMP (Fb, 0), t, u, v);
+	n = GENERIC (s, d, G_SHIFT, GET_COMP (Fa, G_SHIFT), GET_COMP (Fb, G_SHIFT), t, u, v);
+	o = GENERIC (s, d, R_SHIFT, GET_COMP (Fa, R_SHIFT), GET_COMP (Fb, R_SHIFT), t, u, v);
+	p = GENERIC (s, d, A_SHIFT, GET_COMP (Fa, A_SHIFT), GET_COMP (Fb, A_SHIFT), t, u, v);
+
+	s = m | n | o | p;
+
 	*(dest + i) = s;
     }
 }
 
 static void
-combine_conjoint_over_ca (pixman_implementation_t *imp, pixman_op_t op,
-			comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_over_ca (pixman_implementation_t *imp,
+                          pixman_op_t              op,
+                          comp4_t *                dest,
+                          const comp4_t *          src,
+                          const comp4_t *          mask,
+                          int                      width)
 {
     combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OVER);
 }
 
 static void
-combine_conjoint_over_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_over_reverse_ca (pixman_implementation_t *imp,
+                                  pixman_op_t              op,
+                                  comp4_t *                dest,
+                                  const comp4_t *          src,
+                                  const comp4_t *          mask,
+                                  int                      width)
 {
     combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OVER);
 }
 
 static void
-combine_conjoint_in_ca (pixman_implementation_t *imp, pixman_op_t op,
-		      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_in_ca (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        comp4_t *                dest,
+                        const comp4_t *          src,
+                        const comp4_t *          mask,
+                        int                      width)
 {
     combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_IN);
 }
 
 static void
-combine_conjoint_in_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			     comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_in_reverse_ca (pixman_implementation_t *imp,
+                                pixman_op_t              op,
+                                comp4_t *                dest,
+                                const comp4_t *          src,
+                                const comp4_t *          mask,
+                                int                      width)
 {
     combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_IN);
 }
 
 static void
-combine_conjoint_out_ca (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_out_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         comp4_t *                dest,
+                         const comp4_t *          src,
+                         const comp4_t *          mask,
+                         int                      width)
 {
     combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_OUT);
 }
 
 static void
-combine_conjoint_out_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			      comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_out_reverse_ca (pixman_implementation_t *imp,
+                                 pixman_op_t              op,
+                                 comp4_t *                dest,
+                                 const comp4_t *          src,
+                                 const comp4_t *          mask,
+                                 int                      width)
 {
     combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_OUT);
 }
 
 static void
-combine_conjoint_atop_ca (pixman_implementation_t *imp, pixman_op_t op,
-			comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_atop_ca (pixman_implementation_t *imp,
+                          pixman_op_t              op,
+                          comp4_t *                dest,
+                          const comp4_t *          src,
+                          const comp4_t *          mask,
+                          int                      width)
 {
     combine_conjoint_general_ca (dest, src, mask, width, COMBINE_A_ATOP);
 }
 
 static void
-combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_atop_reverse_ca (pixman_implementation_t *imp,
+                                  pixman_op_t              op,
+                                  comp4_t *                dest,
+                                  const comp4_t *          src,
+                                  const comp4_t *          mask,
+                                  int                      width)
 {
     combine_conjoint_general_ca (dest, src, mask, width, COMBINE_B_ATOP);
 }
 
 static void
-combine_conjoint_xor_ca (pixman_implementation_t *imp, pixman_op_t op,
-		       comp4_t *dest, const comp4_t *src, const comp4_t *mask, int width)
+combine_conjoint_xor_ca (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         comp4_t *                dest,
+                         const comp4_t *          src,
+                         const comp4_t *          mask,
+                         int                      width)
 {
     combine_conjoint_general_ca (dest, src, mask, width, COMBINE_XOR);
 }
@@ -2007,6 +2429,7 @@ _pixman_setup_combiner_functions_width (pixman_implementation_t *imp)
     imp->combine_width_ca[PIXMAN_OP_SOFT_LIGHT] = combine_soft_light_ca;
     imp->combine_width_ca[PIXMAN_OP_DIFFERENCE] = combine_difference_ca;
     imp->combine_width_ca[PIXMAN_OP_EXCLUSION] = combine_exclusion_ca;
+
     /* It is not clear that these make sense, so leave them out for now */
     imp->combine_width_ca[PIXMAN_OP_HSL_HUE] = NULL;
     imp->combine_width_ca[PIXMAN_OP_HSL_SATURATION] = NULL;
@@ -2014,4 +2437,3 @@ _pixman_setup_combiner_functions_width (pixman_implementation_t *imp)
     imp->combine_width_ca[PIXMAN_OP_HSL_LUMINOSITY] = NULL;
 }
 
-
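
(A note on the code being reindented above: the conjoint combiners all
funnel through combine_conjoint_general_u / combine_conjoint_general_ca.
The COMBINE_A and COMBINE_B bits of the `combine' argument select the
source and destination blend factors Fa and Fb, and GENERIC then
computes s * Fa + d * Fb per channel. As a reference for readers of the
diff, here is a minimal scalar sketch of those factors and of one
channel of the blend. It is an illustration only: it assumes plain
uint8_t channels rather than the comp1_t/comp2_t template types, and
div_unc, conjoint_in_part, conjoint_out_part and blend_channel are
illustrative names, not the template's own helpers.)

    #include <stdint.h>

    /* b/a as an 8-bit fixed-point fraction, rounded; never called
     * with a == 0, because the callers test b >= a first. */
    static uint8_t
    div_unc (uint8_t b, uint8_t a)
    {
        uint16_t t = ((uint16_t)b * 255 + a / 2) / a;
        return t > 255 ? 255 : (uint8_t)t;
    }

    /* Conjoint IN factor: min (1, b/a). */
    static uint8_t
    conjoint_in_part (uint8_t a, uint8_t b)
    {
        if (b >= a)                /* b/a >= 1 */
            return 0xff;           /* clamp to 1 */
        return div_unc (b, a);
    }

    /* Conjoint OUT factor: max (0, 1 - b/a). */
    static uint8_t
    conjoint_out_part (uint8_t a, uint8_t b)
    {
        if (b >= a)                /* 1 - b/a <= 0 */
            return 0x00;
        return 0xff - div_unc (b, a);
    }

    /* One channel of GENERIC: s * Fa + d * Fb, using the usual
     * (t += 0x80; t = (t + (t >> 8)) >> 8) approximation of /255. */
    static uint8_t
    blend_channel (uint8_t s, uint8_t d, uint8_t fa, uint8_t fb)
    {
        uint32_t t = (uint32_t)s * fa + (uint32_t)d * fb + 0x80;
        t = (t + (t >> 8)) >> 8;
        return t > 255 ? 255 : (uint8_t)t;
    }

For conjoint OVER, for example, the switch picks Fa = MASK and
Fb = conjoint_out_part (da, sa); a fully opaque source then gives
Fb = 0, so the source simply replaces the destination.
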
commit d57b55bb2662837feafb4f9f88d10549164ee142
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 22:05:32 2009 -0400

    Reindent and reformat pixman-vmx.c
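
(For readers of the diff below: pix_multiply is the AltiVec version of
pixman's standard "multiply two 8-bit channels with rounding"
primitive, and over () implements the dest*~srca + src operator named
in its comment. As a scalar reference for what the
vec_mladd-with-0x0080 / vec_adds / vec_sr sequence computes on a whole
vector of channels at once, here is a minimal sketch; mul_un8 and
over_un8 are illustrative names working on single uint8_t channels,
not the vector code itself.)

    #include <stdint.h>

    /* (p * a) / 255, approximated as t = p*a + 0x80;
     * result = (t + (t >> 8)) >> 8.  This is the same rounding
     * trick the vector code applies via vec_mladd and vec_sr. */
    static uint8_t
    mul_un8 (uint8_t p, uint8_t a)
    {
        uint16_t t = (uint16_t)p * a + 0x80;
        return (uint8_t)((t + (t >> 8)) >> 8);
    }

    /* over: dest * ~srca + src, per channel; the vector code uses
     * the saturating vec_adds for the final addition. */
    static uint8_t
    over_un8 (uint8_t s, uint8_t sa, uint8_t d)
    {
        uint16_t t = (uint16_t)mul_un8 (d, 255 - sa) + s;
        return t > 255 ? 255 : (uint8_t)t;
    }

For p = a = 0xff, mul_un8 returns 0xff exactly, and for a = 0 it
returns 0, so the approximation is exact at both endpoints.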

diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index b20740c..6fc3cde 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -33,48 +33,52 @@
 #define AVV(x...) {x}
 
 static force_inline vector unsigned int
-splat_alpha (vector unsigned int pix) {
+splat_alpha (vector unsigned int pix)
+{
     return vec_perm (pix, pix,
-    (vector unsigned char)AVV(0x00,0x00,0x00,0x00, 0x04,0x04,0x04,0x04,
-                               0x08,0x08,0x08,0x08, 0x0C,0x0C,0x0C,0x0C));
+		     (vector unsigned char)AVV (
+			 0x00, 0x00, 0x00, 0x00, 0x04, 0x04, 0x04, 0x04,
+			 0x08, 0x08, 0x08, 0x08, 0x0C, 0x0C, 0x0C, 0x0C));
 }
 
 static force_inline vector unsigned int
 pix_multiply (vector unsigned int p, vector unsigned int a)
 {
     vector unsigned short hi, lo, mod;
+    
     /* unpack to short */
     hi = (vector unsigned short)
-                    vec_mergeh ((vector unsigned char)AVV(0),
-                                (vector unsigned char)p);
+	vec_mergeh ((vector unsigned char)AVV (0),
+		    (vector unsigned char)p);
+    
     mod = (vector unsigned short)
-                    vec_mergeh ((vector unsigned char)AVV(0),
-                                (vector unsigned char)a);
-
+	vec_mergeh ((vector unsigned char)AVV (0),
+		    (vector unsigned char)a);
+    
     hi = vec_mladd (hi, mod, (vector unsigned short)
-                            AVV(0x0080,0x0080,0x0080,0x0080,
-                                 0x0080,0x0080,0x0080,0x0080));
-
+                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
+                         0x0080, 0x0080, 0x0080, 0x0080));
+    
     hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
-
+    
     hi = vec_sr (hi, vec_splat_u16 (8));
-
+    
     /* unpack to short */
     lo = (vector unsigned short)
-                    vec_mergel ((vector unsigned char)AVV(0),
-                                (vector unsigned char)p);
+	vec_mergel ((vector unsigned char)AVV (0),
+		    (vector unsigned char)p);
     mod = (vector unsigned short)
-                    vec_mergel ((vector unsigned char)AVV(0),
-                                (vector unsigned char)a);
-
+	vec_mergel ((vector unsigned char)AVV (0),
+		    (vector unsigned char)a);
+    
     lo = vec_mladd (lo, mod, (vector unsigned short)
-                            AVV(0x0080,0x0080,0x0080,0x0080,
-                                 0x0080,0x0080,0x0080,0x0080));
-
+                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
+                         0x0080, 0x0080, 0x0080, 0x0080));
+    
     lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
-
+    
     lo = vec_sr (lo, vec_splat_u16 (8));
-
+    
     return (vector unsigned int)vec_packsu (hi, lo);
 }
 
@@ -82,62 +86,64 @@ static force_inline vector unsigned int
 pix_add (vector unsigned int a, vector unsigned int b)
 {
     return (vector unsigned int)vec_adds ((vector unsigned char)a,
-                     (vector unsigned char)b);
+                                          (vector unsigned char)b);
 }
 
 static force_inline vector unsigned int
-pix_add_mul (vector unsigned int x, vector unsigned int a,
-             vector unsigned int y, vector unsigned int b)
+pix_add_mul (vector unsigned int x,
+             vector unsigned int a,
+             vector unsigned int y,
+             vector unsigned int b)
 {
     vector unsigned short hi, lo, mod, hiy, loy, mody;
-
+    
     hi = (vector unsigned short)
-                    vec_mergeh ((vector unsigned char)AVV(0),
-                                (vector unsigned char)x);
+	vec_mergeh ((vector unsigned char)AVV (0),
+		    (vector unsigned char)x);
     mod = (vector unsigned short)
-                    vec_mergeh ((vector unsigned char)AVV(0),
-                                (vector unsigned char)a);
+	vec_mergeh ((vector unsigned char)AVV (0),
+		    (vector unsigned char)a);
     hiy = (vector unsigned short)
-                    vec_mergeh ((vector unsigned char)AVV(0),
-                                (vector unsigned char)y);
+	vec_mergeh ((vector unsigned char)AVV (0),
+		    (vector unsigned char)y);
     mody = (vector unsigned short)
-                    vec_mergeh ((vector unsigned char)AVV(0),
-                                (vector unsigned char)b);
-
+	vec_mergeh ((vector unsigned char)AVV (0),
+		    (vector unsigned char)b);
+    
     hi = vec_mladd (hi, mod, (vector unsigned short)
-                             AVV(0x0080,0x0080,0x0080,0x0080,
-                                  0x0080,0x0080,0x0080,0x0080));
-
+                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
+                         0x0080, 0x0080, 0x0080, 0x0080));
+    
     hi = vec_mladd (hiy, mody, hi);
-
+    
     hi = vec_adds (hi, vec_sr (hi, vec_splat_u16 (8)));
-
+    
     hi = vec_sr (hi, vec_splat_u16 (8));
-
+    
     lo = (vector unsigned short)
-                    vec_mergel ((vector unsigned char)AVV(0),
-                                (vector unsigned char)x);
+	vec_mergel ((vector unsigned char)AVV (0),
+		    (vector unsigned char)x);
     mod = (vector unsigned short)
-                    vec_mergel ((vector unsigned char)AVV(0),
-                                (vector unsigned char)a);
-
+	vec_mergel ((vector unsigned char)AVV (0),
+		    (vector unsigned char)a);
+    
     loy = (vector unsigned short)
-                    vec_mergel ((vector unsigned char)AVV(0),
-                                (vector unsigned char)y);
+	vec_mergel ((vector unsigned char)AVV (0),
+		    (vector unsigned char)y);
     mody = (vector unsigned short)
-                    vec_mergel ((vector unsigned char)AVV(0),
-                                (vector unsigned char)b);
-
+	vec_mergel ((vector unsigned char)AVV (0),
+		    (vector unsigned char)b);
+    
     lo = vec_mladd (lo, mod, (vector unsigned short)
-                             AVV(0x0080,0x0080,0x0080,0x0080,
-                                  0x0080,0x0080,0x0080,0x0080));
-
+                    AVV (0x0080, 0x0080, 0x0080, 0x0080,
+                         0x0080, 0x0080, 0x0080, 0x0080));
+    
     lo = vec_mladd (loy, mody, lo);
-
+    
     lo = vec_adds (lo, vec_sr (lo, vec_splat_u16 (8)));
-
+    
     lo = vec_sr (lo, vec_splat_u16 (8));
-
+    
     return (vector unsigned int)vec_packsu (hi, lo);
 }
 
@@ -146,1366 +152,1517 @@ negate (vector unsigned int src)
 {
     return vec_nor (src, src);
 }
+
 /* dest*~srca + src */
 static force_inline vector unsigned int
-over (vector unsigned int src, vector unsigned int srca,
+over (vector unsigned int src,
+      vector unsigned int srca,
       vector unsigned int dest)
 {
     vector unsigned char tmp = (vector unsigned char)
-                                pix_multiply (dest, negate (srca));
+	pix_multiply (dest, negate (srca));
+    
     tmp = vec_adds ((vector unsigned char)src, tmp);
     return (vector unsigned int)tmp;
 }
 
 /* in == pix_multiply */
-#define in_over(src, srca, mask, dest) over (pix_multiply (src, mask),\
-                                             pix_multiply (srca, mask), dest)
+#define in_over(src, srca, mask, dest)					\
+    over (pix_multiply (src, mask),					\
+          pix_multiply (srca, mask), dest)
 
 
-#define COMPUTE_SHIFT_MASK(source) \
+#define COMPUTE_SHIFT_MASK(source)					\
     source ## _mask = vec_lvsl (0, source);
 
-#define COMPUTE_SHIFT_MASKS(dest, source) \
-    dest ## _mask = vec_lvsl (0, dest); \
-    source ## _mask = vec_lvsl (0, source); \
+#define COMPUTE_SHIFT_MASKS(dest, source)				\
+    dest ## _mask = vec_lvsl (0, dest);					\
+    source ## _mask = vec_lvsl (0, source);				\
     store_mask = vec_lvsr (0, dest);
 
-#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
-    mask ## _mask = vec_lvsl (0, mask); \
-    dest ## _mask = vec_lvsl (0, dest); \
-    source ## _mask = vec_lvsl (0, source); \
+#define COMPUTE_SHIFT_MASKC(dest, source, mask)				\
+    mask ## _mask = vec_lvsl (0, mask);					\
+    dest ## _mask = vec_lvsl (0, dest);					\
+    source ## _mask = vec_lvsl (0, source);				\
     store_mask = vec_lvsr (0, dest);
 
 /* notice you have to declare temp vars...
  * Note: tmp3 and tmp4 must remain untouched!
  */
 
-#define LOAD_VECTORS(dest, source) \
-        tmp1 = (typeof(tmp1))vec_ld(0, source); \
-        tmp2 = (typeof(tmp2))vec_ld(15, source); \
-        tmp3 = (typeof(tmp3))vec_ld(0, dest); \
-        v ## source = (typeof(v ## source)) \
-                       vec_perm(tmp1, tmp2, source ## _mask); \
-        tmp4 = (typeof(tmp4))vec_ld(15, dest); \
-        v ## dest = (typeof(v ## dest)) \
-                     vec_perm(tmp3, tmp4, dest ## _mask);
-
-#define LOAD_VECTORSC(dest, source, mask) \
-        tmp1 = (typeof(tmp1))vec_ld(0, source); \
-        tmp2 = (typeof(tmp2))vec_ld(15, source); \
-        tmp3 = (typeof(tmp3))vec_ld(0, dest); \
-        v ## source = (typeof(v ## source)) \
-                       vec_perm(tmp1, tmp2, source ## _mask); \
-        tmp4 = (typeof(tmp4))vec_ld(15, dest); \
-        tmp1 = (typeof(tmp1))vec_ld(0, mask); \
-        v ## dest = (typeof(v ## dest)) \
-                     vec_perm(tmp3, tmp4, dest ## _mask); \
-        tmp2 = (typeof(tmp2))vec_ld(15, mask); \
-        v ## mask = (typeof(v ## mask)) \
-                     vec_perm(tmp1, tmp2, mask ## _mask);
-
-#define LOAD_VECTORSM(dest, source, mask) \
-        LOAD_VECTORSC(dest, source, mask) \
-        v ## source = pix_multiply(v ## source, \
-                                   splat_alpha (v ## mask));
-
-#define STORE_VECTOR(dest) \
-        edges = vec_perm (tmp4, tmp3, dest ## _mask); \
-        tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
-        tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
-        vec_st ((vector unsigned int) tmp3, 15, dest ); \
-        vec_st ((vector unsigned int) tmp1, 0, dest );
+#define LOAD_VECTORS(dest, source)			  \
+    tmp1 = (typeof(tmp1))vec_ld (0, source);		  \
+    tmp2 = (typeof(tmp2))vec_ld (15, source);		  \
+    tmp3 = (typeof(tmp3))vec_ld (0, dest);		  \
+    v ## source = (typeof(v ## source))			  \
+	vec_perm (tmp1, tmp2, source ## _mask);		  \
+    tmp4 = (typeof(tmp4))vec_ld (15, dest);		  \
+    v ## dest = (typeof(v ## dest))			  \
+	vec_perm (tmp3, tmp4, dest ## _mask);
+
+#define LOAD_VECTORSC(dest, source, mask)		  \
+    tmp1 = (typeof(tmp1))vec_ld (0, source);		  \
+    tmp2 = (typeof(tmp2))vec_ld (15, source);		  \
+    tmp3 = (typeof(tmp3))vec_ld (0, dest);		  \
+    v ## source = (typeof(v ## source))			  \
+	vec_perm (tmp1, tmp2, source ## _mask);		  \
+    tmp4 = (typeof(tmp4))vec_ld (15, dest);		  \
+    tmp1 = (typeof(tmp1))vec_ld (0, mask);		  \
+    v ## dest = (typeof(v ## dest))			  \
+	vec_perm (tmp3, tmp4, dest ## _mask);		  \
+    tmp2 = (typeof(tmp2))vec_ld (15, mask);		  \
+    v ## mask = (typeof(v ## mask))			  \
+	vec_perm (tmp1, tmp2, mask ## _mask);
+
+#define LOAD_VECTORSM(dest, source, mask)				\
+    LOAD_VECTORSC (dest, source, mask)					\
+    v ## source = pix_multiply (v ## source,				\
+                                splat_alpha (v ## mask));
+
+#define STORE_VECTOR(dest)						\
+    edges = vec_perm (tmp4, tmp3, dest ## _mask);			\
+    tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
+    tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
+    vec_st ((vector unsigned int) tmp3, 15, dest);			\
+    vec_st ((vector unsigned int) tmp1, 0, dest);
 
 static void
-vmx_combine_over_u_no_mask (uint32_t *dest, const uint32_t *src, int width)
+vmx_combine_over_u_no_mask (uint32_t *      dest,
+                            const uint32_t *src,
+                            int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc;
+    vector unsigned int vdest, vsrc;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKS(dest, src)
-
+	dest_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKS (dest, src);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORS(dest, src)
-
-        vdest = over (vsrc, splat_alpha (vsrc), vdest);
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORS (dest, src);
+	
+	vdest = over (vsrc, splat_alpha (vsrc), vdest);
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t ia = ALPHA_8 (~s);
-
-        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t ia = ALPHA_8 (~s);
+	
+	UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+	
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_over_u_mask (uint32_t *dest,
-                     const uint32_t *src,
-                     const uint32_t *mask,
-                     int width)
+vmx_combine_over_u_mask (uint32_t *      dest,
+                         const uint32_t *src,
+                         const uint32_t *mask,
+                         int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, mask_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, src_mask, mask_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSM(dest, src, mask);
-
-        vdest = over (vsrc, splat_alpha (vsrc), vdest);
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSM (dest, src, mask);
+	
+	vdest = over (vsrc, splat_alpha (vsrc), vdest);
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t m = ALPHA_8 (mask[i]);
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t ia;
-
-        UN8x4_MUL_UN8 (s, m);
-
-        ia = ALPHA_8 (~s);
-
-        UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t m = ALPHA_8 (mask[i]);
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t ia;
+	
+	UN8x4_MUL_UN8 (s, m);
+	
+	ia = ALPHA_8 (~s);
+	
+	UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_over_u(pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask,
-                int width)
+vmx_combine_over_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     if (mask)
-        vmx_combine_over_u_mask(dest, src, mask, width);
+	vmx_combine_over_u_mask (dest, src, mask, width);
     else
-        vmx_combine_over_u_no_mask(dest, src, width);
+	vmx_combine_over_u_no_mask (dest, src, width);
 }
 
 static void
-vmx_combine_over_reverse_u_no_mask (uint32_t *dest, const uint32_t *src, int width)
+vmx_combine_over_reverse_u_no_mask (uint32_t *      dest,
+                                    const uint32_t *src,
+                                    int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc;
+    vector unsigned int vdest, vsrc;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKS(dest, src)
-
+	dest_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKS (dest, src);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORS(dest, src)
-
-        vdest = over (vdest, splat_alpha (vdest) , vsrc);
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORS (dest, src);
+	
+	vdest = over (vdest, splat_alpha (vdest), vsrc);
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t ia = ALPHA_8 (~dest[i]);
-
-        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t ia = ALPHA_8 (~dest[i]);
+	
+	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_over_reverse_u_mask (uint32_t *dest,
-                            const uint32_t *src,
-                            const uint32_t *mask,
-                            int width)
+vmx_combine_over_reverse_u_mask (uint32_t *      dest,
+                                 const uint32_t *src,
+                                 const uint32_t *mask,
+                                 int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, mask_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, src_mask, mask_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSM(dest, src, mask)
-
-        vdest = over (vdest, splat_alpha (vdest) , vsrc);
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSM (dest, src, mask);
+	
+	vdest = over (vdest, splat_alpha (vdest), vsrc);
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t m = ALPHA_8 (mask[i]);
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t ia = ALPHA_8 (~dest[i]);
-
-        UN8x4_MUL_UN8 (s, m);
-
-        UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t m = ALPHA_8 (mask[i]);
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t ia = ALPHA_8 (~dest[i]);
+	
+	UN8x4_MUL_UN8 (s, m);
+	
+	UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_over_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			uint32_t *dest, const uint32_t *src,
-                        const uint32_t *mask, int width)
+vmx_combine_over_reverse_u (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dest,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
 {
     if (mask)
-        vmx_combine_over_reverse_u_mask(dest, src, mask, width);
+	vmx_combine_over_reverse_u_mask (dest, src, mask, width);
     else
-        vmx_combine_over_reverse_u_no_mask(dest, src, width);
+	vmx_combine_over_reverse_u_no_mask (dest, src, width);
 }
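
(Sketch continued, same hypothetical helpers as above.) OVER_REVERSE swaps the roles of the two operands: the result is dest composited over src, which is why the vector loop reads over (vdest, splat_alpha (vdest), vsrc):

/* OVER_REVERSE: dest over src, i.e. d + s * (1 - d.alpha). */
static uint32_t
over_reverse_un8x4 (uint32_t s, uint32_t d)
{
    return pix_add (d, pix_mul (s, 255 - ALPHA_8 (d)));
}
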
 
 static void
-vmx_combine_in_u_no_mask (uint32_t *dest, const uint32_t *src, int width)
+vmx_combine_in_u_no_mask (uint32_t *      dest,
+                          const uint32_t *src,
+                          int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc;
+    vector unsigned int vdest, vsrc;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKS(dest, src)
-
+	dest_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKS (dest, src);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORS(dest, src)
-
-        vdest = pix_multiply (vsrc, splat_alpha (vdest));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORS (dest, src);
+	
+	vdest = pix_multiply (vsrc, splat_alpha (vdest));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-
-        uint32_t s = src[i];
-        uint32_t a = ALPHA_8 (dest[i]);
-        UN8x4_MUL_UN8 (s, a);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	
+	uint32_t s = src[i];
+	uint32_t a = ALPHA_8 (dest[i]);
+	UN8x4_MUL_UN8 (s, a);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_in_u_mask (uint32_t *dest,
-                   const uint32_t *src,
-                   const uint32_t *mask,
-                   int width)
+vmx_combine_in_u_mask (uint32_t *      dest,
+                       const uint32_t *src,
+                       const uint32_t *mask,
+                       int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, mask_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, src_mask, mask_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSM(dest, src, mask)
-
-        vdest = pix_multiply (vsrc, splat_alpha (vdest));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSM (dest, src, mask);
+	
+	vdest = pix_multiply (vsrc, splat_alpha (vdest));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t m = ALPHA_8 (mask[i]);
-        uint32_t s = src[i];
-        uint32_t a = ALPHA_8 (dest[i]);
-
-        UN8x4_MUL_UN8 (s, m);
-
-        UN8x4_MUL_UN8 (s, a);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t m = ALPHA_8 (mask[i]);
+	uint32_t s = src[i];
+	uint32_t a = ALPHA_8 (dest[i]);
+	
+	UN8x4_MUL_UN8 (s, m);
+	
+	UN8x4_MUL_UN8 (s, a);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_in_u (pixman_implementation_t *imp, pixman_op_t op,
-	       uint32_t *dest, const uint32_t *src, const uint32_t *mask,
-               int width)
+vmx_combine_in_u (pixman_implementation_t *imp,
+                  pixman_op_t              op,
+                  uint32_t *               dest,
+                  const uint32_t *         src,
+                  const uint32_t *         mask,
+                  int                      width)
 {
     if (mask)
-        vmx_combine_in_u_mask(dest, src, mask, width);
+	vmx_combine_in_u_mask (dest, src, mask, width);
     else
-        vmx_combine_in_u_no_mask(dest, src, width);
+	vmx_combine_in_u_no_mask (dest, src, width);
 }
 
 static void
-vmx_combine_in_reverse_u_no_mask (uint32_t *dest, const uint32_t *src, int width)
+vmx_combine_in_reverse_u_no_mask (uint32_t *      dest,
+                                  const uint32_t *src,
+                                  int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc;
+    vector unsigned int vdest, vsrc;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKS(dest, src)
-
+	dest_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKS (dest, src);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORS(dest, src)
-
-        vdest = pix_multiply (vdest, splat_alpha (vsrc));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORS (dest, src);
+	
+	vdest = pix_multiply (vdest, splat_alpha (vsrc));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t d = dest[i];
-        uint32_t a = ALPHA_8 (src[i]);
-        UN8x4_MUL_UN8 (d, a);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t d = dest[i];
+	uint32_t a = ALPHA_8 (src[i]);
+	UN8x4_MUL_UN8 (d, a);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_in_reverse_u_mask (uint32_t *dest,
-                          const uint32_t *src,
-                          const uint32_t *mask,
-                          int width)
+vmx_combine_in_reverse_u_mask (uint32_t *      dest,
+                               const uint32_t *src,
+                               const uint32_t *mask,
+                               int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, mask_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, src_mask, mask_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSM(dest, src, mask)
-
-        vdest = pix_multiply (vdest, splat_alpha (vsrc));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSM (dest, src, mask);
+	
+	vdest = pix_multiply (vdest, splat_alpha (vsrc));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t m = ALPHA_8 (mask[i]);
-        uint32_t d = dest[i];
-        uint32_t a = src[i];
-
-        UN8x4_MUL_UN8 (a, m);
-
-        a = ALPHA_8 (a);
-        UN8x4_MUL_UN8 (d, a);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t m = ALPHA_8 (mask[i]);
+	uint32_t d = dest[i];
+	uint32_t a = src[i];
+	
+	UN8x4_MUL_UN8 (a, m);
+	
+	a = ALPHA_8 (a);
+	UN8x4_MUL_UN8 (d, a);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_in_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-		      uint32_t *dest, const uint32_t *src,
-                      const uint32_t *mask, int width)
+vmx_combine_in_reverse_u (pixman_implementation_t *imp,
+                          pixman_op_t              op,
+                          uint32_t *               dest,
+                          const uint32_t *         src,
+                          const uint32_t *         mask,
+                          int                      width)
 {
     if (mask)
-        vmx_combine_in_reverse_u_mask(dest, src, mask, width);
+	vmx_combine_in_reverse_u_mask (dest, src, mask, width);
     else
-        vmx_combine_in_reverse_u_no_mask(dest, src, width);
+	vmx_combine_in_reverse_u_no_mask (dest, src, width);
 }
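
(Sketch continued.) IN keeps the source only where the destination has coverage; IN_REVERSE keeps the destination only where the source does. Note that the masked IN_REVERSE tail scales the whole source pixel by the mask alpha first and only then extracts its alpha:

/* IN: s * d.alpha.  IN_REVERSE: d * s.alpha. */
static uint32_t
in_un8x4 (uint32_t s, uint32_t d)
{
    return pix_mul (s, ALPHA_8 (d));
}

static uint32_t
in_reverse_un8x4 (uint32_t s, uint32_t d)
{
    return pix_mul (d, ALPHA_8 (s));
}
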
 
 static void
-vmx_combine_out_u_no_mask (uint32_t *dest, const uint32_t *src, int width)
+vmx_combine_out_u_no_mask (uint32_t *      dest,
+                           const uint32_t *src,
+                           int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc;
+    vector unsigned int vdest, vsrc;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKS(dest, src)
-
+	dest_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKS (dest, src);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORS(dest, src)
-
-        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORS (dest, src);
+	
+	vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t s = src[i];
-        uint32_t a = ALPHA_8 (~dest[i]);
-        UN8x4_MUL_UN8 (s, a);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t s = src[i];
+	uint32_t a = ALPHA_8 (~dest[i]);
+	UN8x4_MUL_UN8 (s, a);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_out_u_mask (uint32_t *dest,
-                    const uint32_t *src,
-                    const uint32_t *mask,
-                    int width)
+vmx_combine_out_u_mask (uint32_t *      dest,
+                        const uint32_t *src,
+                        const uint32_t *mask,
+                        int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, mask_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, src_mask, mask_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSM(dest, src, mask)
-
-        vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSM (dest, src, mask);
+	
+	vdest = pix_multiply (vsrc, splat_alpha (negate (vdest)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t m = ALPHA_8 (mask[i]);
-        uint32_t s = src[i];
-        uint32_t a = ALPHA_8 (~dest[i]);
-
-        UN8x4_MUL_UN8 (s, m);
-
-        UN8x4_MUL_UN8 (s, a);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t m = ALPHA_8 (mask[i]);
+	uint32_t s = src[i];
+	uint32_t a = ALPHA_8 (~dest[i]);
+	
+	UN8x4_MUL_UN8 (s, m);
+	
+	UN8x4_MUL_UN8 (s, a);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_out_u (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask,
-                int width)
+vmx_combine_out_u (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
 {
     if (mask)
-        vmx_combine_out_u_mask(dest, src, mask, width);
+	vmx_combine_out_u_mask (dest, src, mask, width);
     else
-        vmx_combine_out_u_no_mask(dest, src, width);
+	vmx_combine_out_u_no_mask (dest, src, width);
 }
 
 static void
-vmx_combine_out_reverse_u_no_mask (uint32_t *dest, const uint32_t *src, int width)
+vmx_combine_out_reverse_u_no_mask (uint32_t *      dest,
+                                   const uint32_t *src,
+                                   int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc;
+    vector unsigned int vdest, vsrc;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKS(dest, src)
-
+	dest_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKS (dest, src);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORS(dest, src)
-
-        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORS (dest, src);
+	
+	vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t d = dest[i];
-        uint32_t a = ALPHA_8 (~src[i]);
-        UN8x4_MUL_UN8 (d, a);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t d = dest[i];
+	uint32_t a = ALPHA_8 (~src[i]);
+	UN8x4_MUL_UN8 (d, a);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_out_reverse_u_mask (uint32_t *dest,
-                           const uint32_t *src,
-                           const uint32_t *mask,
-                           int width)
+vmx_combine_out_reverse_u_mask (uint32_t *      dest,
+                                const uint32_t *src,
+                                const uint32_t *mask,
+                                int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, mask_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, src_mask, mask_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSM(dest, src, mask)
-
-        vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSM (dest, src, mask);
+	
+	vdest = pix_multiply (vdest, splat_alpha (negate (vsrc)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t m = ALPHA_8 (mask[i]);
-        uint32_t d = dest[i];
-        uint32_t a = src[i];
-
-        UN8x4_MUL_UN8 (a, m);
-
-        a = ALPHA_8 (~a);
-        UN8x4_MUL_UN8 (d, a);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t m = ALPHA_8 (mask[i]);
+	uint32_t d = dest[i];
+	uint32_t a = src[i];
+	
+	UN8x4_MUL_UN8 (a, m);
+	
+	a = ALPHA_8 (~a);
+	UN8x4_MUL_UN8 (d, a);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-		       uint32_t *dest,
-                       const uint32_t *src,
-                       const uint32_t *mask,
-                       int width)
+vmx_combine_out_reverse_u (pixman_implementation_t *imp,
+                           pixman_op_t              op,
+                           uint32_t *               dest,
+                           const uint32_t *         src,
+                           const uint32_t *         mask,
+                           int                      width)
 {
     if (mask)
-        vmx_combine_out_reverse_u_mask(dest, src, mask, width);
+	vmx_combine_out_reverse_u_mask (dest, src, mask, width);
     else
-        vmx_combine_out_reverse_u_no_mask(dest, src, width);
+	vmx_combine_out_reverse_u_no_mask (dest, src, width);
 }
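
(Sketch continued.) OUT and OUT_REVERSE are the complements of IN: each operand survives only where the other has no coverage, hence the negate () around the splatted alpha in the vector loops:

/* OUT: s * (1 - d.alpha).  OUT_REVERSE: d * (1 - s.alpha). */
static uint32_t
out_un8x4 (uint32_t s, uint32_t d)
{
    return pix_mul (s, 255 - ALPHA_8 (d));
}

static uint32_t
out_reverse_un8x4 (uint32_t s, uint32_t d)
{
    return pix_mul (d, 255 - ALPHA_8 (s));
}
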
 
 static void
-vmx_combine_atop_u_no_mask (uint32_t *dest, const uint32_t *src, int width)
+vmx_combine_atop_u_no_mask (uint32_t *      dest,
+                            const uint32_t *src,
+                            int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc;
+    vector unsigned int vdest, vsrc;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKS(dest, src)
-
+	dest_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKS (dest, src);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORS(dest, src)
-
-        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
-                            vdest, splat_alpha (negate (vsrc)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORS (dest, src);
+	
+	vdest = pix_add_mul (vsrc, splat_alpha (vdest),
+			     vdest, splat_alpha (negate (vsrc)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t dest_a = ALPHA_8 (d);
-        uint32_t src_ia = ALPHA_8 (~s);
-
-        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t dest_a = ALPHA_8 (d);
+	uint32_t src_ia = ALPHA_8 (~s);
+	
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_atop_u_mask (uint32_t *dest,
-                     const uint32_t *src,
-                     const uint32_t *mask,
-                     int width)
+vmx_combine_atop_u_mask (uint32_t *      dest,
+                         const uint32_t *src,
+                         const uint32_t *mask,
+                         int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, mask_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, src_mask, mask_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSM(dest, src, mask)
-
-        vdest = pix_add_mul (vsrc, splat_alpha (vdest),
-                            vdest, splat_alpha (negate (vsrc)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSM (dest, src, mask);
+	
+	vdest = pix_add_mul (vsrc, splat_alpha (vdest),
+			     vdest, splat_alpha (negate (vsrc)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t m = ALPHA_8 (mask[i]);
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t dest_a = ALPHA_8 (d);
-        uint32_t src_ia;
-
-        UN8x4_MUL_UN8 (s, m);
-
-        src_ia = ALPHA_8 (~s);
-
-        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t m = ALPHA_8 (mask[i]);
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t dest_a = ALPHA_8 (d);
+	uint32_t src_ia;
+	
+	UN8x4_MUL_UN8 (s, m);
+	
+	src_ia = ALPHA_8 (~s);
+	
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_atop_u (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dest,
-                 const uint32_t *src,
-                 const uint32_t *mask,
-                 int width)
+vmx_combine_atop_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     if (mask)
-        vmx_combine_atop_u_mask(dest, src, mask, width);
+	vmx_combine_atop_u_mask (dest, src, mask, width);
     else
-        vmx_combine_atop_u_no_mask(dest, src, width);
+	vmx_combine_atop_u_no_mask (dest, src, width);
 }
 
 static void
-vmx_combine_atop_reverse_u_no_mask (uint32_t *dest, const uint32_t *src, int width)
+vmx_combine_atop_reverse_u_no_mask (uint32_t *      dest,
+                                    const uint32_t *src,
+                                    int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc;
+    vector unsigned int vdest, vsrc;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKS(dest, src)
-
+	dest_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKS (dest, src);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORS(dest, src)
-
-        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
-                            vsrc, splat_alpha (negate (vdest)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORS (dest, src);
+	
+	vdest = pix_add_mul (vdest, splat_alpha (vsrc),
+			     vsrc, splat_alpha (negate (vdest)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t src_a = ALPHA_8 (s);
-        uint32_t dest_ia = ALPHA_8 (~d);
-
-        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t src_a = ALPHA_8 (s);
+	uint32_t dest_ia = ALPHA_8 (~d);
+	
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_atop_reverse_u_mask (uint32_t *dest,
-                            const uint32_t *src,
-                            const uint32_t *mask,
-                            int width)
+vmx_combine_atop_reverse_u_mask (uint32_t *      dest,
+                                 const uint32_t *src,
+                                 const uint32_t *mask,
+                                 int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, mask_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, src_mask, mask_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSM(dest, src, mask)
-
-        vdest = pix_add_mul (vdest, splat_alpha (vsrc),
-                            vsrc, splat_alpha (negate (vdest)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSM (dest, src, mask);
+	
+	vdest = pix_add_mul (vdest, splat_alpha (vsrc),
+			     vsrc, splat_alpha (negate (vdest)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t m = ALPHA_8 (mask[i]);
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t src_a;
-        uint32_t dest_ia = ALPHA_8 (~d);
-
-        UN8x4_MUL_UN8 (s, m);
-
-        src_a = ALPHA_8 (s);
-
-        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t m = ALPHA_8 (mask[i]);
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t src_a;
+	uint32_t dest_ia = ALPHA_8 (~d);
+	
+	UN8x4_MUL_UN8 (s, m);
+	
+	src_a = ALPHA_8 (s);
+	
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_atop_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			uint32_t *dest,
-                        const uint32_t *src,
-                        const uint32_t *mask,
-                        int width)
+vmx_combine_atop_reverse_u (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dest,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
 {
     if (mask)
-        vmx_combine_atop_reverse_u_mask(dest, src, mask, width);
+	vmx_combine_atop_reverse_u_mask (dest, src, mask, width);
     else
-        vmx_combine_atop_reverse_u_no_mask(dest, src, width);
+	vmx_combine_atop_reverse_u_no_mask (dest, src, width);
 }
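
(Sketch continued.) Judging by how the tails use it, pix_add_mul (a, b, c, d) computes a * b + c * d with the usual saturation, which is exactly what ATOP needs: the source where the destination has coverage plus the destination where the source has none. ATOP_REVERSE complements the opposite alpha:

/* ATOP: s * d.alpha + d * (1 - s.alpha). */
static uint32_t
atop_un8x4 (uint32_t s, uint32_t d)
{
    return pix_add (pix_mul (s, ALPHA_8 (d)),
                    pix_mul (d, 255 - ALPHA_8 (s)));
}

/* ATOP_REVERSE: s * (1 - d.alpha) + d * s.alpha. */
static uint32_t
atop_reverse_un8x4 (uint32_t s, uint32_t d)
{
    return pix_add (pix_mul (s, 255 - ALPHA_8 (d)),
                    pix_mul (d, ALPHA_8 (s)));
}
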
 
 static void
-vmx_combine_xor_u_no_mask (uint32_t *dest, const uint32_t *src, int width)
+vmx_combine_xor_u_no_mask (uint32_t *      dest,
+                           const uint32_t *src,
+                           int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc;
+    vector unsigned int vdest, vsrc;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKS(dest, src)
-
+	dest_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKS (dest, src);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORS (dest, src)
-
-        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
-                            vdest, splat_alpha (negate (vsrc)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORS (dest, src);
+	
+	vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
+			     vdest, splat_alpha (negate (vsrc)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t src_ia = ALPHA_8 (~s);
-        uint32_t dest_ia = ALPHA_8 (~d);
-
-        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t src_ia = ALPHA_8 (~s);
+	uint32_t dest_ia = ALPHA_8 (~d);
+	
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_xor_u_mask (uint32_t *dest,
-                    const uint32_t *src,
-                    const uint32_t *mask,
-                    int width)
+vmx_combine_xor_u_mask (uint32_t *      dest,
+                        const uint32_t *src,
+                        const uint32_t *mask,
+                        int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, mask_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, src_mask, mask_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSM(dest, src, mask)
-
-        vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
-                            vdest, splat_alpha (negate (vsrc)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSM (dest, src, mask);
+	
+	vdest = pix_add_mul (vsrc, splat_alpha (negate (vdest)),
+			     vdest, splat_alpha (negate (vsrc)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t m = ALPHA_8 (mask[i]);
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t src_ia;
-        uint32_t dest_ia = ALPHA_8 (~d);
-
-        UN8x4_MUL_UN8 (s, m);
-
-        src_ia = ALPHA_8 (~s);
-
-        UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t m = ALPHA_8 (mask[i]);
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t src_ia;
+	uint32_t dest_ia = ALPHA_8 (~d);
+	
+	UN8x4_MUL_UN8 (s, m);
+	
+	src_ia = ALPHA_8 (~s);
+	
+	UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_xor_u (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest,
-                const uint32_t *src,
-                const uint32_t *mask,
-                int width)
+vmx_combine_xor_u (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
 {
     if (mask)
-        vmx_combine_xor_u_mask(dest, src, mask, width);
+	vmx_combine_xor_u_mask (dest, src, mask, width);
     else
-        vmx_combine_xor_u_no_mask(dest, src, width);
+	vmx_combine_xor_u_no_mask (dest, src, width);
 }
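
(Sketch continued.) XOR complements both alphas, so only the non-overlapping parts of source and destination survive:

/* XOR: s * (1 - d.alpha) + d * (1 - s.alpha). */
static uint32_t
xor_un8x4 (uint32_t s, uint32_t d)
{
    return pix_add (pix_mul (s, 255 - ALPHA_8 (d)),
                    pix_mul (d, 255 - ALPHA_8 (s)));
}
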
 
 static void
-vmx_combine_add_u_no_mask (uint32_t *dest, const uint32_t *src, int width)
+vmx_combine_add_u_no_mask (uint32_t *      dest,
+                           const uint32_t *src,
+                           int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc;
+    vector unsigned int vdest, vsrc;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKS(dest, src)
+	dest_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKS (dest, src);
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORS(dest, src)
-
-        vdest = pix_add (vsrc, vdest);
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORS (dest, src);
+	
+	vdest = pix_add (vsrc, vdest);
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        UN8x4_ADD_UN8x4 (d, s);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	UN8x4_ADD_UN8x4 (d, s);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_add_u_mask (uint32_t *dest,
-                    const uint32_t *src,
-                    const uint32_t *mask,
-                    int width)
+vmx_combine_add_u_mask (uint32_t *      dest,
+                        const uint32_t *src,
+                        const uint32_t *mask,
+                        int             width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, src_mask, mask_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, src_mask, mask_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSM(dest, src, mask)
-
-        vdest = pix_add (vsrc, vdest);
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSM (dest, src, mask);
+	
+	vdest = pix_add (vsrc, vdest);
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t m = ALPHA_8 (mask[i]);
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-
-        UN8x4_MUL_UN8 (s, m);
-
-        UN8x4_ADD_UN8x4 (d, s);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t m = ALPHA_8 (mask[i]);
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	
+	UN8x4_MUL_UN8 (s, m);
+	
+	UN8x4_ADD_UN8x4 (d, s);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_add_u (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest,
-                const uint32_t *src,
-                const uint32_t *mask,
-                int width)
+vmx_combine_add_u (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
 {
     if (mask)
-        vmx_combine_add_u_mask(dest, src, mask, width);
+	vmx_combine_add_u_mask (dest, src, mask, width);
     else
-        vmx_combine_add_u_no_mask(dest, src, width);
+	vmx_combine_add_u_no_mask (dest, src, width);
 }
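
(Sketch continued.) ADD involves no alpha at all; it is a plain saturating per-byte sum, which is why the unmasked vector loop reduces to a single pix_add (vsrc, vdest):

/* ADD: saturating per-byte sum, pix_add as in the OVER sketch above. */
static uint32_t
add_un8x4 (uint32_t s, uint32_t d)
{
    return pix_add (s, d);
}
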
 
 static void
-vmx_combine_src_ca (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+vmx_combine_src_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, mask_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask);
+	dest_mask, mask_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSC(dest, src, mask)
-
-        vdest = pix_multiply (vsrc, vmask);
-
-        STORE_VECTOR(dest)
-
-        mask+=4;
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSC (dest, src, mask);
+	
+	vdest = pix_multiply (vsrc, vmask);
+	
+	STORE_VECTOR (dest);
+	
+	mask += 4;
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t a = mask[i];
-        uint32_t s = src[i];
-        UN8x4_MUL_UN8x4 (s, a);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t a = mask[i];
+	uint32_t s = src[i];
+	UN8x4_MUL_UN8x4 (s, a);
+	dest[i] = s;
     }
 }
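
(Sketch continued.) The `_ca' combiners below treat the mask as per-channel coverage rather than a single alpha: UN8x4_MUL_UN8x4 scales each byte of the source by the corresponding byte of the mask, which is why LOAD_VECTORSC loads a full vmask. A channel-wise multiply and SRC_CA itself, with mul_un8 as before:

/* Channel-wise multiply: each byte of x scaled by the same byte of a. */
static uint32_t
pix_mul_un8x4 (uint32_t x, uint32_t a)
{
    uint32_t r = 0;
    int shift;

    for (shift = 0; shift < 32; shift += 8)
        r |= (uint32_t)mul_un8 ((x >> shift) & 0xff,
                                (a >> shift) & 0xff) << shift;

    return r;
}

/* SRC_CA: dest = src scaled channel-wise by the mask. */
static uint32_t
src_ca_un8x4 (uint32_t s, uint32_t a)
{
    return pix_mul_un8x4 (s, a);
}
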
 
 static void
-vmx_combine_over_ca (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+vmx_combine_over_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dest,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, mask_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask);
+	dest_mask, mask_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSC(dest, src, mask)
-
-        vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);
-
-        STORE_VECTOR(dest)
-
-        mask+=4;
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSC (dest, src, mask);
+	
+	vdest = in_over (vsrc, splat_alpha (vsrc), vmask, vdest);
+	
+	STORE_VECTOR (dest);
+	
+	mask += 4;
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t a = mask[i];
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        UN8x4_MUL_UN8x4 (s, a);
-        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t a = mask[i];
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	UN8x4_MUL_UN8x4 (s, a);
+	UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_over_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               dest,
+                             const uint32_t *         src,
+                             const uint32_t *         mask,
+                             int                      width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, mask_mask, src_mask, store_mask;
+	dest_mask, mask_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
 
-    COMPUTE_SHIFT_MASKC(dest, src, mask);
     /* printf("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSC (dest, src, mask)
-
-        vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));
-
-        STORE_VECTOR(dest)
-
-        mask+=4;
-        src+=4;
-        dest+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSC (dest, src, mask);
+	
+	vdest = over (vdest, splat_alpha (vdest), pix_multiply (vsrc, vmask));
+	
+	STORE_VECTOR (dest);
+	
+	mask += 4;
+	src += 4;
+	dest += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t a = mask[i];
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t da = ALPHA_8 (d);
-        UN8x4_MUL_UN8x4 (s, a);
-        UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ~da, d);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t a = mask[i];
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t da = ALPHA_8 (d);
+	UN8x4_MUL_UN8x4 (s, a);
+	UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ~da, d);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_in_ca (pixman_implementation_t *imp, pixman_op_t op,
-	       uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+vmx_combine_in_ca (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, mask_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, mask_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSC(dest, src, mask)
-
-        vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSC (dest, src, mask);
+	
+	vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t a = mask[i];
-        uint32_t s = src[i];
-        uint32_t da = ALPHA_8 (dest[i]);
-        UN8x4_MUL_UN8 (s, a);
-        UN8x4_MUL_UN8 (s, da);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t a = mask[i];
+	uint32_t s = src[i];
+	uint32_t da = ALPHA_8 (dest[i]);
+	UN8x4_MUL_UN8 (s, a);
+	UN8x4_MUL_UN8 (s, da);
+	dest[i] = s;
     }
 }
 
 static void
-vmx_combine_in_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-		      uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
+                           pixman_op_t              op,
+                           uint32_t *               dest,
+                           const uint32_t *         src,
+                           const uint32_t *         mask,
+                           int                      width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, mask_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, mask_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSC(dest, src, mask)
-
-        vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSC (dest, src, mask);
+	
+	vdest = pix_multiply (vdest, pix_multiply (vmask, splat_alpha (vsrc)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t a = mask[i];
-        uint32_t d = dest[i];
-        uint32_t sa = ALPHA_8 (src[i]);
-        UN8x4_MUL_UN8 (a, sa);
-        UN8x4_MUL_UN8x4 (d, a);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t a = mask[i];
+	uint32_t d = dest[i];
+	uint32_t sa = ALPHA_8 (src[i]);
+	UN8x4_MUL_UN8 (a, sa);
+	UN8x4_MUL_UN8x4 (d, a);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_out_ca (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+vmx_combine_out_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, mask_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, mask_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSC(dest, src, mask)
-
-        vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSC (dest, src, mask);
+	
+	vdest = pix_multiply (pix_multiply (vsrc, vmask), splat_alpha (vdest));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t a = mask[i];
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t da = ALPHA_8 (~d);
-        UN8x4_MUL_UN8x4 (s, a);
-        UN8x4_MUL_UN8x4 (s, da);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t a = mask[i];
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t da = ALPHA_8 (~d);
+	UN8x4_MUL_UN8x4 (s, a);
+	UN8x4_MUL_UN8x4 (s, da);
+	dest[i] = s;
     }
 }
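
(Sketch continued.) One thing worth flagging while reading the reindent: the vector body of vmx_combine_out_ca above is identical to vmx_combine_in_ca's, multiplying by splat_alpha (vdest) with no negate, whereas OUT calls for the complement of the destination alpha, as the scalar tail's ALPHA_8 (~d) does. The sketch below follows the operator's definition:

/* OUT (component alpha): (s (x) a) * (1 - d.alpha). */
static uint32_t
out_ca_un8x4 (uint32_t s, uint32_t a, uint32_t d)
{
    return pix_mul (pix_mul_un8x4 (s, a), 255 - ALPHA_8 (d));
}
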
 
 static void
-vmx_combine_out_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-		       uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dest,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, mask_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, mask_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSC(dest, src, mask)
-
-        vdest = pix_multiply (vdest,
-                             negate (pix_multiply (vmask, splat_alpha (vsrc))));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSC (dest, src, mask);
+	
+	vdest = pix_multiply (
+	    vdest, negate (pix_multiply (vmask, splat_alpha (vsrc))));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t a = mask[i];
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t sa = ALPHA_8 (s);
-        UN8x4_MUL_UN8x4 (a, sa);
-        UN8x4_MUL_UN8x4 (d, ~a);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t a = mask[i];
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t sa = ALPHA_8 (s);
+	UN8x4_MUL_UN8x4 (a, sa);
+	UN8x4_MUL_UN8x4 (d, ~a);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_atop_ca (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+vmx_combine_atop_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dest,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, mask_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, mask_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSC(dest, src, mask)
-
-        vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest),
-                            vdest,
-                            negate (pix_multiply (vmask,
-                                                splat_alpha (vmask))));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSC (dest, src, mask);
+	
+	vdest = pix_add_mul (pix_multiply (vsrc, vmask), splat_alpha (vdest),
+			     vdest,
+			     negate (pix_multiply (vmask,
+						   splat_alpha (vmask))));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t a = mask[i];
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t sa = ALPHA_8 (s);
-        uint32_t da = ALPHA_8 (d);
-
-        UN8x4_MUL_UN8x4 (s, a);
-        UN8x4_MUL_UN8 (a, sa);
-        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t a = mask[i];
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t sa = ALPHA_8 (s);
+	uint32_t da = ALPHA_8 (d);
+	
+	UN8x4_MUL_UN8x4 (s, a);
+	UN8x4_MUL_UN8 (a, sa);
+	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
+	dest[i] = d;
     }
 }
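
(Sketch continued.) Similarly worth flagging: the vector loop above negates pix_multiply (vmask, splat_alpha (vmask)), while the scalar tail multiplies the mask by the source alpha, UN8x4_MUL_UN8 (a, sa). The sketch follows the scalar tail:

/* ATOP (component alpha), per the scalar tail:
 * a' = a * s.alpha; result = d (x) ~a' + (s (x) a) * d.alpha. */
static uint32_t
atop_ca_un8x4 (uint32_t s, uint32_t a, uint32_t d)
{
    uint32_t sm = pix_mul_un8x4 (s, a);      /* s (x) a     */
    uint32_t am = pix_mul (a, ALPHA_8 (s));  /* a * s.alpha */

    return pix_add (pix_mul_un8x4 (d, ~am), pix_mul (sm, ALPHA_8 (d)));
}
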
 
 static void
-vmx_combine_atop_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               dest,
+                             const uint32_t *         src,
+                             const uint32_t *         mask,
+                             int                      width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, mask_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, mask_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSC(dest, src, mask)
-
-        vdest = pix_add_mul (vdest,
-                            pix_multiply (vmask, splat_alpha (vsrc)),
-                            pix_multiply (vsrc, vmask),
-                            negate (splat_alpha (vdest)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSC (dest, src, mask);
+	
+	vdest = pix_add_mul (vdest,
+			     pix_multiply (vmask, splat_alpha (vsrc)),
+			     pix_multiply (vsrc, vmask),
+			     negate (splat_alpha (vdest)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t a = mask[i];
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t sa = ALPHA_8 (s);
-        uint32_t da = ALPHA_8 (d);
-
-        UN8x4_MUL_UN8x4 (s, a);
-        UN8x4_MUL_UN8 (a, sa);
-        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, ~da);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t a = mask[i];
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t sa = ALPHA_8 (s);
+	uint32_t da = ALPHA_8 (d);
+	
+	UN8x4_MUL_UN8x4 (s, a);
+	UN8x4_MUL_UN8 (a, sa);
+	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, ~da);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_xor_ca (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+vmx_combine_xor_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, mask_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, mask_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSC(dest, src, mask)
-
-        vdest = pix_add_mul (vdest,
-                            negate (pix_multiply (vmask, splat_alpha (vsrc))),
-                            pix_multiply (vsrc, vmask),
-                            negate (splat_alpha (vdest)));
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSC (dest, src, mask);
+	
+	vdest = pix_add_mul (vdest,
+			     negate (pix_multiply (vmask, splat_alpha (vsrc))),
+			     pix_multiply (vsrc, vmask),
+			     negate (splat_alpha (vdest)));
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t a = mask[i];
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-        uint32_t sa = ALPHA_8 (s);
-        uint32_t da = ALPHA_8 (d);
-
-        UN8x4_MUL_UN8x4 (s, a);
-        UN8x4_MUL_UN8 (a, sa);
-        UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, ~da);
-        dest[i] = d;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t a = mask[i];
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	uint32_t sa = ALPHA_8 (s);
+	uint32_t da = ALPHA_8 (d);
+	
+	UN8x4_MUL_UN8x4 (s, a);
+	UN8x4_MUL_UN8 (a, sa);
+	UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, ~da);
+	dest[i] = d;
     }
 }
 
 static void
-vmx_combine_add_ca (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+vmx_combine_add_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     int i;
-    vector unsigned int  vdest, vsrc, vmask;
+    vector unsigned int vdest, vsrc, vmask;
     vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
-                         dest_mask, mask_mask, src_mask, store_mask;
-
-    COMPUTE_SHIFT_MASKC(dest, src, mask)
-
+	dest_mask, mask_mask, src_mask, store_mask;
+    
+    COMPUTE_SHIFT_MASKC (dest, src, mask);
+    
     /* printf ("%s\n",__PRETTY_FUNCTION__); */
-    for (i = width/4; i > 0; i--) {
-
-        LOAD_VECTORSC(dest, src, mask)
-
-        vdest = pix_add (pix_multiply (vsrc, vmask), vdest);
-
-        STORE_VECTOR(dest)
-
-        src+=4;
-        dest+=4;
-        mask+=4;
+    for (i = width / 4; i > 0; i--)
+    {
+	
+	LOAD_VECTORSC (dest, src, mask);
+	
+	vdest = pix_add (pix_multiply (vsrc, vmask), vdest);
+	
+	STORE_VECTOR (dest);
+	
+	src += 4;
+	dest += 4;
+	mask += 4;
     }
-
-    for (i = width%4; --i >=0;) {
-        uint32_t a = mask[i];
-        uint32_t s = src[i];
-        uint32_t d = dest[i];
-
-        UN8x4_MUL_UN8x4 (s, a);
-        UN8x4_ADD_UN8x4 (s, d);
-        dest[i] = s;
+    
+    for (i = width % 4; --i >= 0;)
+    {
+	uint32_t a = mask[i];
+	uint32_t s = src[i];
+	uint32_t d = dest[i];
+	
+	UN8x4_MUL_UN8x4 (s, a);
+	UN8x4_ADD_UN8x4 (s, d);
+	dest[i] = s;
     }
 }
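
(Sketch continued.) ADD_CA closes out the component-alpha set: a channel-wise scale of the source by the mask followed by a saturating add into the destination:

/* ADD (component alpha): dest = saturate ((s (x) a) + d). */
static uint32_t
add_ca_un8x4 (uint32_t s, uint32_t a, uint32_t d)
{
    return pix_add (pix_mul_un8x4 (s, a), d);
}
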
 
-
 #if 0
 void
-vmx_composite_over_n_8888 (pixman_operator_t	op,
-			    pixman_image_t * src_image,
-			    pixman_image_t * mask_image,
-			    pixman_image_t * dst_image,
-			    int16_t	src_x,
-			    int16_t	src_y,
-			    int16_t	mask_x,
-			    int16_t	mask_y,
-			    int16_t	dest_x,
-			    int16_t	dest_y,
-			    uint16_t	width,
-			    uint16_t	height)
+vmx_composite_over_n_8888 (pixman_operator_t op,
+                           pixman_image_t *  src_image,
+                           pixman_image_t *  mask_image,
+                           pixman_image_t *  dst_image,
+                           int16_t           src_x,
+                           int16_t           src_y,
+                           int16_t           mask_x,
+                           int16_t           mask_y,
+                           int16_t           dest_x,
+                           int16_t           dest_y,
+                           uint16_t          width,
+                           uint16_t          height)
 {
-    uint32_t	src;
-    uint32_t	*dst_line, *dst;
-    int	dst_stride;
-
+    uint32_t src;
+    uint32_t    *dst_line, *dst;
+    int dst_stride;
+    
     _pixman_image_get_solid (src_image, dst_image, src);
-
+    
     if (src >> 24 == 0)
 	return;
-
+    
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-
+    
     while (height--)
     {
 	dst = dst_line;
@@ -1515,36 +1672,36 @@ vmx_composite_over_n_8888 (pixman_operator_t	op,
 }
 
 void
-vmx_composite_over_n_0565 (pixman_operator_t	op,
-			    pixman_image_t * src_image,
-			    pixman_image_t * mask_image,
-			    pixman_image_t * dst_image,
-			    int16_t	src_x,
-			    int16_t	src_y,
-			    int16_t	mask_x,
-			    int16_t	mask_y,
-			    int16_t	dest_x,
-			    int16_t	dest_y,
-			    uint16_t	width,
-			    uint16_t	height)
+vmx_composite_over_n_0565 (pixman_operator_t op,
+                           pixman_image_t *  src_image,
+                           pixman_image_t *  mask_image,
+                           pixman_image_t *  dst_image,
+                           int16_t           src_x,
+                           int16_t           src_y,
+                           int16_t           mask_x,
+                           int16_t           mask_y,
+                           int16_t           dest_x,
+                           int16_t           dest_y,
+                           uint16_t          width,
+                           uint16_t          height)
 {
-    uint32_t	src;
-    uint16_t	*dst_line, *dst;
-    uint16_t	w;
-    int	dst_stride;
-
+    uint32_t src;
+    uint16_t    *dst_line, *dst;
+    uint16_t w;
+    int dst_stride;
+    
     _pixman_image_get_solid (src_image, dst_image, src);
-
+    
     if (src >> 24 == 0)
 	return;
-
+    
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-
+    
     while (height--)
     {
 	dst = dst_line;
 	dst_line += dst_stride;
-       vmx_combine_over_u565(dst, src, width);
+	vmx_combine_over_u565 (dst, src, width);
     }
 }
 
@@ -1562,7 +1719,7 @@ _pixman_implementation_create_vmx (void)
 {
     pixman_implementation_t *fast = _pixman_implementation_create_fast_path ();
     pixman_implementation_t *imp = _pixman_implementation_create (fast);
-
+    
     /* Set up function pointers */
     
     /* SSE code patch for fbcompose.c */
@@ -1575,9 +1732,9 @@ _pixman_implementation_create_vmx (void)
     imp->combine_32[PIXMAN_OP_ATOP] = vmx_combine_atop_u;
     imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = vmx_combine_atop_reverse_u;
     imp->combine_32[PIXMAN_OP_XOR] = vmx_combine_xor_u;
-
+    
     imp->combine_32[PIXMAN_OP_ADD] = vmx_combine_add_u;
-
+    
     imp->combine_32_ca[PIXMAN_OP_SRC] = vmx_combine_src_ca;
     imp->combine_32_ca[PIXMAN_OP_OVER] = vmx_combine_over_ca;
     imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = vmx_combine_over_reverse_ca;
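
As an aside, the scalar tail loops above define exactly what the vector
paths must compute. A minimal stand-alone sketch of the component-alpha
ADD combiner, assuming 8-bit premultiplied ARGB and the usual rounded
x*y/255 multiply (the helper names here are illustrative, not pixman's
own macros):

    #include <stdint.h>

    /* Per-channel x*y/255 with rounding, one channel at a time; this is
     * what UN8x4_MUL_UN8x4 computes on all four channels at once. */
    static uint32_t
    mul_un8x4 (uint32_t x, uint32_t y)
    {
        uint32_t result = 0;
        int shift;

        for (shift = 0; shift < 32; shift += 8)
        {
            uint32_t t = ((x >> shift) & 0xff) * ((y >> shift) & 0xff) + 0x80;

            t = (t + (t >> 8)) >> 8;        /* rounded divide by 255 */
            result |= t << shift;
        }
        return result;
    }

    /* Per-channel saturating add, as in UN8x4_ADD_UN8x4. */
    static uint32_t
    add_un8x4 (uint32_t x, uint32_t y)
    {
        uint32_t result = 0;
        int shift;

        for (shift = 0; shift < 32; shift += 8)
        {
            uint32_t t = ((x >> shift) & 0xff) + ((y >> shift) & 0xff);

            if (t > 0xff)
                t = 0xff;
            result |= t << shift;
        }
        return result;
    }

    /* Component-alpha ADD: dest = dest + src * mask, channel by channel. */
    static void
    combine_add_ca_scalar (uint32_t *dest, const uint32_t *src,
                           const uint32_t *mask, int width)
    {
        int i;

        for (i = 0; i < width; i++)
            dest[i] = add_un8x4 (mul_un8x4 (src[i], mask[i]), dest[i]);
    }

The vector loop in vmx_combine_add_ca () performs the same
multiply-then-saturating-add on four pixels per iteration.
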
commit 01b604c606cd0842c8f4eccc41511a472e4450e9
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 21:57:08 2009 -0400

    Reformat and reindent pixman-utils.c

diff --git a/pixman/pixman-region16.c b/pixman/pixman-region16.c
index 760a2b4..46f5e26 100644
--- a/pixman/pixman-region16.c
+++ b/pixman/pixman-region16.c
@@ -44,8 +44,8 @@ typedef struct {
 
 #include "pixman-region.c"
 
-/* This function exists only to make it possible to preserve the X ABI - it should
- * go away at first opportunity.
+/* This function exists only to make it possible to preserve the X ABI -
+ * it should go away at first opportunity.
  *
  * The problem is that the X ABI exports the three structs and has used
  * them through macros. So the X server calls this function with
diff --git a/pixman/pixman-utils.c b/pixman/pixman-utils.c
index e2abcc2..20c1405 100644
--- a/pixman/pixman-utils.c
+++ b/pixman/pixman-utils.c
@@ -33,29 +33,29 @@
 /*
  * Computing composite region
  */
-#define BOUND(v)	(int16_t) ((v) < INT16_MIN ? INT16_MIN : (v) > INT16_MAX ? INT16_MAX : (v))
+#define BOUND(v)        (int16_t) ((v) < INT16_MIN ? INT16_MIN : (v) > INT16_MAX ? INT16_MAX : (v))
 
 static inline pixman_bool_t
-clip_general_image (pixman_region32_t *	region,
-		  pixman_region32_t *	clip,
-		  int		dx,
-		  int		dy)
+clip_general_image (pixman_region32_t * region,
+                    pixman_region32_t * clip,
+                    int                 dx,
+                    int                 dy)
 {
-    if (pixman_region32_n_rects(region) == 1 &&
-	pixman_region32_n_rects(clip) == 1)
+    if (pixman_region32_n_rects (region) == 1 &&
+        pixman_region32_n_rects (clip) == 1)
     {
-	pixman_box32_t *  rbox = pixman_region32_rectangles(region, NULL);
-	pixman_box32_t *  cbox = pixman_region32_rectangles(clip, NULL);
-	int	v;
-	
+	pixman_box32_t *  rbox = pixman_region32_rectangles (region, NULL);
+	pixman_box32_t *  cbox = pixman_region32_rectangles (clip, NULL);
+	int v;
+
 	if (rbox->x1 < (v = cbox->x1 + dx))
-	    rbox->x1 = BOUND(v);
+	    rbox->x1 = BOUND (v);
 	if (rbox->x2 > (v = cbox->x2 + dx))
-	    rbox->x2 = BOUND(v);
+	    rbox->x2 = BOUND (v);
 	if (rbox->y1 < (v = cbox->y1 + dy))
-	    rbox->y1 = BOUND(v);
+	    rbox->y1 = BOUND (v);
 	if (rbox->y2 > (v = cbox->y2 + dy))
-	    rbox->y2 = BOUND(v);
+	    rbox->y2 = BOUND (v);
 	if (rbox->x1 >= rbox->x2 ||
 	    rbox->y1 >= rbox->y2)
 	{
@@ -73,17 +73,16 @@ clip_general_image (pixman_region32_t *	region,
 	if (!pixman_region32_intersect (region, region, clip))
 	    return FALSE;
 	if (dx || dy)
-	    pixman_region32_translate(region, dx, dy);
+	    pixman_region32_translate (region, dx, dy);
     }
-    return pixman_region32_not_empty(region);
+    return pixman_region32_not_empty (region);
 }
 
-
 static inline pixman_bool_t
-clip_source_image (pixman_region32_t *	region,
-		  pixman_image_t *	picture,
-		  int		dx,
-		  int		dy)
+clip_source_image (pixman_region32_t * region,
+                   pixman_image_t *    picture,
+                   int                 dx,
+                   int                 dy)
 {
     /* The workaround lets certain fast paths run even when they
      * would normally be rejected because of out-of-bounds access.
@@ -101,8 +100,8 @@ clip_source_image (pixman_region32_t *	region,
     }
 
     return clip_general_image (region,
-			     &picture->common.clip_region,
-			     dx, dy);
+                               &picture->common.clip_region,
+                               dx, dy);
 }
 
 /*
@@ -110,33 +109,33 @@ clip_source_image (pixman_region32_t *	region,
  * an allocation failure, but rendering ignores those anyways.
  */
 static pixman_bool_t
-pixman_compute_composite_region32 (pixman_region32_t *	region,
-				   pixman_image_t *	src_image,
-				   pixman_image_t *	mask_image,
-				   pixman_image_t *	dst_image,
-				   int16_t		src_x,
-				   int16_t		src_y,
-				   int16_t		mask_x,
-				   int16_t		mask_y,
-				   int16_t		dest_x,
-				   int16_t		dest_y,
-				   uint16_t		width,
-				   uint16_t		height)
+pixman_compute_composite_region32 (pixman_region32_t * region,
+                                   pixman_image_t *    src_image,
+                                   pixman_image_t *    mask_image,
+                                   pixman_image_t *    dst_image,
+                                   int16_t             src_x,
+                                   int16_t             src_y,
+                                   int16_t             mask_x,
+                                   int16_t             mask_y,
+                                   int16_t             dest_x,
+                                   int16_t             dest_y,
+                                   uint16_t            width,
+                                   uint16_t            height)
 {
-    int		v;
-    
+    int v;
+
     region->extents.x1 = dest_x;
     v = dest_x + width;
-    region->extents.x2 = BOUND(v);
+    region->extents.x2 = BOUND (v);
     region->extents.y1 = dest_y;
     v = dest_y + height;
-    region->extents.y2 = BOUND(v);
+    region->extents.y2 = BOUND (v);
 
     region->extents.x1 = MAX (region->extents.x1, 0);
     region->extents.y1 = MAX (region->extents.y1, 0);
-    
+
     /* Some X servers rely on an old bug, where pixman would just believe the
-     * set clip_region and not clip against the destination geometry. So, 
+     * set clip_region and not clip against the destination geometry. So,
      * since only X servers set "source clip", we don't clip against
      * destination geometry when that is set and when the workaround has
      * not been explicitly disabled by
@@ -149,17 +148,17 @@ pixman_compute_composite_region32 (pixman_region32_t *	region,
 	region->extents.x2 = MIN (region->extents.x2, dst_image->bits.width);
 	region->extents.y2 = MIN (region->extents.y2, dst_image->bits.height);
     }
-    
+
     region->data = 0;
-    
+
     /* Check for empty operation */
     if (region->extents.x1 >= region->extents.x2 ||
-	region->extents.y1 >= region->extents.y2)
+        region->extents.y1 >= region->extents.y2)
     {
 	pixman_region32_init (region);
 	return FALSE;
     }
-    
+
     if (dst_image->common.have_clip_region)
     {
 	if (!clip_general_image (region, &dst_image->common.clip_region, 0, 0))
@@ -168,18 +167,18 @@ pixman_compute_composite_region32 (pixman_region32_t *	region,
 	    return FALSE;
 	}
     }
-    
+
     if (dst_image->common.alpha_map && dst_image->common.alpha_map->common.have_clip_region)
     {
 	if (!clip_general_image (region, &dst_image->common.alpha_map->common.clip_region,
-			       -dst_image->common.alpha_origin_x,
-			       -dst_image->common.alpha_origin_y))
+	                         -dst_image->common.alpha_origin_x,
+	                         -dst_image->common.alpha_origin_y))
 	{
 	    pixman_region32_fini (region);
 	    return FALSE;
 	}
     }
-    
+
     /* clip against src */
     if (src_image->common.have_clip_region)
     {
@@ -192,8 +191,8 @@ pixman_compute_composite_region32 (pixman_region32_t *	region,
     if (src_image->common.alpha_map && src_image->common.alpha_map->common.have_clip_region)
     {
 	if (!clip_source_image (region, (pixman_image_t *)src_image->common.alpha_map,
-			       dest_x - (src_x - src_image->common.alpha_origin_x),
-			       dest_y - (src_y - src_image->common.alpha_origin_y)))
+	                        dest_x - (src_x - src_image->common.alpha_origin_x),
+	                        dest_y - (src_y - src_image->common.alpha_origin_y)))
 	{
 	    pixman_region32_fini (region);
 	    return FALSE;
@@ -206,12 +205,12 @@ pixman_compute_composite_region32 (pixman_region32_t *	region,
 	{
 	    pixman_region32_fini (region);
 	    return FALSE;
-	}	
+	}
 	if (mask_image->common.alpha_map && mask_image->common.alpha_map->common.have_clip_region)
 	{
 	    if (!clip_source_image (region, (pixman_image_t *)mask_image->common.alpha_map,
-				   dest_x - (mask_x - mask_image->common.alpha_origin_x),
-				   dest_y - (mask_y - mask_image->common.alpha_origin_y)))
+	                            dest_x - (mask_x - mask_image->common.alpha_origin_x),
+	                            dest_y - (mask_y - mask_image->common.alpha_origin_y)))
 	    {
 		pixman_region32_fini (region);
 		return FALSE;
@@ -223,55 +222,56 @@ pixman_compute_composite_region32 (pixman_region32_t *	region,
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_compute_composite_region (pixman_region16_t *	region,
-				 pixman_image_t *	src_image,
-				 pixman_image_t *	mask_image,
-				 pixman_image_t *	dst_image,
-				 int16_t		src_x,
-				 int16_t		src_y,
-				 int16_t		mask_x,
-				 int16_t		mask_y,
-				 int16_t		dest_x,
-				 int16_t		dest_y,
-				 uint16_t	width,
-				 uint16_t	height)
+pixman_compute_composite_region (pixman_region16_t * region,
+                                 pixman_image_t *    src_image,
+                                 pixman_image_t *    mask_image,
+                                 pixman_image_t *    dst_image,
+                                 int16_t             src_x,
+                                 int16_t             src_y,
+                                 int16_t             mask_x,
+                                 int16_t             mask_y,
+                                 int16_t             dest_x,
+                                 int16_t             dest_y,
+                                 uint16_t            width,
+                                 uint16_t            height)
 {
     pixman_region32_t r32;
     pixman_bool_t retval;
 
     pixman_region32_init (&r32);
-    
-    retval = pixman_compute_composite_region32 (&r32, src_image, mask_image, dst_image,
-						src_x, src_y, mask_x, mask_y, dest_x, dest_y,
-						width, height);
+
+    retval = pixman_compute_composite_region32 (
+	&r32, src_image, mask_image, dst_image,
+	src_x, src_y, mask_x, mask_y, dest_x, dest_y,
+	width, height);
 
     if (retval)
     {
 	if (!pixman_region16_copy_from_region32 (region, &r32))
 	    retval = FALSE;
     }
-    
+
     pixman_region32_fini (&r32);
     return retval;
 }
 
 pixman_bool_t
 pixman_multiply_overflows_int (unsigned int a,
-		               unsigned int b)
+                               unsigned int b)
 {
     return a >= INT32_MAX / b;
 }
 
 pixman_bool_t
 pixman_addition_overflows_int (unsigned int a,
-		               unsigned int b)
+                               unsigned int b)
 {
     return a > INT32_MAX - b;
 }
 
 void *
-pixman_malloc_ab(unsigned int a,
-		 unsigned int b)
+pixman_malloc_ab (unsigned int a,
+                  unsigned int b)
 {
     if (a >= INT32_MAX / b)
 	return NULL;
@@ -281,8 +281,8 @@ pixman_malloc_ab(unsigned int a,
 
 void *
 pixman_malloc_abc (unsigned int a,
-		   unsigned int b,
-		   unsigned int c)
+                   unsigned int b,
+                   unsigned int c)
 {
     if (a >= INT32_MAX / b)
 	return NULL;
@@ -293,23 +293,25 @@ pixman_malloc_abc (unsigned int a,
 }
 
 /*
- * Helper routine to expand a color component from 0 < n <= 8 bits to 16 bits by
- * replication.
+ * Helper routine to expand a color component from 0 < n <= 8 bits to 16
+ * bits by replication.
  */
 static inline uint64_t
-expand16(const uint8_t val, int nbits)
+expand16 (const uint8_t val, int nbits)
 {
-    // Start out with the high bit of val in the high bit of result.
+    /* Start out with the high bit of val in the high bit of result. */
     uint16_t result = (uint16_t)val << (16 - nbits);
 
     if (nbits == 0)
-        return 0;
+	return 0;
 
-    // Copy the bits in result, doubling the number of bits each time, until we
-    // fill all 16 bits.
-    while (nbits < 16) {
-        result |= result >> nbits;
-        nbits *= 2;
+    /* Copy the bits in result, doubling the number of bits each time, until
+     * we fill all 16 bits.
+     */
+    while (nbits < 16)
+    {
+	result |= result >> nbits;
+	nbits *= 2;
     }
 
     return result;
@@ -323,17 +325,19 @@ expand16(const uint8_t val, int nbits)
  * should be 1234512345123451 and not 1234512312345123.
  */
 void
-pixman_expand(uint64_t *dst, const uint32_t *src,
-	      pixman_format_code_t format, int width)
+pixman_expand (uint64_t *           dst,
+               const uint32_t *     src,
+               pixman_format_code_t format,
+               int                  width)
 {
     /*
-     * Determine the sizes of each component and the masks and shifts required
-     * to extract them from the source pixel.
+     * Determine the sizes of each component and the masks and shifts
+     * required to extract them from the source pixel.
      */
-    const int a_size = PIXMAN_FORMAT_A(format),
-              r_size = PIXMAN_FORMAT_R(format),
-              g_size = PIXMAN_FORMAT_G(format),
-              b_size = PIXMAN_FORMAT_B(format);
+    const int a_size = PIXMAN_FORMAT_A (format),
+              r_size = PIXMAN_FORMAT_R (format),
+              g_size = PIXMAN_FORMAT_G (format),
+              b_size = PIXMAN_FORMAT_B (format);
     const int a_shift = 32 - a_size,
               r_shift = 24 - r_size,
               g_shift = 16 - g_size,
@@ -344,21 +348,22 @@ pixman_expand(uint64_t *dst, const uint32_t *src,
                   b_mask = ~(~0 << b_size);
     int i;
 
-    /* Start at the end so that we can do the expansion in place when src == dst */
+    /* Start at the end so that we can do the expansion in place
+     * when src == dst
+     */
     for (i = width - 1; i >= 0; i--)
     {
-        const uint32_t pixel = src[i];
-        // Extract the components.
-        const uint8_t a = (pixel >> a_shift) & a_mask,
-                      r = (pixel >> r_shift) & r_mask,
-                      g = (pixel >> g_shift) & g_mask,
-                      b = (pixel >> b_shift) & b_mask;
-        const uint64_t a16 = a_size ? expand16(a, a_size) : 0xffff,
-                       r16 = expand16(r, r_size),
-                       g16 = expand16(g, g_size),
-                       b16 = expand16(b, b_size);
-
-        dst[i] = a16 << 48 | r16 << 32 | g16 << 16 | b16;
+	const uint32_t pixel = src[i];
+	const uint8_t a = (pixel >> a_shift) & a_mask,
+	              r = (pixel >> r_shift) & r_mask,
+	              g = (pixel >> g_shift) & g_mask,
+	              b = (pixel >> b_shift) & b_mask;
+	const uint64_t a16 = a_size ? expand16 (a, a_size) : 0xffff,
+	               r16 = expand16 (r, r_size),
+	               g16 = expand16 (g, g_size),
+	               b16 = expand16 (b, b_size);
+
+	dst[i] = a16 << 48 | r16 << 32 | g16 << 16 | b16;
     }
 }
 
@@ -367,40 +372,44 @@ pixman_expand(uint64_t *dst, const uint32_t *src,
  * components.
  */
 void
-pixman_contract(uint32_t *dst, const uint64_t *src, int width)
+pixman_contract (uint32_t *      dst,
+                 const uint64_t *src,
+                 int             width)
 {
     int i;
 
-    /* Start at the beginning so that we can do the contraction in place when
-     * src == dst */
+    /* Start at the beginning so that we can do the contraction in
+     * place when src == dst
+     */
     for (i = 0; i < width; i++)
     {
-        const uint8_t a = src[i] >> 56,
-                      r = src[i] >> 40,
-                      g = src[i] >> 24,
-                      b = src[i] >> 8;
-        dst[i] = a << 24 | r << 16 | g << 8 | b;
+	const uint8_t a = src[i] >> 56,
+	              r = src[i] >> 40,
+	              g = src[i] >> 24,
+	              b = src[i] >> 8;
+
+	dst[i] = a << 24 | r << 16 | g << 8 | b;
     }
 }
 
 static void
 walk_region_internal (pixman_implementation_t *imp,
-		      pixman_op_t op,
-		      pixman_image_t * src_image,
-		      pixman_image_t * mask_image,
-		      pixman_image_t * dst_image,
-		      int16_t src_x,
-		      int16_t src_y,
-		      int16_t mask_x,
-		      int16_t mask_y,
-		      int16_t dest_x,
-		      int16_t dest_y,
-		      uint16_t width,
-		      uint16_t height,
-		      pixman_bool_t src_repeat,
-		      pixman_bool_t mask_repeat,
-		      pixman_region32_t *region,
-		      pixman_composite_func_t composite_rect)
+                      pixman_op_t              op,
+                      pixman_image_t *         src_image,
+                      pixman_image_t *         mask_image,
+                      pixman_image_t *         dst_image,
+                      int16_t                  src_x,
+                      int16_t                  src_y,
+                      int16_t                  mask_x,
+                      int16_t                  mask_y,
+                      int16_t                  dest_x,
+                      int16_t                  dest_y,
+                      uint16_t                 width,
+                      uint16_t                 height,
+                      pixman_bool_t            src_repeat,
+                      pixman_bool_t            mask_repeat,
+                      pixman_region32_t *      region,
+                      pixman_composite_func_t  composite_rect)
 {
     int n;
     const pixman_box32_t *pbox;
@@ -414,6 +423,7 @@ walk_region_internal (pixman_implementation_t *imp,
 	y_src = pbox->y1 - dest_y + src_y;
 	y_msk = pbox->y1 - dest_y + mask_y;
 	y_dst = pbox->y1;
+
 	while (h)
 	{
 	    h_this = h;
@@ -421,89 +431,96 @@ walk_region_internal (pixman_implementation_t *imp,
 	    x_src = pbox->x1 - dest_x + src_x;
 	    x_msk = pbox->x1 - dest_x + mask_x;
 	    x_dst = pbox->x1;
-	    
+
 	    if (mask_repeat)
 	    {
 		y_msk = MOD (y_msk, mask_image->bits.height);
 		if (h_this > mask_image->bits.height - y_msk)
 		    h_this = mask_image->bits.height - y_msk;
 	    }
+
 	    if (src_repeat)
 	    {
 		y_src = MOD (y_src, src_image->bits.height);
 		if (h_this > src_image->bits.height - y_src)
 		    h_this = src_image->bits.height - y_src;
 	    }
+
 	    while (w)
 	    {
 		w_this = w;
+
 		if (mask_repeat)
 		{
 		    x_msk = MOD (x_msk, mask_image->bits.width);
 		    if (w_this > mask_image->bits.width - x_msk)
 			w_this = mask_image->bits.width - x_msk;
 		}
+
 		if (src_repeat)
 		{
 		    x_src = MOD (x_src, src_image->bits.width);
 		    if (w_this > src_image->bits.width - x_src)
 			w_this = src_image->bits.width - x_src;
 		}
-		(*composite_rect) (imp,
-				  op, src_image, mask_image, dst_image,
-				  x_src, y_src, x_msk, y_msk, x_dst, y_dst,
-				  w_this, h_this);
+
+		(*composite_rect) (imp, op,
+				   src_image, mask_image, dst_image,
+				   x_src, y_src, x_msk, y_msk, x_dst, y_dst,
+				   w_this, h_this);
 		w -= w_this;
+
 		x_src += w_this;
 		x_msk += w_this;
 		x_dst += w_this;
 	    }
+
 	    h -= h_this;
 	    y_src += h_this;
 	    y_msk += h_this;
 	    y_dst += h_this;
 	}
+
 	pbox++;
     }
 }
 
 void
 _pixman_walk_composite_region (pixman_implementation_t *imp,
-			       pixman_op_t op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int16_t src_x,
-			       int16_t src_y,
-			       int16_t mask_x,
-			       int16_t mask_y,
-			       int16_t dest_x,
-			       int16_t dest_y,
-			       uint16_t width,
-			       uint16_t height,
-			       pixman_composite_func_t composite_rect)
+                               pixman_op_t              op,
+                               pixman_image_t *         src_image,
+                               pixman_image_t *         mask_image,
+                               pixman_image_t *         dst_image,
+                               int16_t                  src_x,
+                               int16_t                  src_y,
+                               int16_t                  mask_x,
+                               int16_t                  mask_y,
+                               int16_t                  dest_x,
+                               int16_t                  dest_y,
+                               uint16_t                 width,
+                               uint16_t                 height,
+                               pixman_composite_func_t  composite_rect)
 {
     pixman_region32_t region;
-    
+
     pixman_region32_init (&region);
 
     if (pixman_compute_composite_region32 (
-	    &region, src_image, mask_image, dst_image,
-	    src_x, src_y, mask_x, mask_y, dest_x, dest_y,
-	    width, height))
+            &region, src_image, mask_image, dst_image,
+            src_x, src_y, mask_x, mask_y, dest_x, dest_y,
+            width, height))
     {
 	walk_region_internal (imp, op,
-			      src_image, mask_image, dst_image,
-			      src_x, src_y, mask_x, mask_y, dest_x, dest_y,
-			      width, height, FALSE, FALSE,
-			      &region,
-			      composite_rect);
+	                      src_image, mask_image, dst_image,
+	                      src_x, src_y, mask_x, mask_y, dest_x, dest_y,
+	                      width, height, FALSE, FALSE,
+	                      &region,
+	                      composite_rect);
 
 	pixman_region32_fini (&region);
     }
 }
 
-    
 static pixman_bool_t
 mask_is_solid (pixman_image_t *mask)
 {
@@ -511,9 +528,9 @@ mask_is_solid (pixman_image_t *mask)
 	return TRUE;
 
     if (mask->type == BITS &&
-	mask->common.repeat == PIXMAN_REPEAT_NORMAL &&
-	mask->bits.width == 1 &&
-	mask->bits.height == 1)
+        mask->common.repeat == PIXMAN_REPEAT_NORMAL &&
+        mask->bits.width == 1 &&
+        mask->bits.height == 1)
     {
 	return TRUE;
     }
@@ -523,11 +540,11 @@ mask_is_solid (pixman_image_t *mask)
 
 static const pixman_fast_path_t *
 get_fast_path (const pixman_fast_path_t *fast_paths,
-	       pixman_op_t         op,
-	       pixman_image_t     *src_image,
-	       pixman_image_t     *mask_image,
-	       pixman_image_t     *dst_image,
-	       pixman_bool_t       is_pixbuf)
+               pixman_op_t               op,
+               pixman_image_t *          src_image,
+               pixman_image_t *          mask_image,
+               pixman_image_t *          dst_image,
+               pixman_bool_t             is_pixbuf)
 {
     const pixman_fast_path_t *info;
 
@@ -539,8 +556,10 @@ get_fast_path (const pixman_fast_path_t *fast_paths,
 	if (info->op != op)
 	    continue;
 
-	if ((info->src_format == PIXMAN_solid && _pixman_image_is_solid (src_image)) ||
-	    (src_image->type == BITS && info->src_format == src_image->bits.format))
+	if ((info->src_format == PIXMAN_solid &&
+	     _pixman_image_is_solid (src_image)) ||
+	    (src_image->type == BITS &&
+	     info->src_format == src_image->bits.format))
 	{
 	    valid_src = TRUE;
 	}
@@ -549,7 +568,8 @@ get_fast_path (const pixman_fast_path_t *fast_paths,
 	    continue;
 
 	if ((info->mask_format == PIXMAN_null && !mask_image) ||
-	    (mask_image && mask_image->type == BITS && info->mask_format == mask_image->bits.format))
+	    (mask_image && mask_image->type == BITS &&
+	     info->mask_format == mask_image->bits.format))
 	{
 	    valid_mask = TRUE;
 
@@ -568,7 +588,7 @@ get_fast_path (const pixman_fast_path_t *fast_paths,
 
 	if (!valid_mask)
 	    continue;
-	
+
 	if (info->dest_format != dst_image->bits.format)
 	    continue;
 
@@ -581,10 +601,14 @@ get_fast_path (const pixman_fast_path_t *fast_paths,
     return NULL;
 }
 
-static inline pixman_bool_t
-image_covers (pixman_image_t *image, pixman_box32_t *extents, int x, int y)
+static force_inline pixman_bool_t
+image_covers (pixman_image_t *image,
+              pixman_box32_t *extents,
+              int             x,
+              int             y)
 {
-    if (image->common.type == BITS && image->common.repeat == PIXMAN_REPEAT_NONE)
+    if (image->common.type == BITS &&
+	image->common.repeat == PIXMAN_REPEAT_NONE)
     {
 	if (x > extents->x1 || y > extents->y1 ||
 	    x + image->bits.width < extents->x2 ||
@@ -597,73 +621,102 @@ image_covers (pixman_image_t *image, pixman_box32_t *extents, int x, int y)
     return TRUE;
 }
 
+static force_inline pixman_bool_t
+sources_cover (pixman_image_t *src,
+	       pixman_image_t *mask,
+	       pixman_box32_t *extents,
+	       int             src_x,
+	       int             src_y,
+	       int             mask_x,
+	       int             mask_y,
+	       int             dest_x,
+	       int             dest_y)
+{
+    if (!image_covers (src, extents, dest_x - src_x, dest_y - src_y))
+	return FALSE;
+
+    if (!mask)
+	return TRUE;
+    
+    if (!image_covers (mask, extents, dest_x - mask_x, dest_y - mask_y))
+	return FALSE;
+
+    return TRUE;
+}
+
 pixman_bool_t
 _pixman_run_fast_path (const pixman_fast_path_t *paths,
-		       pixman_implementation_t *imp,
-		       pixman_op_t op,
-		       pixman_image_t *src,
-		       pixman_image_t *mask,
-		       pixman_image_t *dest,
-		       int32_t src_x,
-		       int32_t src_y,
-		       int32_t mask_x,
-		       int32_t mask_y,
-		       int32_t dest_x,
-		       int32_t dest_y,
-		       int32_t width,
-		       int32_t height)
+                       pixman_implementation_t * imp,
+                       pixman_op_t               op,
+                       pixman_image_t *          src,
+                       pixman_image_t *          mask,
+                       pixman_image_t *          dest,
+                       int32_t                   src_x,
+                       int32_t                   src_y,
+                       int32_t                   mask_x,
+                       int32_t                   mask_y,
+                       int32_t                   dest_x,
+                       int32_t                   dest_y,
+                       int32_t                   width,
+                       int32_t                   height)
 {
     pixman_composite_func_t func = NULL;
-    pixman_bool_t src_repeat = src->common.repeat == PIXMAN_REPEAT_NORMAL;
-    pixman_bool_t mask_repeat = mask && mask->common.repeat == PIXMAN_REPEAT_NORMAL;
+    pixman_bool_t src_repeat =
+	src->common.repeat == PIXMAN_REPEAT_NORMAL;
+    pixman_bool_t mask_repeat =
+	mask && mask->common.repeat == PIXMAN_REPEAT_NORMAL;
     pixman_bool_t result;
 
     if ((src->type == BITS || _pixman_image_is_solid (src)) &&
-	(!mask || mask->type == BITS)
-	&& !src->common.transform && !(mask && mask->common.transform)
-	&& !(mask && mask->common.alpha_map) && !src->common.alpha_map && !dest->common.alpha_map
-	&& (src->common.filter != PIXMAN_FILTER_CONVOLUTION)
-	&& (src->common.repeat != PIXMAN_REPEAT_PAD)
-	&& (src->common.repeat != PIXMAN_REPEAT_REFLECT)
-	&& (!mask || (mask->common.filter != PIXMAN_FILTER_CONVOLUTION &&
-		      mask->common.repeat != PIXMAN_REPEAT_PAD &&
-		      mask->common.repeat != PIXMAN_REPEAT_REFLECT))
-	&& !src->common.read_func && !src->common.write_func
-	&& !(mask && mask->common.read_func)
-	&& !(mask && mask->common.write_func)
-	&& !dest->common.read_func
-	&& !dest->common.write_func)
+        (!mask || mask->type == BITS)
+        && !src->common.transform && !(mask && mask->common.transform)
+	&& !src->common.alpha_map && !dest->common.alpha_map
+        && !(mask && mask->common.alpha_map)
+        && (src->common.filter != PIXMAN_FILTER_CONVOLUTION)
+        && (src->common.repeat != PIXMAN_REPEAT_PAD)
+        && (src->common.repeat != PIXMAN_REPEAT_REFLECT)
+        && (!mask || (mask->common.filter != PIXMAN_FILTER_CONVOLUTION &&
+                      mask->common.repeat != PIXMAN_REPEAT_PAD &&
+                      mask->common.repeat != PIXMAN_REPEAT_REFLECT))
+        && !src->common.read_func && !src->common.write_func
+        && !(mask && mask->common.read_func)
+        && !(mask && mask->common.write_func)
+        && !dest->common.read_func
+        && !dest->common.write_func)
     {
-	const pixman_fast_path_t *info;	
+	const pixman_fast_path_t *info;
 	pixman_bool_t pixbuf;
 
 	pixbuf =
-	    src && src->type == BITS		&&
-	    mask && mask->type == BITS		&&
-	    src->bits.bits == mask->bits.bits	&&
-	    src_x == mask_x			&&
-	    src_y == mask_y			&&
-	    !mask->common.component_alpha	&&
+	    src && src->type == BITS            &&
+	    mask && mask->type == BITS          &&
+	    src->bits.bits == mask->bits.bits   &&
+	    src_x == mask_x                     &&
+	    src_y == mask_y                     &&
+	    !mask->common.component_alpha       &&
 	    !mask_repeat;
-	
+
 	info = get_fast_path (paths, op, src, mask, dest, pixbuf);
-	
+
 	if (info)
 	{
 	    func = info->func;
-	    
+
 	    if (info->src_format == PIXMAN_solid)
 		src_repeat = FALSE;
-	    
-	    if (info->mask_format == PIXMAN_solid || info->flags & NEED_SOLID_MASK)
+
+	    if (info->mask_format == PIXMAN_solid ||
+		info->flags & NEED_SOLID_MASK)
+	    {
 		mask_repeat = FALSE;
-	    
-	    if ((src_repeat			&&
-		 src->bits.width == 1		&&
-		 src->bits.height == 1)	||
-		(mask_repeat			&&
-		 mask->bits.width == 1		&&
-		 mask->bits.height == 1))
+	    }
+
+	    if ((src_repeat                     &&
+	         src->bits.width == 1           &&
+	         src->bits.height == 1) ||
+	        (mask_repeat                    &&
+	         mask->bits.width == 1          &&
+	         mask->bits.height == 1))
 	    {
 		/* If src or mask are repeating 1x1 images and src_repeat or
 		 * mask_repeat are still TRUE, it means the fast path we
@@ -677,40 +730,40 @@ _pixman_run_fast_path (const pixman_fast_path_t *paths,
 	    }
 	}
     }
-    
+
     result = FALSE;
-    
+
     if (func)
     {
 	pixman_region32_t region;
 	pixman_region32_init (&region);
 
 	if (pixman_compute_composite_region32 (
-		&region, src, mask, dest,
-		src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height))
+	        &region, src, mask, dest,
+	        src_x, src_y, mask_x, mask_y, dest_x, dest_y, width, height))
 	{
 	    pixman_box32_t *extents = pixman_region32_extents (&region);
 
-	    if ((image_covers (src, extents, dest_x - src_x, dest_y - src_y)   &&
-		 (!mask || image_covers (mask, extents, dest_x - mask_x, dest_y - mask_y))) ||
-		src->common.need_workaround)
+	    if (sources_cover (
+		    src, mask, extents,
+		    src_x, src_y, mask_x, mask_y, dest_x, dest_y))
 	    {
 		walk_region_internal (imp, op,
-				      src, mask, dest,
-				      src_x, src_y, mask_x, mask_y,
-				      dest_x, dest_y,
-				      width, height,
-				      src_repeat, mask_repeat,
-				      &region,
-				      func);
-	    
+		                      src, mask, dest,
+		                      src_x, src_y, mask_x, mask_y,
+		                      dest_x, dest_y,
+		                      width, height,
+		                      src_repeat, mask_repeat,
+		                      &region,
+		                      func);
+
 		result = TRUE;
 	    }
-	    
+
 	    pixman_region32_fini (&region);
 	}
     }
-    
+
     return result;
 }
 
@@ -718,20 +771,20 @@ _pixman_run_fast_path (const pixman_fast_path_t *paths,
 
 pixman_bool_t
 pixman_region16_copy_from_region32 (pixman_region16_t *dst,
-				    pixman_region32_t *src)
+                                    pixman_region32_t *src)
 {
     int n_boxes, i;
     pixman_box32_t *boxes32;
     pixman_box16_t *boxes16;
     pixman_bool_t retval;
-    
+
     boxes32 = pixman_region32_rectangles (src, &n_boxes);
 
     boxes16 = pixman_malloc_ab (n_boxes, sizeof (pixman_box16_t));
 
     if (!boxes16)
 	return FALSE;
-    
+
     for (i = 0; i < n_boxes; ++i)
     {
 	boxes16[i].x1 = boxes32[i].x1;
@@ -748,24 +801,24 @@ pixman_region16_copy_from_region32 (pixman_region16_t *dst,
 
 pixman_bool_t
 pixman_region32_copy_from_region16 (pixman_region32_t *dst,
-				    pixman_region16_t *src)
+                                    pixman_region16_t *src)
 {
     int n_boxes, i;
     pixman_box16_t *boxes16;
     pixman_box32_t *boxes32;
     pixman_box32_t tmp_boxes[N_TMP_BOXES];
     pixman_bool_t retval;
-    
+
     boxes16 = pixman_region_rectangles (src, &n_boxes);
 
     if (n_boxes > N_TMP_BOXES)
 	boxes32 = pixman_malloc_ab (n_boxes, sizeof (pixman_box32_t));
     else
 	boxes32 = tmp_boxes;
-    
+
     if (!boxes32)
 	return FALSE;
-    
+
     for (i = 0; i < n_boxes; ++i)
     {
 	boxes32[i].x1 = boxes16[i].x1;
@@ -782,4 +835,3 @@ pixman_region32_copy_from_region16 (pixman_region32_t *dst,
 
     return retval;
 }
-
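
The bit replication in expand16 () above is easiest to see with a worked
value. A stand-alone sketch of the same scheme (dropping the nbits == 0
special case), using the 5-bit pattern 10110 to match the "12345" example
in the pixman_expand () comment:

    #include <stdint.h>
    #include <stdio.h>

    /* Same replication scheme as expand16(): put the n-bit value in the
     * top bits, then OR shifted copies into the gap below, doubling the
     * filled width each time, until all 16 bits are covered. */
    static uint16_t
    replicate16 (uint8_t val, int nbits)
    {
        uint16_t result = (uint16_t)val << (16 - nbits);

        while (nbits < 16)
        {
            result |= result >> nbits;
            nbits *= 2;
        }
        return result;
    }

    int
    main (void)
    {
        /* 5-bit 10110 -> 1011010110101101, i.e. "12345" repeated as
         * 1234512345123451, exactly as the comment describes. */
        printf ("%04x\n", replicate16 (0x16, 5));    /* prints b5ad */
        return 0;
    }
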
commit 1d52ecbbe04c810d3f30e7915663b2cd21befcba
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 21:40:41 2009 -0400

    Reformat and reindent pixman-trap.c

diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c
index f832c8d..5cb83f5 100644
--- a/pixman/pixman-trap.c
+++ b/pixman/pixman-trap.c
@@ -35,17 +35,21 @@
 PIXMAN_EXPORT pixman_fixed_t
 pixman_sample_ceil_y (pixman_fixed_t y, int n)
 {
-    pixman_fixed_t   f = pixman_fixed_frac(y);
-    pixman_fixed_t   i = pixman_fixed_floor(y);
+    pixman_fixed_t f = pixman_fixed_frac (y);
+    pixman_fixed_t i = pixman_fixed_floor (y);
 
-    f = ((f + Y_FRAC_FIRST(n)) / STEP_Y_SMALL(n)) * STEP_Y_SMALL(n) + Y_FRAC_FIRST(n);
-    if (f > Y_FRAC_LAST(n))
+    f = ((f + Y_FRAC_FIRST (n)) / STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
+	Y_FRAC_FIRST (n);
+    
+    if (f > Y_FRAC_LAST (n))
     {
-	if (pixman_fixed_to_int(i) == 0x7fff)
+	if (pixman_fixed_to_int (i) == 0x7fff)
 	{
 	    f = 0xffff; /* saturate */
-	} else {
-	    f = Y_FRAC_FIRST(n);
+	}
+	else
+	{
+	    f = Y_FRAC_FIRST (n);
 	    i += pixman_fixed_1;
 	}
     }
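
Here pixman_fixed_t is 16.16 fixed point, so the frac/floor arithmetic
above is plain bit masking. A toy illustration, assuming the standard
16.16 layout (the macro names are illustrative, not pixman's):

    #include <stdint.h>
    #include <stdio.h>

    typedef int32_t fixed_16_16;    /* same layout as pixman_fixed_t */

    #define INT_TO_FIXED(i) ((fixed_16_16) ((i) << 16))
    #define FIXED_TO_INT(f) ((int) ((f) >> 16))
    #define FIXED_FRAC(f)   ((f) & 0xffff)
    #define FIXED_FLOOR(f)  ((f) & ~0xffff)

    int
    main (void)
    {
        fixed_16_16 y = INT_TO_FIXED (3) + 0x8000;    /* y == 3.5 */

        printf ("int %d  frac 0x%04x  floor 0x%08x\n",
                FIXED_TO_INT (y), FIXED_FRAC (y), FIXED_FLOOR (y));
        /* prints: int 3  frac 0x8000  floor 0x00030000 */
        return 0;
    }
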
@@ -57,19 +61,24 @@ pixman_sample_ceil_y (pixman_fixed_t y, int n)
  * grid row
  */
 PIXMAN_EXPORT pixman_fixed_t
-pixman_sample_floor_y (pixman_fixed_t y, int n)
+pixman_sample_floor_y (pixman_fixed_t y,
+                       int            n)
 {
-    pixman_fixed_t   f = pixman_fixed_frac(y);
-    pixman_fixed_t   i = pixman_fixed_floor (y);
+    pixman_fixed_t f = pixman_fixed_frac (y);
+    pixman_fixed_t i = pixman_fixed_floor (y);
+
+    f = DIV (f - Y_FRAC_FIRST (n), STEP_Y_SMALL (n)) * STEP_Y_SMALL (n) +
+	Y_FRAC_FIRST (n);
 
-    f = DIV(f - Y_FRAC_FIRST(n), STEP_Y_SMALL(n)) * STEP_Y_SMALL(n) + Y_FRAC_FIRST(n);
-    if (f < Y_FRAC_FIRST(n))
+    if (f < Y_FRAC_FIRST (n))
     {
-	if (pixman_fixed_to_int(i) == 0x8000)
+	if (pixman_fixed_to_int (i) == 0x8000)
 	{
 	    f = 0; /* saturate */
-	} else {
-	    f = Y_FRAC_LAST(n);
+	}
+	else
+	{
+	    f = Y_FRAC_LAST (n);
 	    i -= pixman_fixed_1;
 	}
     }
@@ -80,9 +89,10 @@ pixman_sample_floor_y (pixman_fixed_t y, int n)
  * Step an edge by any amount (including negative values)
  */
 PIXMAN_EXPORT void
-pixman_edge_step (pixman_edge_t *e, int n)
+pixman_edge_step (pixman_edge_t *e,
+                  int            n)
 {
-    pixman_fixed_48_16_t	ne;
+    pixman_fixed_48_16_t ne;
 
     e->x += n * e->stepx;
 
@@ -113,19 +123,24 @@ pixman_edge_step (pixman_edge_t *e, int n)
  * elements of an edge structure
  */
 static void
-_pixman_edge_multi_init (pixman_edge_t *e, int n, pixman_fixed_t *stepx_p, pixman_fixed_t *dx_p)
+_pixman_edge_multi_init (pixman_edge_t * e,
+                         int             n,
+                         pixman_fixed_t *stepx_p,
+                         pixman_fixed_t *dx_p)
 {
-    pixman_fixed_t	stepx;
-    pixman_fixed_48_16_t	ne;
+    pixman_fixed_t stepx;
+    pixman_fixed_48_16_t ne;
 
     ne = n * (pixman_fixed_48_16_t) e->dx;
     stepx = n * e->stepx;
+
     if (ne > 0)
     {
 	int nx = ne / e->dy;
 	ne -= nx * e->dy;
 	stepx += nx * e->signdx;
     }
+
     *dx_p = ne;
     *stepx_p = stepx;
 }
@@ -135,15 +150,15 @@ _pixman_edge_multi_init (pixman_edge_t *e, int n, pixman_fixed_t *stepx_p, pixma
  * starting y value
  */
 PIXMAN_EXPORT void
-pixman_edge_init (pixman_edge_t	*e,
-		  int		n,
-		  pixman_fixed_t		y_start,
-		  pixman_fixed_t		x_top,
-		  pixman_fixed_t		y_top,
-		  pixman_fixed_t		x_bot,
-		  pixman_fixed_t		y_bot)
+pixman_edge_init (pixman_edge_t *e,
+                  int            n,
+                  pixman_fixed_t y_start,
+                  pixman_fixed_t x_top,
+                  pixman_fixed_t y_top,
+                  pixman_fixed_t x_bot,
+                  pixman_fixed_t y_bot)
 {
-    pixman_fixed_t	dx, dy;
+    pixman_fixed_t dx, dy;
 
     e->x = x_top;
     e->e = 0;
@@ -151,6 +166,7 @@ pixman_edge_init (pixman_edge_t	*e,
     dy = y_bot - y_top;
     e->dy = dy;
     e->dx = 0;
+
     if (dy)
     {
 	if (dx >= 0)
@@ -168,8 +184,11 @@ pixman_edge_init (pixman_edge_t	*e,
 	    e->e = 0;
 	}
 
-	_pixman_edge_multi_init (e, STEP_Y_SMALL(n), &e->stepx_small, &e->dx_small);
-	_pixman_edge_multi_init (e, STEP_Y_BIG(n), &e->stepx_big, &e->dx_big);
+	_pixman_edge_multi_init (e, STEP_Y_SMALL (n),
+				 &e->stepx_small, &e->dx_small);
+
+	_pixman_edge_multi_init (e, STEP_Y_BIG (n),
+				 &e->stepx_big, &e->dx_big);
     }
     pixman_edge_step (e, y_start - y_top);
 }
@@ -179,15 +198,15 @@ pixman_edge_init (pixman_edge_t	*e,
  * and a pixel offset for the line
  */
 PIXMAN_EXPORT void
-pixman_line_fixed_edge_init (pixman_edge_t *e,
-			     int	    n,
-			     pixman_fixed_t	    y,
-			     const pixman_line_fixed_t *line,
-			     int	    x_off,
-			     int	    y_off)
+pixman_line_fixed_edge_init (pixman_edge_t *            e,
+                             int                        n,
+                             pixman_fixed_t             y,
+                             const pixman_line_fixed_t *line,
+                             int                        x_off,
+                             int                        y_off)
 {
-    pixman_fixed_t	x_off_fixed = pixman_int_to_fixed(x_off);
-    pixman_fixed_t	y_off_fixed = pixman_int_to_fixed(y_off);
+    pixman_fixed_t x_off_fixed = pixman_int_to_fixed (x_off);
+    pixman_fixed_t y_off_fixed = pixman_int_to_fixed (y_off);
     const pixman_point_fixed_t *top, *bot;
 
     if (line->p1.y <= line->p2.y)
@@ -200,35 +219,36 @@ pixman_line_fixed_edge_init (pixman_edge_t *e,
 	top = &line->p2;
 	bot = &line->p1;
     }
+    
     pixman_edge_init (e, n, y,
-		    top->x + x_off_fixed,
-		    top->y + y_off_fixed,
-		    bot->x + x_off_fixed,
-		    bot->y + y_off_fixed);
+                      top->x + x_off_fixed,
+                      top->y + y_off_fixed,
+                      bot->x + x_off_fixed,
+                      bot->y + y_off_fixed);
 }
 
 PIXMAN_EXPORT void
-pixman_add_traps (pixman_image_t *	image,
-		  int16_t	x_off,
-		  int16_t	y_off,
-		  int		ntrap,
-		  pixman_trap_t *traps)
+pixman_add_traps (pixman_image_t * image,
+                  int16_t          x_off,
+                  int16_t          y_off,
+                  int              ntrap,
+                  pixman_trap_t *  traps)
 {
-    int		bpp;
-    int		width;
-    int		height;
+    int bpp;
+    int width;
+    int height;
 
-    pixman_fixed_t	x_off_fixed;
-    pixman_fixed_t	y_off_fixed;
-    pixman_edge_t  l, r;
-    pixman_fixed_t	t, b;
+    pixman_fixed_t x_off_fixed;
+    pixman_fixed_t y_off_fixed;
+    pixman_edge_t l, r;
+    pixman_fixed_t t, b;
 
     width = image->bits.width;
     height = image->bits.height;
     bpp = PIXMAN_FORMAT_BPP (image->bits.format);
-    
-    x_off_fixed = pixman_int_to_fixed(x_off);
-    y_off_fixed = pixman_int_to_fixed(y_off);
+
+    x_off_fixed = pixman_int_to_fixed (x_off);
+    y_off_fixed = pixman_int_to_fixed (y_off);
 
     while (ntrap--)
     {
@@ -236,83 +256,80 @@ pixman_add_traps (pixman_image_t *	image,
 	if (t < 0)
 	    t = 0;
 	t = pixman_sample_ceil_y (t, bpp);
-    
+
 	b = traps->bot.y + y_off_fixed;
 	if (pixman_fixed_to_int (b) >= height)
 	    b = pixman_int_to_fixed (height) - 1;
 	b = pixman_sample_floor_y (b, bpp);
-	
+
 	if (b >= t)
 	{
 	    /* initialize edge walkers */
 	    pixman_edge_init (&l, bpp, t,
-			      traps->top.l + x_off_fixed,
-			      traps->top.y + y_off_fixed,
-			      traps->bot.l + x_off_fixed,
-			      traps->bot.y + y_off_fixed);
-	
+	                      traps->top.l + x_off_fixed,
+	                      traps->top.y + y_off_fixed,
+	                      traps->bot.l + x_off_fixed,
+	                      traps->bot.y + y_off_fixed);
+
 	    pixman_edge_init (&r, bpp, t,
-			      traps->top.r + x_off_fixed,
-			      traps->top.y + y_off_fixed,
-			      traps->bot.r + x_off_fixed,
-			      traps->bot.y + y_off_fixed);
-	    
+	                      traps->top.r + x_off_fixed,
+	                      traps->top.y + y_off_fixed,
+	                      traps->bot.r + x_off_fixed,
+	                      traps->bot.y + y_off_fixed);
+
 	    pixman_rasterize_edges (image, &l, &r, t, b);
 	}
+
 	traps++;
     }
 }
 
 static void
 dump_image (pixman_image_t *image,
-	    const char *title)
+            const char *    title)
 {
     int i, j;
-    
+
     if (!image->type == BITS)
-    {
 	printf ("%s is not a regular image\n", title);
-    }
 
     if (!image->bits.format == PIXMAN_a8)
-    {
 	printf ("%s is not an alpha mask\n", title);
-    }
 
     printf ("\n\n\n%s: \n", title);
-    
+
     for (i = 0; i < image->bits.height; ++i)
     {
 	uint8_t *line =
 	    (uint8_t *)&(image->bits.bits[i * image->bits.rowstride]);
-	    
+
 	for (j = 0; j < image->bits.width; ++j)
-	    printf ("%c", line[j]? '#' : ' ');
+	    printf ("%c", line[j] ? '#' : ' ');
 
 	printf ("\n");
     }
 }
 
 PIXMAN_EXPORT void
-pixman_add_trapezoids (pixman_image_t           *image,
-		       int16_t                   x_off,
-		       int                       y_off,
-		       int                       ntraps,
-		       const pixman_trapezoid_t *traps)
+pixman_add_trapezoids (pixman_image_t *          image,
+                       int16_t                   x_off,
+                       int                       y_off,
+                       int                       ntraps,
+                       const pixman_trapezoid_t *traps)
 {
     int i;
 
 #if 0
     dump_image (image, "before");
 #endif
-    
+
     for (i = 0; i < ntraps; ++i)
     {
 	const pixman_trapezoid_t *trap = &(traps[i]);
-	
+
 	if (!pixman_trapezoid_valid (trap))
 	    continue;
-	
+
 	pixman_rasterize_trapezoid (image, trap, x_off, y_off);
     }
 
@@ -322,31 +339,32 @@ pixman_add_trapezoids (pixman_image_t           *image,
 }
 
 PIXMAN_EXPORT void
-pixman_rasterize_trapezoid (pixman_image_t *    image,
-			    const pixman_trapezoid_t *trap,
-			    int			x_off,
-			    int			y_off)
+pixman_rasterize_trapezoid (pixman_image_t *          image,
+                            const pixman_trapezoid_t *trap,
+                            int                       x_off,
+                            int                       y_off)
 {
-    int		bpp;
-    int		width;
-    int		height;
+    int bpp;
+    int width;
+    int height;
 
-    pixman_fixed_t	x_off_fixed;
-    pixman_fixed_t	y_off_fixed;
-    pixman_edge_t	l, r;
-    pixman_fixed_t	t, b;
+    pixman_fixed_t x_off_fixed;
+    pixman_fixed_t y_off_fixed;
+    pixman_edge_t l, r;
+    pixman_fixed_t t, b;
 
     return_if_fail (image->type == BITS);
-    
+
     if (!pixman_trapezoid_valid (trap))
 	return;
 
     width = image->bits.width;
     height = image->bits.height;
     bpp = PIXMAN_FORMAT_BPP (image->bits.format);
-    
-    x_off_fixed = pixman_int_to_fixed(x_off);
-    y_off_fixed = pixman_int_to_fixed(y_off);
+
+    x_off_fixed = pixman_int_to_fixed (x_off);
+    y_off_fixed = pixman_int_to_fixed (y_off);
+
     t = trap->top + y_off_fixed;
     if (t < 0)
 	t = 0;
@@ -356,7 +374,7 @@ pixman_rasterize_trapezoid (pixman_image_t *    image,
     if (pixman_fixed_to_int (b) >= height)
 	b = pixman_int_to_fixed (height) - 1;
     b = pixman_sample_floor_y (b, bpp);
-    
+
     if (b >= t)
     {
 	/* initialize edge walkers */
commit c1178e49417bbea7f91b23f71c9ba957500da0ff
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 21:37:16 2009 -0400

    Reformat pixman-timer.c

diff --git a/pixman/pixman-timer.c b/pixman/pixman-timer.c
index c995bbf..f5ae18e 100644
--- a/pixman/pixman-timer.c
+++ b/pixman/pixman-timer.c
@@ -39,10 +39,10 @@ dump_timers (void)
     for (timer = timers; timer != NULL; timer = timer->next)
     {
 	printf ("%s:   total: %llu     n: %llu      avg: %f\n",
-		timer->name,
-		timer->total,
-		timer->n_times,
-		timer->total / (double)timer->n_times);
+	        timer->name,
+	        timer->total,
+	        timer->n_times,
+	        timer->total / (double)timer->n_times);
     }
 }
 
@@ -51,14 +51,14 @@ pixman_timer_register (pixman_timer_t *timer)
 {
     static int initialized;
 
-    int atexit(void (*function)(void));
+    int atexit (void (*function)(void));
 
     if (!initialized)
     {
 	atexit (dump_timers);
 	initialized = 1;
     }
-    
+
     timer->next = timers;
     timers = timer;
 }
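
The pattern here is a self-registering linked list of counters plus a
single atexit () handler that walks the list when the process exits. A
hypothetical stand-alone version (none of these names are pixman's):

    #include <stdio.h>
    #include <stdlib.h>

    struct stat_timer
    {
        const char        *name;
        unsigned long long total;
        unsigned long long n_times;
        struct stat_timer *next;
    };

    static struct stat_timer *all_timers;

    static void
    dump_all (void)
    {
        struct stat_timer *t;

        for (t = all_timers; t; t = t->next)
        {
            printf ("%s: total %llu, n %llu, avg %f\n",
                    t->name, t->total, t->n_times,
                    t->total / (double) t->n_times);
        }
    }

    static void
    register_timer (struct stat_timer *t)
    {
        static int initialized;

        /* Install the exit hook exactly once, on first registration. */
        if (!initialized)
        {
            atexit (dump_all);
            initialized = 1;
        }

        t->next = all_timers;
        all_timers = t;
    }

    int
    main (void)
    {
        static struct stat_timer blit = { "blit", 0, 0, NULL };
        int i;

        register_timer (&blit);

        for (i = 0; i < 10; i++)
        {
            blit.total += 42;   /* pretend: 42 time units per call */
            blit.n_times++;
        }
        return 0;               /* dump_all() runs via atexit() */
    }
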
commit 74774bad001504b4b2283689b6b55e21fa943dd8
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 21:36:32 2009 -0400

    Reformat and reindent pixman-sse2.c

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 334990d..cb3daf2 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -23,7 +23,7 @@
  *
  * Author:  Rodrigo Kumpera (kumpera at gmail.com)
  *          André Tupinambá (andrelrt at gmail.com)
- * 
+ *
  * Based on work by Owen Taylor and Søren Sandmann
  */
 #ifdef HAVE_CONFIG_H
@@ -38,7 +38,7 @@
 
 #ifdef USE_SSE2
 
-/* -------------------------------------------------------------------------------------------------
+/* --------------------------------------------------------------------
  * Locals
  */
 
@@ -67,13 +67,13 @@ static __m128i mask_blue;
 static __m128i mask_565_fix_rb;
 static __m128i mask_565_fix_g;
 
-/* -------------------------------------------------------------------------------------------------
+/* ----------------------------------------------------------------------
  * SSE2 Inlines
  */
 static force_inline __m128i
 unpack_32_1x128 (uint32_t data)
 {
-    return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128());
+    return _mm_unpacklo_epi8 (_mm_cvtsi32_si128 (data), _mm_setzero_si128 ());
 }
 
 static force_inline void
@@ -87,7 +87,7 @@ static force_inline __m128i
 unpack_565_to_8888 (__m128i lo)
 {
     __m128i r, g, b, rb, t;
-    
+
     r = _mm_and_si128 (_mm_slli_epi32 (lo, 8), mask_red);
     g = _mm_and_si128 (_mm_slli_epi32 (lo, 5), mask_green);
     b = _mm_and_si128 (_mm_slli_epi32 (lo, 3), mask_blue);
@@ -100,12 +100,16 @@ unpack_565_to_8888 (__m128i lo)
     t  = _mm_and_si128 (g, mask_565_fix_g);
     t  = _mm_srli_epi32 (t, 6);
     g  = _mm_or_si128 (g, t);
-    
+
     return _mm_or_si128 (rb, g);
 }
 
 static force_inline void
-unpack_565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* data2, __m128i* data3)
+unpack_565_128_4x128 (__m128i  data,
+                      __m128i* data0,
+                      __m128i* data1,
+                      __m128i* data2,
+                      __m128i* data3)
 {
     __m128i lo, hi;
 
@@ -122,7 +126,9 @@ unpack_565_128_4x128 (__m128i data, __m128i* data0, __m128i* data1, __m128i* dat
 static force_inline uint16_t
 pack_565_32_16 (uint32_t pixel)
 {
-    return (uint16_t) (((pixel>>8) & 0xf800) | ((pixel>>5) & 0x07e0) | ((pixel>>3) & 0x001f));
+    return (uint16_t) (((pixel >> 8) & 0xf800) |
+		       ((pixel >> 5) & 0x07e0) |
+		       ((pixel >> 3) & 0x001f));
 }
 
 static force_inline __m128i
@@ -137,12 +143,12 @@ pack_565_2x128_128 (__m128i lo, __m128i hi)
     __m128i data;
     __m128i r, g1, g2, b;
 
-    data = pack_2x128_128 ( lo, hi );
+    data = pack_2x128_128 (lo, hi);
 
-    r  = _mm_and_si128 (data , mask_565_r);
-    g1 = _mm_and_si128 (_mm_slli_epi32 (data , 3), mask_565_g1);
-    g2 = _mm_and_si128 (_mm_srli_epi32 (data , 5), mask_565_g2);
-    b  = _mm_and_si128 (_mm_srli_epi32 (data , 3), mask_565_b);
+    r  = _mm_and_si128 (data, mask_565_r);
+    g1 = _mm_and_si128 (_mm_slli_epi32 (data, 3), mask_565_g1);
+    g2 = _mm_and_si128 (_mm_srli_epi32 (data, 5), mask_565_g2);
+    b  = _mm_and_si128 (_mm_srli_epi32 (data, 3), mask_565_b);
 
     return _mm_or_si128 (_mm_or_si128 (_mm_or_si128 (r, g1), g2), b);
 }
@@ -150,64 +156,82 @@ pack_565_2x128_128 (__m128i lo, __m128i hi)
 static force_inline __m128i
 pack_565_4x128_128 (__m128i* xmm0, __m128i* xmm1, __m128i* xmm2, __m128i* xmm3)
 {
-    return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1), pack_565_2x128_128 (*xmm2, *xmm3));
+    return _mm_packus_epi16 (pack_565_2x128_128 (*xmm0, *xmm1),
+			     pack_565_2x128_128 (*xmm2, *xmm3));
 }
 
 static force_inline int
 is_opaque (__m128i x)
 {
     __m128i ffs = _mm_cmpeq_epi8 (x, x);
+
     return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ffs)) & 0x8888) == 0x8888;
 }
 
 static force_inline int
 is_zero (__m128i x)
 {
-    return _mm_movemask_epi8 (_mm_cmpeq_epi8 (x, _mm_setzero_si128())) == 0xffff;
+    return _mm_movemask_epi8 (
+	_mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
 }
 
 static force_inline int
 is_transparent (__m128i x)
 {
-    return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, _mm_setzero_si128())) & 0x8888) == 0x8888;
+    return (_mm_movemask_epi8 (
+		_mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) & 0x8888) == 0x8888;
 }
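
The 0x8888 masks above pick out byte lanes 3, 7, 11 and 15 of the
_mm_movemask_epi8 () result, which are exactly the alpha bytes of four
packed ARGB32 pixels. A scalar sketch of the same predicates:

    #include <stdint.h>

    /* Scalar equivalent of is_opaque(): all four alpha bytes are 0xff. */
    static int
    is_opaque_4 (const uint32_t p[4])
    {
        int i;

        for (i = 0; i < 4; i++)
        {
            if ((p[i] >> 24) != 0xff)
                return 0;
        }
        return 1;
    }

    /* Scalar equivalent of is_transparent(): all four alpha bytes are 0. */
    static int
    is_transparent_4 (const uint32_t p[4])
    {
        int i;

        for (i = 0; i < 4; i++)
        {
            if ((p[i] >> 24) != 0x00)
                return 0;
        }
        return 1;
    }

is_zero () is stricter still: there the SSE2 code checks all 16 movemask
bits, i.e. every byte of all four pixels is zero.
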
 
 static force_inline __m128i
 expand_pixel_32_1x128 (uint32_t data)
 {
-    return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE(1, 0, 1, 0));
+    return _mm_shuffle_epi32 (unpack_32_1x128 (data), _MM_SHUFFLE (1, 0, 1, 0));
 }
 
 static force_inline __m128i
 expand_alpha_1x128 (__m128i data)
 {
-    return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3));
+    return _mm_shufflehi_epi16 (_mm_shufflelo_epi16 (data,
+						     _MM_SHUFFLE (3, 3, 3, 3)),
+				_MM_SHUFFLE (3, 3, 3, 3));
 }
 
 static force_inline void
-expand_alpha_2x128 (__m128i data_lo, __m128i data_hi, __m128i* alpha_lo, __m128i* alpha_hi)
+expand_alpha_2x128 (__m128i  data_lo,
+                    __m128i  data_hi,
+                    __m128i* alpha_lo,
+                    __m128i* alpha_hi)
 {
     __m128i lo, hi;
 
-    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE(3, 3, 3, 3));
-    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE(3, 3, 3, 3));
-    *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE(3, 3, 3, 3));
-    *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE(3, 3, 3, 3));
+    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 3, 3, 3));
+    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 3, 3, 3));
+
+    *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 3, 3, 3));
+    *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 3, 3, 3));
 }
 
 static force_inline void
-expand_alpha_rev_2x128 (__m128i data_lo, __m128i data_hi, __m128i* alpha_lo, __m128i* alpha_hi)
+expand_alpha_rev_2x128 (__m128i  data_lo,
+                        __m128i  data_hi,
+                        __m128i* alpha_lo,
+                        __m128i* alpha_hi)
 {
     __m128i lo, hi;
 
-    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE(0, 0, 0, 0));
-    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE(0, 0, 0, 0));
-    *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE(0, 0, 0, 0));
-    *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE(0, 0, 0, 0));
+    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (0, 0, 0, 0));
+    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (0, 0, 0, 0));
+    *alpha_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (0, 0, 0, 0));
+    *alpha_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (0, 0, 0, 0));
 }
 
 static force_inline void
-pix_multiply_2x128 (__m128i* data_lo, __m128i* data_hi, __m128i* alpha_lo, __m128i* alpha_hi, __m128i* ret_lo, __m128i* ret_hi)
+pix_multiply_2x128 (__m128i* data_lo,
+                    __m128i* data_hi,
+                    __m128i* alpha_lo,
+                    __m128i* alpha_hi,
+                    __m128i* ret_lo,
+                    __m128i* ret_hi)
 {
     __m128i lo, hi;
 
@@ -220,9 +244,16 @@ pix_multiply_2x128 (__m128i* data_lo, __m128i* data_hi, __m128i* alpha_lo, __m12
 }
 
 static force_inline void
-pix_add_multiply_2x128 (__m128i* src_lo, __m128i* src_hi, __m128i* alpha_dst_lo, __m128i* alpha_dst_hi,
-                      __m128i* dst_lo, __m128i* dst_hi, __m128i* alpha_src_lo, __m128i* alpha_src_hi,
-                      __m128i* ret_lo, __m128i* ret_hi)
+pix_add_multiply_2x128 (__m128i* src_lo,
+                        __m128i* src_hi,
+                        __m128i* alpha_dst_lo,
+                        __m128i* alpha_dst_hi,
+                        __m128i* dst_lo,
+                        __m128i* dst_hi,
+                        __m128i* alpha_src_lo,
+                        __m128i* alpha_src_hi,
+                        __m128i* ret_lo,
+                        __m128i* ret_hi)
 {
     __m128i lo, hi;
     __m128i mul_lo, mul_hi;
@@ -240,25 +271,36 @@ pix_add_multiply_2x128 (__m128i* src_lo, __m128i* src_hi, __m128i* alpha_dst_lo,
 }
 
 static force_inline void
-negate_2x128 (__m128i data_lo, __m128i data_hi, __m128i* neg_lo, __m128i* neg_hi)
+negate_2x128 (__m128i  data_lo,
+              __m128i  data_hi,
+              __m128i* neg_lo,
+              __m128i* neg_hi)
 {
     *neg_lo = _mm_xor_si128 (data_lo, mask_00ff);
     *neg_hi = _mm_xor_si128 (data_hi, mask_00ff);
 }
 
 static force_inline void
-invert_colors_2x128 (__m128i data_lo, __m128i data_hi, __m128i* inv_lo, __m128i* inv_hi)
+invert_colors_2x128 (__m128i  data_lo,
+                     __m128i  data_hi,
+                     __m128i* inv_lo,
+                     __m128i* inv_hi)
 {
     __m128i lo, hi;
 
-    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE(3, 0, 1, 2));
-    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE(3, 0, 1, 2));
-    *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE(3, 0, 1, 2));
-    *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE(3, 0, 1, 2));
+    lo = _mm_shufflelo_epi16 (data_lo, _MM_SHUFFLE (3, 0, 1, 2));
+    hi = _mm_shufflelo_epi16 (data_hi, _MM_SHUFFLE (3, 0, 1, 2));
+    *inv_lo = _mm_shufflehi_epi16 (lo, _MM_SHUFFLE (3, 0, 1, 2));
+    *inv_hi = _mm_shufflehi_epi16 (hi, _MM_SHUFFLE (3, 0, 1, 2));
 }
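
Despite its name, invert_colors_* complements nothing: _MM_SHUFFLE (3, 0, 1, 2) swaps lanes 0 and 2 (blue and red) and leaves green and alpha alone. A scalar sketch for one unpacked pixel, under the same lane-order assumption as above:

    /* swap the B and R lanes of one unpacked pixel (lanes B, G, R, A) */
    static void
    invert_colors_scalar (uint16_t px[4])
    {
        uint16_t tmp = px[0];

        px[0] = px[2];
        px[2] = tmp;
    }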
 
 static force_inline void
-over_2x128 (__m128i* src_lo, __m128i* src_hi, __m128i* alpha_lo, __m128i* alpha_hi, __m128i* dst_lo, __m128i* dst_hi)
+over_2x128 (__m128i* src_lo,
+            __m128i* src_hi,
+            __m128i* alpha_lo,
+            __m128i* alpha_hi,
+            __m128i* dst_lo,
+            __m128i* dst_hi)
 {
     __m128i t1, t2;
 
@@ -271,7 +313,10 @@ over_2x128 (__m128i* src_lo, __m128i* src_hi, __m128i* alpha_lo, __m128i* alpha_
 }
 
 static force_inline void
-over_rev_non_pre_2x128 (__m128i src_lo, __m128i src_hi, __m128i* dst_lo, __m128i* dst_hi)
+over_rev_non_pre_2x128 (__m128i  src_lo,
+                        __m128i  src_hi,
+                        __m128i* dst_lo,
+                        __m128i* dst_hi)
 {
     __m128i lo, hi;
     __m128i alpha_lo, alpha_hi;
@@ -289,13 +334,19 @@ over_rev_non_pre_2x128 (__m128i src_lo, __m128i src_hi, __m128i* dst_lo, __m128i
 }
 
 static force_inline void
-in_over_2x128 (__m128i* src_lo,  __m128i* src_hi,  __m128i*  alpha_lo, __m128i*  alpha_hi,
-              __m128i* mask_lo, __m128i* mask_hi, __m128i* dst_lo,   __m128i* dst_hi)
+in_over_2x128 (__m128i* src_lo,
+               __m128i* src_hi,
+               __m128i* alpha_lo,
+               __m128i* alpha_hi,
+               __m128i* mask_lo,
+               __m128i* mask_hi,
+               __m128i* dst_lo,
+               __m128i* dst_hi)
 {
     __m128i s_lo, s_hi;
     __m128i a_lo, a_hi;
 
-    pix_multiply_2x128 (  src_lo,   src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
+    pix_multiply_2x128 (src_lo,   src_hi, mask_lo, mask_hi, &s_lo, &s_hi);
     pix_multiply_2x128 (alpha_lo, alpha_hi, mask_lo, mask_hi, &a_lo, &a_hi);
 
     over_2x128 (&s_lo, &s_hi, &a_lo, &a_hi, dst_lo, dst_hi);
@@ -327,57 +378,64 @@ load_128_unaligned (const __m128i* src)
     return _mm_loadu_si128 (src);
 }
 
-/* save 4 pixels using Write Combining memory on a 16-byte boundary aligned address */
+/* save 4 pixels using Write Combining (non-temporal) stores
+ * to a 16-byte-aligned address
+ */
 static force_inline void
-save_128_write_combining (__m128i* dst, __m128i data)
+save_128_write_combining (__m128i* dst,
+                          __m128i  data)
 {
     _mm_stream_si128 (dst, data);
 }
 
 /* save 4 pixels on a 16-byte boundary aligned address */
 static force_inline void
-save_128_aligned (__m128i* dst, __m128i data)
+save_128_aligned (__m128i* dst,
+                  __m128i  data)
 {
     _mm_store_si128 (dst, data);
 }
 
 /* save 4 pixels on an unaligned address */
 static force_inline void
-save_128_unaligned (__m128i* dst, __m128i data)
+save_128_unaligned (__m128i* dst,
+                    __m128i  data)
 {
     _mm_storeu_si128 (dst, data);
 }
 
-/* -------------------------------------------------------------------------------------------------
+/* ------------------------------------------------------------------
  * MMX inlines
  */
 
 static force_inline __m64
 unpack_32_1x64 (uint32_t data)
 {
-    return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (data), _mm_setzero_si64());
+    return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (data), _mm_setzero_si64 ());
 }
 
 static force_inline __m64
 expand_alpha_1x64 (__m64 data)
 {
-    return _mm_shuffle_pi16 (data, _MM_SHUFFLE(3, 3, 3, 3));
+    return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 3, 3, 3));
 }
 
 static force_inline __m64
 expand_alpha_rev_1x64 (__m64 data)
 {
-    return _mm_shuffle_pi16 (data, _MM_SHUFFLE(0, 0, 0, 0));
+    return _mm_shuffle_pi16 (data, _MM_SHUFFLE (0, 0, 0, 0));
 }
 
 static force_inline __m64
 expand_pixel_8_1x64 (uint8_t data)
 {
-    return _mm_shuffle_pi16 (unpack_32_1x64 ((uint32_t)data), _MM_SHUFFLE(0, 0, 0, 0));
+    return _mm_shuffle_pi16 (
+	unpack_32_1x64 ((uint32_t)data), _MM_SHUFFLE (0, 0, 0, 0));
 }
 
 static force_inline __m64
-pix_multiply_1x64 (__m64 data, __m64 alpha)
+pix_multiply_1x64 (__m64 data,
+                   __m64 alpha)
 {
     return _mm_mulhi_pu16 (_mm_adds_pu16 (_mm_mullo_pi16 (data, alpha),
                                           mask_x0080),
@@ -385,12 +443,16 @@ pix_multiply_1x64 (__m64 data, __m64 alpha)
 }
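
The mullo/adds/mulhi sequence is the usual exact rounded byte multiply: mask_x0080 adds the rounding bias, and taking the high half of a multiply by 0x0101 divides by 255 (assuming those masks hold 0x0080 and 0x0101 in every 16-bit lane, as their names suggest). Per channel it reduces to:

    /* scalar equivalent of pix_multiply_1x64 for one channel:
     * t * 0x0101 >> 16 equals (t + (t >> 8)) >> 8, which rounds
     * t / 255 exactly for t = a * b + 0x80 */
    static uint8_t
    mul_un8 (uint8_t a, uint8_t b)
    {
        uint32_t t = (uint32_t)a * b + 0x80;

        return (uint8_t) ((t + (t >> 8)) >> 8);
    }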
 
 static force_inline __m64
-pix_add_multiply_1x64 (__m64* src, __m64* alpha_dst, __m64* dst, __m64* alpha_src)
+pix_add_multiply_1x64 (__m64* src,
+                       __m64* alpha_dst,
+                       __m64* dst,
+                       __m64* alpha_src)
 {
-    return _mm_mulhi_pu16 (_mm_adds_pu16 (_mm_adds_pu16 (_mm_mullo_pi16 (*src, *alpha_dst),
-                                                         mask_x0080),
-                                          _mm_mullo_pi16 (*dst, *alpha_src)),
-                           mask_x0101);
+    return _mm_mulhi_pu16 (
+	_mm_adds_pu16 (_mm_adds_pu16 (_mm_mullo_pi16 (*src, *alpha_dst),
+				      mask_x0080),
+		       _mm_mullo_pi16 (*dst, *alpha_src)),
+	mask_x0101);
 }
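
pix_add_multiply_* folds two such multiplies into a single divide, computing (src * alpha_dst + dst * alpha_src) / 255; note the 16-bit saturating adds can clamp the sum before the divide. A per-channel scalar sketch:

    /* scalar equivalent of pix_add_multiply_1x64 for one channel */
    static uint8_t
    add_mul_un8 (uint8_t s, uint8_t ad, uint8_t d, uint8_t as)
    {
        uint32_t t = (uint32_t)s * ad + 0x80 + (uint32_t)d * as;

        if (t > 0xffff)     /* _mm_adds_pu16 saturates here */
            t = 0xffff;

        return (uint8_t) ((t + (t >> 8)) >> 8);
    }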
 
 static force_inline __m64
@@ -402,7 +464,7 @@ negate_1x64 (__m64 data)
 static force_inline __m64
 invert_colors_1x64 (__m64 data)
 {
-    return _mm_shuffle_pi16 (data, _MM_SHUFFLE(3, 0, 1, 2));
+    return _mm_shuffle_pi16 (data, _MM_SHUFFLE (3, 0, 1, 2));
 }
 
 static force_inline __m64
@@ -425,15 +487,15 @@ over_rev_non_pre_1x64 (__m64 src, __m64 dst)
     __m64 alpha = expand_alpha_1x64 (src);
 
     return over_1x64 (pix_multiply_1x64 (invert_colors_1x64 (src),
-                                        _mm_or_si64 (alpha, mask_x_alpha)),
+                                         _mm_or_si64 (alpha, mask_x_alpha)),
                       alpha,
                       dst);
 }
 
 static force_inline uint32_t
-pack_1x64_32( __m64 data )
+pack_1x64_32 (__m64 data)
 {
-    return _mm_cvtsi64_si32 (_mm_packs_pu16 (data, _mm_setzero_si64()));
+    return _mm_cvtsi64_si32 (_mm_packs_pu16 (data, _mm_setzero_si64 ()));
 }
 
 /* Expand 16 bits positioned at @pos (0-3) of a mmx register into
@@ -469,25 +531,26 @@ expand565_16_1x64 (uint16_t pixel)
     return _mm_srli_pi16 (p, 8);
 }
 
-/* -------------------------------------------------------------------------------------------------
+/* ----------------------------------------------------------------------------
  * Compose Core transformations
  */
 static force_inline uint32_t
 core_combine_over_u_pixel_sse2 (uint32_t src, uint32_t dst)
 {
-    uint8_t     a;
-    __m64       ms;
+    uint8_t a;
+    __m64 ms;
 
     a = src >> 24;
 
     if (a == 0xff)
     {
-        return src;
+	return src;
     }
     else if (src)
     {
-        ms = unpack_32_1x64 (src);
-        return pack_1x64_32 (over_1x64 (ms, expand_alpha_1x64 (ms), unpack_32_1x64 (dst)));
+	ms = unpack_32_1x64 (src);
+	return pack_1x64_32 (
+	    over_1x64 (ms, expand_alpha_1x64 (ms), unpack_32_1x64 (dst)));
     }
 
     return dst;
@@ -504,7 +567,7 @@ combine1 (const uint32_t *ps, const uint32_t *pm)
 
 	mm = unpack_32_1x64 (*pm);
 	mm = expand_alpha_1x64 (mm);
-	
+
 	ms = unpack_32_1x64 (s);
 	ms = pix_multiply_1x64 (ms, mm);
 
@@ -520,7 +583,7 @@ combine4 (const __m128i *ps, const __m128i *pm)
     __m128i xmm_src_lo, xmm_src_hi;
     __m128i xmm_msk_lo, xmm_msk_hi;
     __m128i s;
-    
+
     if (pm)
     {
 	xmm_msk_lo = load_128_unaligned (pm);
@@ -528,18 +591,20 @@ combine4 (const __m128i *ps, const __m128i *pm)
 	if (is_transparent (xmm_msk_lo))
 	    return _mm_setzero_si128 ();
     }
-    
+
     s = load_128_unaligned (ps);
-	
+
     if (pm)
     {
 	unpack_128_2x128 (s, &xmm_src_lo, &xmm_src_hi);
 	unpack_128_2x128 (xmm_msk_lo, &xmm_msk_lo, &xmm_msk_hi);
-	
+
 	expand_alpha_2x128 (xmm_msk_lo, xmm_msk_hi, &xmm_msk_lo, &xmm_msk_hi);
-	
-	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_msk_lo, &xmm_msk_hi, &xmm_src_lo, &xmm_src_hi);
-	
+
+	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+			    &xmm_msk_lo, &xmm_msk_hi,
+			    &xmm_src_lo, &xmm_src_hi);
+
 	s = pack_2x128_128 (xmm_src_lo, xmm_src_hi);
     }
 
@@ -547,7 +612,10 @@ combine4 (const __m128i *ps, const __m128i *pm)
 }
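
combine1 and combine4 are the same idea at two widths: when a mask is supplied, the source gets pre-scaled by the mask pixel's alpha before the operator proper runs, and combine4 additionally short-circuits fully transparent masks. A scalar sketch of combine1, reusing mul_un8 from the earlier sketch:

    /* scalar sketch of combine1: scale s by the mask pixel's alpha */
    static uint32_t
    combine1_scalar (uint32_t s, const uint32_t *pm)
    {
        if (pm)
        {
            uint8_t ma = *pm >> 24;
            uint32_t r = 0;
            int shift;

            for (shift = 0; shift < 32; shift += 8)
                r |= (uint32_t)mul_un8 ((uint8_t)(s >> shift), ma) << shift;

            s = r;
        }

        return s;
    }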
 
 static force_inline void
-core_combine_over_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
+core_combine_over_u_sse2 (uint32_t*       pd,
+                          const uint32_t* ps,
+                          const uint32_t* pm,
+                          int             w)
 {
     uint32_t s, d;
 
@@ -564,14 +632,14 @@ core_combine_over_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm,
     while (w &&
            ((unsigned long)pd & 15))
     {
-        d = *pd;
-        s = combine1 (ps, pm);
+	d = *pd;
+	s = combine1 (ps, pm);
 
-        *pd++ = core_combine_over_u_pixel_sse2 (s, d);
+	*pd++ = core_combine_over_u_pixel_sse2 (s, d);
 	ps++;
 	if (pm)
 	    pm++;
-        w--;
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -581,55 +649,65 @@ core_combine_over_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm,
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
 	cache_prefetch_next ((__m128i*)pm);
 
-        /* I'm loading unaligned because I'm not sure about the address alignment. */
-        xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
-
-        if (is_opaque (xmm_src_hi))
-        {
-            save_128_aligned ((__m128i*)pd, xmm_src_hi);
-        }
-        else if (!is_zero (xmm_src_hi))
-        {
-            xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-
-            unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-            unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-            expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
-
-            over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-            /* rebuid the 4 pixel data and save*/
-            save_128_aligned ((__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-        }
-
-        w -= 4;
-        ps += 4;
-        pd += 4;
+	/* I'm loading unaligned because I'm not sure about
+	 * the address alignment.
+	 */
+	xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
+
+	if (is_opaque (xmm_src_hi))
+	{
+	    save_128_aligned ((__m128i*)pd, xmm_src_hi);
+	}
+	else if (!is_zero (xmm_src_hi))
+	{
+	    xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+
+	    unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	    unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+
+	    expand_alpha_2x128 (
+		xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+
+	    over_2x128 (&xmm_src_lo, &xmm_src_hi,
+			&xmm_alpha_lo, &xmm_alpha_hi,
+			&xmm_dst_lo, &xmm_dst_hi);
+
+	    /* rebuild the 4 pixel data and save */
+	    save_128_aligned ((__m128i*)pd,
+			      pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	}
+
+	w -= 4;
+	ps += 4;
+	pd += 4;
 	if (pm)
 	    pm += 4;
     }
 
     while (w)
     {
-        d = *pd;
-        s = combine1 (ps, pm);
+	d = *pd;
+	s = combine1 (ps, pm);
 
-        *pd++ = core_combine_over_u_pixel_sse2 (s, d);
+	*pd++ = core_combine_over_u_pixel_sse2 (s, d);
 	ps++;
 	if (pm)
 	    pm++;
-        w--;
+
+	w--;
     }
 }
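
Every core_combine_*_sse2 routine below follows the shape of this one: a scalar head until the destination reaches a 16-byte boundary, a four-pixel SSE2 body with prefetching (here augmented by the is_opaque/is_zero shortcuts), and a scalar tail for the at most three leftover pixels. A skeleton of that pattern; process_1 and process_4 are hypothetical stand-ins for the per-operator kernels:

    #include <stdint.h>

    typedef void (*kernel_fn) (uint32_t *pd, const uint32_t *ps);

    static void
    combine_loop_sketch (uint32_t *pd, const uint32_t *ps, int w,
                         kernel_fn process_1, kernel_fn process_4)
    {
        /* head: scalar pixels until pd is 16-byte aligned */
        while (w && ((uintptr_t)pd & 15))
        {
            process_1 (pd++, ps++);
            w--;
        }

        /* body: four pixels, i.e. one aligned 128-bit store, per pass */
        while (w >= 4)
        {
            process_4 (pd, ps);
            pd += 4;
            ps += 4;
            w -= 4;
        }

        /* tail: at most three leftover pixels */
        while (w)
        {
            process_1 (pd++, ps++);
            w--;
        }
    }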
 
 static force_inline void
-core_combine_over_reverse_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
+core_combine_over_reverse_u_sse2 (uint32_t*       pd,
+                                  const uint32_t* ps,
+                                  const uint32_t* pm,
+                                  int             w)
 {
     uint32_t s, d;
 
@@ -646,11 +724,11 @@ core_combine_over_reverse_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32
     while (w &&
            ((unsigned long)pd & 15))
     {
-        d = *pd;
-        s = combine1 (ps, pm);
+	d = *pd;
+	s = combine1 (ps, pm);
 
-        *pd++ = core_combine_over_u_pixel_sse2 (d, s);
-        w--;
+	*pd++ = core_combine_over_u_pixel_sse2 (d, s);
+	w--;
 	ps++;
 	if (pm)
 	    pm++;
@@ -663,40 +741,47 @@ core_combine_over_reverse_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
 	cache_prefetch_next ((__m128i*)pm);
 
-        /* I'm loading unaligned because I'm not sure about the address alignment. */
-        xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
-        xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+	/* I'm loading unaligned because I'm not sure
+	 * about the address alignment.
+	 */
+	xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi);
 
-        over_2x128 (&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_src_lo, &xmm_src_hi);
+	over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+		    &xmm_alpha_lo, &xmm_alpha_hi,
+		    &xmm_src_lo, &xmm_src_hi);
 
-        /* rebuid the 4 pixel data and save*/
-        save_128_aligned ((__m128i*)pd, pack_2x128_128 (xmm_src_lo, xmm_src_hi));
+	/* rebuild the 4 pixel data and save */
+	save_128_aligned ((__m128i*)pd,
+			  pack_2x128_128 (xmm_src_lo, xmm_src_hi));
+
+	w -= 4;
+	ps += 4;
+	pd += 4;
 
-        w -= 4;
-        ps += 4;
-        pd += 4;
 	if (pm)
 	    pm += 4;
     }
 
     while (w)
     {
-        d = *pd;
-        s = combine1 (ps, pm);
+	d = *pd;
+	s = combine1 (ps, pm);
 
-        *pd++ = core_combine_over_u_pixel_sse2 (d, s);
+	*pd++ = core_combine_over_u_pixel_sse2 (d, s);
 	ps++;
-        w--;
+	w--;
 	if (pm)
 	    pm++;
     }
@@ -709,18 +794,23 @@ core_combine_in_u_pixelsse2 (uint32_t src, uint32_t dst)
 
     if (maska == 0)
     {
-        return 0;
+	return 0;
     }
     else if (maska != 0xff)
     {
-        return pack_1x64_32(pix_multiply_1x64 (unpack_32_1x64 (dst), expand_alpha_1x64 (unpack_32_1x64 (src))));
+	return pack_1x64_32 (
+	    pix_multiply_1x64 (unpack_32_1x64 (dst),
+			       expand_alpha_1x64 (unpack_32_1x64 (src))));
     }
 
     return dst;
 }
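
core_combine_in_u_pixelsse2 (src, dst) scales every channel of dst by the alpha of src; the callers swap the argument order to select src IN dst versus its reverse. A scalar sketch, reusing mul_un8:

    /* scalar sketch: scale every channel of dst by src's alpha */
    static uint32_t
    in_u_scalar (uint32_t src, uint32_t dst)
    {
        uint8_t sa = src >> 24;
        uint32_t r = 0;
        int shift;

        if (sa == 0)
            return 0;
        if (sa == 0xff)
            return dst;

        for (shift = 0; shift < 32; shift += 8)
            r |= (uint32_t)mul_un8 ((uint8_t)(dst >> shift), sa) << shift;

        return r;
    }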
 
 static force_inline void
-core_combine_in_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
+core_combine_in_u_sse2 (uint32_t*       pd,
+                        const uint32_t* ps,
+                        const uint32_t* pm,
+                        int             w)
 {
     uint32_t s, d;
 
@@ -734,11 +824,11 @@ core_combine_in_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, in
 
     while (w && ((unsigned long) pd & 15))
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
 
-        *pd++ = core_combine_in_u_pixelsse2 (d, s);
-        w--;
+	*pd++ = core_combine_in_u_pixelsse2 (d, s);
+	w--;
 	ps++;
 	if (pm)
 	    pm++;
@@ -751,36 +841,39 @@ core_combine_in_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, in
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
 	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-        xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+	xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*) pm);
 
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_dst_lo, &xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+			    &xmm_dst_lo, &xmm_dst_hi,
+			    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned ((__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned ((__m128i*)pd,
+			  pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	w -= 4;
 	if (pm)
 	    pm += 4;
     }
 
     while (w)
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
 
-        *pd++ = core_combine_in_u_pixelsse2 (d, s);
-        w--;
+	*pd++ = core_combine_in_u_pixelsse2 (d, s);
+	w--;
 	ps++;
 	if (pm)
 	    pm++;
@@ -788,7 +881,10 @@ core_combine_in_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, in
 }
 
 static force_inline void
-core_combine_reverse_in_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm, int w)
+core_combine_reverse_in_u_sse2 (uint32_t*       pd,
+                                const uint32_t* ps,
+                                const uint32_t *pm,
+                                int             w)
 {
     uint32_t s, d;
 
@@ -802,12 +898,12 @@ core_combine_reverse_in_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t
 
     while (w && ((unsigned long) pd & 15))
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
 
-        *pd++ = core_combine_in_u_pixelsse2 (s, d);
+	*pd++ = core_combine_in_u_pixelsse2 (s, d);
 	ps++;
-        w--;
+	w--;
 	if (pm)
 	    pm++;
     }
@@ -819,36 +915,39 @@ core_combine_reverse_in_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
 	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_dst_hi = load_128_aligned ((__m128i*) pd);
-        xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+	xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
 
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
 
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, &xmm_src_lo, &xmm_src_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+			    &xmm_src_lo, &xmm_src_hi,
+			    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned ((__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	w -= 4;
 	if (pm)
 	    pm += 4;
     }
 
     while (w)
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
 
-        *pd++ = core_combine_in_u_pixelsse2 (s, d);
-        w--;
+	*pd++ = core_combine_in_u_pixelsse2 (s, d);
+	w--;
 	ps++;
 	if (pm)
 	    pm++;
@@ -856,7 +955,10 @@ core_combine_reverse_in_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t
 }
 
 static force_inline void
-core_combine_reverse_out_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
+core_combine_reverse_out_u_sse2 (uint32_t*       pd,
+                                 const uint32_t* ps,
+                                 const uint32_t* pm,
+                                 int             w)
 {
     /* call prefetch hint to optimize cache load*/
     cache_prefetch ((__m128i*)ps);
@@ -865,14 +967,18 @@ core_combine_reverse_out_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_
 
     while (w && ((unsigned long) pd & 15))
     {
-        uint32_t s = combine1 (ps, pm);
-        uint32_t d = *pd;
+	uint32_t s = combine1 (ps, pm);
+	uint32_t d = *pd;
 
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (d), negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (s)))));
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		unpack_32_1x64 (d), negate_1x64 (
+		    expand_alpha_1x64 (unpack_32_1x64 (s)))));
+
 	if (pm)
 	    pm++;
 	ps++;
-        w--;
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -882,49 +988,59 @@ core_combine_reverse_out_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_
 
     while (w >= 4)
     {
-        __m128i xmm_src_lo, xmm_src_hi;
-        __m128i xmm_dst_lo, xmm_dst_hi;
+	__m128i xmm_src_lo, xmm_src_hi;
+	__m128i xmm_dst_lo, xmm_dst_hi;
 
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
 	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
-        xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+	xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
-        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        negate_2x128      (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	negate_2x128       (xmm_src_lo, xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
 
-        pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, &xmm_src_lo, &xmm_src_hi, &xmm_dst_lo, &xmm_dst_hi);
+	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+			    &xmm_src_lo, &xmm_src_hi,
+			    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned ((__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
+	ps += 4;
+	pd += 4;
 	if (pm)
 	    pm += 4;
-        w -= 4;
+
+	w -= 4;
     }
 
     while (w)
     {
-        uint32_t s = combine1 (ps, pm);
-        uint32_t d = *pd;
+	uint32_t s = combine1 (ps, pm);
+	uint32_t d = *pd;
 
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (d), negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (s)))));
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		unpack_32_1x64 (d), negate_1x64 (
+		    expand_alpha_1x64 (unpack_32_1x64 (s)))));
 	ps++;
 	if (pm)
 	    pm++;
-        w--;
+	w--;
     }
 }
 
 static force_inline void
-core_combine_out_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
+core_combine_out_u_sse2 (uint32_t*       pd,
+                         const uint32_t* ps,
+                         const uint32_t* pm,
+                         int             w)
 {
     /* call prefetch hint to optimize cache load*/
     cache_prefetch ((__m128i*)ps);
@@ -933,11 +1049,14 @@ core_combine_out_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, i
 
     while (w && ((unsigned long) pd & 15))
     {
-        uint32_t s = combine1 (ps, pm);
-        uint32_t d = *pd;
-
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (s), negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
-        w--;
+	uint32_t s = combine1 (ps, pm);
+	uint32_t d = *pd;
+
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		unpack_32_1x64 (s), negate_1x64 (
+		    expand_alpha_1x64 (unpack_32_1x64 (d)))));
+	w--;
 	ps++;
 	if (pm)
 	    pm++;
@@ -950,41 +1069,47 @@ core_combine_out_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, i
 
     while (w >= 4)
     {
-        __m128i xmm_src_lo, xmm_src_hi;
-        __m128i xmm_dst_lo, xmm_dst_hi;
+	__m128i xmm_src_lo, xmm_src_hi;
+	__m128i xmm_dst_lo, xmm_dst_hi;
 
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
 	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
-        xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+	xmm_src_hi = combine4 ((__m128i*) ps, (__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        negate_2x128      (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	negate_2x128       (xmm_dst_lo, xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
-        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_dst_lo, &xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+			    &xmm_dst_lo, &xmm_dst_hi,
+			    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned ((__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	w -= 4;
 	if (pm)
 	    pm += 4;
     }
 
     while (w)
     {
-        uint32_t s = combine1 (ps, pm);
-        uint32_t d = *pd;
-
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (s), negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
-        w--;
+	uint32_t s = combine1 (ps, pm);
+	uint32_t d = *pd;
+
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		unpack_32_1x64 (s), negate_1x64 (
+		    expand_alpha_1x64 (unpack_32_1x64 (d)))));
+	w--;
 	ps++;
 	if (pm)
 	    pm++;
@@ -992,7 +1117,8 @@ core_combine_out_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, i
 }
 
 static force_inline uint32_t
-core_combine_atop_u_pixel_sse2 (uint32_t src, uint32_t dst)
+core_combine_atop_u_pixel_sse2 (uint32_t src,
+                                uint32_t dst)
 {
     __m64 s = unpack_32_1x64 (src);
     __m64 d = unpack_32_1x64 (dst);
@@ -1004,7 +1130,10 @@ core_combine_atop_u_pixel_sse2 (uint32_t src, uint32_t dst)
 }
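
This is the standard Porter-Duff ATOP: source weighted by destination alpha, destination weighted by inverse source alpha, exactly what the negate_2x128 and pix_add_multiply_2x128 calls in the four-pixel loop below spell out. Per channel, reusing add_mul_un8:

    /* scalar sketch: src ATOP dst keeps dst's shape */
    static uint8_t
    atop_un8 (uint8_t s, uint8_t sa, uint8_t d, uint8_t da)
    {
        return add_mul_un8 (s, da, d, 255 - sa);
    }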
 
 static force_inline void
-core_combine_atop_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
+core_combine_atop_u_sse2 (uint32_t*       pd,
+                          const uint32_t* ps,
+                          const uint32_t* pm,
+                          int             w)
 {
     uint32_t s, d;
 
@@ -1020,11 +1149,11 @@ core_combine_atop_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm,
 
     while (w && ((unsigned long) pd & 15))
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
 
-        *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
-        w--;
+	*pd++ = core_combine_atop_u_pixel_sse2 (s, d);
+	w--;
 	ps++;
 	if (pm)
 	    pm++;
@@ -1037,42 +1166,47 @@ core_combine_atop_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm,
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
 	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
-        xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+	xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
-        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
-        negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+	negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
+		      &xmm_alpha_src_lo, &xmm_alpha_src_hi);
 
-        pix_add_multiply_2x128 ( &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-                               &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
-                               &xmm_dst_lo, &xmm_dst_hi );
+	pix_add_multiply_2x128 (
+	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+	    &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+	    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned ((__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	w -= 4;
 	if (pm)
 	    pm += 4;
     }
 
     while (w)
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
 
-        *pd++ = core_combine_atop_u_pixel_sse2 (s, d);
-        w--;
+	*pd++ = core_combine_atop_u_pixel_sse2 (s, d);
+	w--;
 	ps++;
 	if (pm)
 	    pm++;
@@ -1080,7 +1214,8 @@ core_combine_atop_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm,
 }
 
 static force_inline uint32_t
-core_combine_reverse_atop_u_pixel_sse2 (uint32_t src, uint32_t dst)
+core_combine_reverse_atop_u_pixel_sse2 (uint32_t src,
+                                        uint32_t dst)
 {
     __m64 s = unpack_32_1x64 (src);
     __m64 d = unpack_32_1x64 (dst);
@@ -1092,7 +1227,10 @@ core_combine_reverse_atop_u_pixel_sse2 (uint32_t src, uint32_t dst)
 }
 
 static force_inline void
-core_combine_reverse_atop_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t* pm, int w)
+core_combine_reverse_atop_u_sse2 (uint32_t*       pd,
+                                  const uint32_t* ps,
+                                  const uint32_t* pm,
+                                  int             w)
 {
     uint32_t s, d;
 
@@ -1108,12 +1246,12 @@ core_combine_reverse_atop_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32
 
     while (w && ((unsigned long) pd & 15))
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
 
-        *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
+	*pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
 	ps++;
-        w--;
+	w--;
 	if (pm)
 	    pm++;
     }
@@ -1125,50 +1263,56 @@ core_combine_reverse_atop_u_sse2 (uint32_t* pd, const uint32_t* ps, const uint32
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
 	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
-        xmm_dst_hi = load_128_aligned ((__m128i*) pd);
+	xmm_src_hi = combine4 ((__m128i*)ps, (__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*) pd);
 
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
-        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
-        negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+	negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
+		      &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
-        pix_add_multiply_2x128 ( &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-                               &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
-                               &xmm_dst_lo, &xmm_dst_hi );
+	pix_add_multiply_2x128 (
+	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+	    &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+	    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned ((__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	w -= 4;
 	if (pm)
 	    pm += 4;
     }
 
     while (w)
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
 
-        *pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
+	*pd++ = core_combine_reverse_atop_u_pixel_sse2 (s, d);
 	ps++;
-        w--;
+	w--;
 	if (pm)
 	    pm++;
     }
 }
 
 static force_inline uint32_t
-core_combine_xor_u_pixel_sse2 (uint32_t src, uint32_t dst)
+core_combine_xor_u_pixel_sse2 (uint32_t src,
+                               uint32_t dst)
 {
     __m64 s = unpack_32_1x64 (src);
     __m64 d = unpack_32_1x64 (dst);
@@ -1180,14 +1324,17 @@ core_combine_xor_u_pixel_sse2 (uint32_t src, uint32_t dst)
 }
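
Porter-Duff XOR weights each operand by the other's inverse alpha, so only the non-overlapping coverage survives. Per channel, again via add_mul_un8:

    /* scalar sketch of core_combine_xor_u_pixel_sse2, per channel */
    static uint8_t
    xor_un8 (uint8_t s, uint8_t sa, uint8_t d, uint8_t da)
    {
        return add_mul_un8 (s, 255 - da, d, 255 - sa);
    }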
 
 static force_inline void
-core_combine_xor_u_sse2 (uint32_t* dst, const uint32_t* src, const uint32_t *mask, int width)
+core_combine_xor_u_sse2 (uint32_t*       dst,
+                         const uint32_t* src,
+                         const uint32_t *mask,
+                         int             width)
 {
     int w = width;
     uint32_t s, d;
     uint32_t* pd = dst;
     const uint32_t* ps = src;
     const uint32_t* pm = mask;
-    
+
     __m128i xmm_src, xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
@@ -1200,11 +1347,11 @@ core_combine_xor_u_sse2 (uint32_t* dst, const uint32_t* src, const uint32_t *mas
 
     while (w && ((unsigned long) pd & 15))
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
 
-        *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
-        w--;
+	*pd++ = core_combine_xor_u_pixel_sse2 (s, d);
+	w--;
 	ps++;
 	if (pm)
 	    pm++;
@@ -1217,43 +1364,49 @@ core_combine_xor_u_sse2 (uint32_t* dst, const uint32_t* src, const uint32_t *mas
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
 	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
-        xmm_dst = load_128_aligned ((__m128i*) pd);
+	xmm_src = combine4 ((__m128i*) ps, (__m128i*) pm);
+	xmm_dst = load_128_aligned ((__m128i*) pd);
 
-        unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
-        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
-        negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-        negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+	negate_2x128 (xmm_alpha_src_lo, xmm_alpha_src_hi,
+		      &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+	negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
+		      &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
-        pix_add_multiply_2x128 ( &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-                               &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
-                               &xmm_dst_lo, &xmm_dst_hi );
+	pix_add_multiply_2x128 (
+	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+	    &xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+	    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned ((__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	w -= 4;
 	if (pm)
 	    pm += 4;
     }
 
     while (w)
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
 
-        *pd++ = core_combine_xor_u_pixel_sse2 (s, d);
-        w--;
+	*pd++ = core_combine_xor_u_pixel_sse2 (s, d);
+	w--;
 	ps++;
 	if (pm)
 	    pm++;
@@ -1261,10 +1414,13 @@ core_combine_xor_u_sse2 (uint32_t* dst, const uint32_t* src, const uint32_t *mas
 }
 
 static force_inline void
-core_combine_add_u_sse2 (uint32_t* dst, const uint32_t* src, const uint32_t* mask, int width)
+core_combine_add_u_sse2 (uint32_t*       dst,
+                         const uint32_t* src,
+                         const uint32_t* mask,
+                         int             width)
 {
     int w = width;
-    uint32_t s,d;
+    uint32_t s, d;
     uint32_t* pd = dst;
     const uint32_t* ps = src;
     const uint32_t* pm = mask;
@@ -1276,13 +1432,15 @@ core_combine_add_u_sse2 (uint32_t* dst, const uint32_t* src, const uint32_t* mas
 
     while (w && (unsigned long)pd & 15)
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
+
 	ps++;
 	if (pm)
 	    pm++;
-        *pd++ = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
-        w--;
+	*pd++ = _mm_cvtsi64_si32 (
+	    _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -1293,36 +1451,40 @@ core_combine_add_u_sse2 (uint32_t* dst, const uint32_t* src, const uint32_t* mas
     while (w >= 4)
     {
 	__m128i s;
-	
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
+
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
 	cache_prefetch_next ((__m128i*)pm);
 
-	s = combine4((__m128i*)ps,(__m128i*)pm);
-	
-        save_128_aligned( (__m128i*)pd,
-                        _mm_adds_epu8( s, load_128_aligned  ((__m128i*)pd)) );
-        pd += 4;
-        ps += 4;
+	s = combine4 ((__m128i*)ps, (__m128i*)pm);
+
+	save_128_aligned (
+	    (__m128i*)pd, _mm_adds_epu8 (s, load_128_aligned  ((__m128i*)pd)));
+
+	pd += 4;
+	ps += 4;
 	if (pm)
 	    pm += 4;
-        w -= 4;
+	w -= 4;
     }
 
     while (w--)
     {
-        s = combine1 (ps, pm);
-        d = *pd;
+	s = combine1 (ps, pm);
+	d = *pd;
+
 	ps++;
-        *pd++ = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
+	*pd++ = _mm_cvtsi64_si32 (
+	    _mm_adds_pu8 (_mm_cvtsi32_si64 (s), _mm_cvtsi32_si64 (d)));
 	if (pm)
 	    pm++;
     }
 }
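
ADD needs no unpacking at all: _mm_adds_pu8 and _mm_adds_epu8 already are the operator, a per-byte saturating add. One channel in scalar form:

    /* scalar sketch: saturating per-channel add */
    static uint8_t
    add_un8 (uint8_t s, uint8_t d)
    {
        unsigned t = (unsigned)s + d;

        return t > 0xff ? 0xff : (uint8_t)t;
    }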
 
 static force_inline uint32_t
-core_combine_saturate_u_pixel_sse2 (uint32_t src, uint32_t dst)
+core_combine_saturate_u_pixel_sse2 (uint32_t src,
+                                    uint32_t dst)
 {
     __m64 ms = unpack_32_1x64 (src);
     __m64 md = unpack_32_1x64 (dst);
@@ -1331,16 +1493,20 @@ core_combine_saturate_u_pixel_sse2 (uint32_t src, uint32_t dst)
 
     if (sa > da)
     {
-        ms = pix_multiply_1x64 (ms, expand_alpha_1x64 (unpack_32_1x64 (DIV_UN8(da, sa) << 24)));
+	ms = pix_multiply_1x64 (
+	    ms, expand_alpha_1x64 (unpack_32_1x64 (DIV_UN8 (da, sa) << 24)));
     }
 
     return pack_1x64_32 (_mm_adds_pu16 (md, ms));
 }
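
SATURATE scales the source down just far enough that adding it cannot overflow the destination's remaining alpha headroom, which is what ~dst >> 24 measures. A scalar sketch, assuming DIV_UN8 (a, b) computes a * 255 / b as in pixman-combine.h.template, and reusing mul_un8 and add_un8 from above:

    /* scalar sketch of core_combine_saturate_u_pixel_sse2 */
    static uint32_t
    saturate_u_scalar (uint32_t src, uint32_t dst)
    {
        uint8_t sa = src >> 24;
        uint8_t da = ~dst >> 24;    /* alpha headroom left in dst */
        uint32_t r = 0;
        int shift;

        if (sa > da)
        {
            uint8_t ratio = (uint8_t) ((unsigned)da * 255 / sa);
            uint32_t scaled = 0;

            for (shift = 0; shift < 32; shift += 8)
                scaled |= (uint32_t)mul_un8 ((uint8_t)(src >> shift),
                                             ratio) << shift;
            src = scaled;
        }

        for (shift = 0; shift < 32; shift += 8)
            r |= (uint32_t)add_un8 ((uint8_t)(src >> shift),
                                    (uint8_t)(dst >> shift)) << shift;

        return r;
    }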
 
 static force_inline void
-core_combine_saturate_u_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
+core_combine_saturate_u_sse2 (uint32_t *      pd,
+                              const uint32_t *ps,
+                              const uint32_t *pm,
+                              int             w)
 {
-    uint32_t s,d;
+    uint32_t s, d;
 
     uint32_t pack_cmp;
     __m128i xmm_src, xmm_dst;
@@ -1352,10 +1518,11 @@ core_combine_saturate_u_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *
 
     while (w && (unsigned long)pd & 15)
     {
-        s = combine1 (ps, pm);
-        d = *pd;
-        *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
-        w--;
+	s = combine1 (ps, pm);
+	d = *pd;
+
+	*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+	w--;
 	ps++;
 	if (pm)
 	    pm++;
@@ -1368,62 +1535,65 @@ core_combine_saturate_u_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
 	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_dst = load_128_aligned  ((__m128i*)pd);
-        xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
+	xmm_dst = load_128_aligned  ((__m128i*)pd);
+	xmm_src = combine4 ((__m128i*)ps, (__m128i*)pm);
 
-        pack_cmp = _mm_movemask_epi8 (_mm_cmpgt_epi32 (_mm_srli_epi32 (xmm_src, 24),
-                                                      _mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));
+	pack_cmp = _mm_movemask_epi8 (
+	    _mm_cmpgt_epi32 (
+		_mm_srli_epi32 (xmm_src, 24),
+		_mm_srli_epi32 (_mm_xor_si128 (xmm_dst, mask_ff000000), 24)));
 
-        /* if some alpha src is grater than respective ~alpha dst */
-        if (pack_cmp)
-        {
-            s = combine1 (ps++, pm);
-            d = *pd;
-            *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+	/* if some alpha src is greater than the respective ~alpha dst */
+	if (pack_cmp)
+	{
+	    s = combine1 (ps++, pm);
+	    d = *pd;
+	    *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
 	    if (pm)
 		pm++;
 
-            s = combine1 (ps++, pm);
-            d = *pd;
-            *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+	    s = combine1 (ps++, pm);
+	    d = *pd;
+	    *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
 	    if (pm)
 		pm++;
 
-            s = combine1 (ps++, pm);
-            d = *pd;
-            *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+	    s = combine1 (ps++, pm);
+	    d = *pd;
+	    *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
 	    if (pm)
 		pm++;
 
-            s = combine1 (ps++, pm);
-            d = *pd;
-            *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+	    s = combine1 (ps++, pm);
+	    d = *pd;
+	    *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
 	    if (pm)
 		pm++;
-        }
-        else
-        {
-            save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));
+	}
+	else
+	{
+	    save_128_aligned ((__m128i*)pd, _mm_adds_epu8 (xmm_dst, xmm_src));
 
-            pd += 4;
-            ps += 4;
+	    pd += 4;
+	    ps += 4;
 	    if (pm)
 		pm += 4;
-        }
+	}
 
-        w -= 4;
+	w -= 4;
     }
 
     while (w--)
     {
-        s = combine1 (ps, pm);
-        d = *pd;
-        *pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
+	s = combine1 (ps, pm);
+	d = *pd;
+
+	*pd++ = core_combine_saturate_u_pixel_sse2 (s, d);
 	ps++;
 	if (pm)
 	    pm++;
@@ -1431,7 +1601,10 @@ core_combine_saturate_u_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *
 }
 
 static force_inline void
-core_combine_src_ca_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm, int w)
+core_combine_src_ca_sse2 (uint32_t*       pd,
+                          const uint32_t* ps,
+                          const uint32_t *pm,
+                          int             w)
 {
     uint32_t s, m;
 
@@ -1446,10 +1619,11 @@ core_combine_src_ca_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm,
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
-        m = *pm++;
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
-        w--;
+	s = *ps++;
+	m = *pm++;
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -1459,38 +1633,44 @@ core_combine_src_ca_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm,
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
-        cache_prefetch_next ((__m128i*)pm);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
+	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
-        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned( (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
-        pm += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
     }
 
     while (w)
     {
-        s = *ps++;
-        m = *pm++;
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
-        w--;
+	s = *ps++;
+	m = *pm++;
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)));
+	w--;
     }
 }
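
The _ca (component alpha) variants stop collapsing the mask to its alpha channel: each source channel is multiplied by the matching mask channel. The per-pixel work of core_combine_src_ca_sse2 in scalar form:

    /* scalar sketch: component-alpha SRC, channel-wise multiply */
    static uint32_t
    src_ca_scalar (uint32_t s, uint32_t m)
    {
        uint32_t r = 0;
        int shift;

        for (shift = 0; shift < 32; shift += 8)
            r |= (uint32_t)mul_un8 ((uint8_t)(s >> shift),
                                    (uint8_t)(m >> shift)) << shift;

        return r;
    }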
 
 static force_inline uint32_t
-core_combine_over_ca_pixel_sse2 (uint32_t src, uint32_t mask, uint32_t dst)
+core_combine_over_ca_pixel_sse2 (uint32_t src,
+                                 uint32_t mask,
+                                 uint32_t dst)
 {
     __m64 s = unpack_32_1x64 (src);
     __m64 expAlpha = expand_alpha_1x64 (s);
@@ -1501,7 +1681,10 @@ core_combine_over_ca_pixel_sse2 (uint32_t src, uint32_t mask, uint32_t dst)
 }
 
 static force_inline void
-core_combine_over_ca_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm, int w)
+core_combine_over_ca_sse2 (uint32_t*       pd,
+                           const uint32_t* ps,
+                           const uint32_t *pm,
+                           int             w)
 {
     uint32_t s, m, d;
 
@@ -1517,12 +1700,12 @@ core_combine_over_ca_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm,
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
 
-        *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
-        w--;
+	*pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -1532,52 +1715,65 @@ core_combine_over_ca_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm,
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
-        cache_prefetch_next ((__m128i*)pm);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
+	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
-        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi);
 
-        in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+	in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+		       &xmm_alpha_lo, &xmm_alpha_hi,
+		       &xmm_mask_lo, &xmm_mask_hi,
+		       &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned( (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
-        pm += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
     }
 
     while (w)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
 
-        *pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
-        w--;
+	*pd++ = core_combine_over_ca_pixel_sse2 (s, m, d);
+	w--;
     }
 }
 
 static force_inline uint32_t
-core_combine_over_reverse_ca_pixel_sse2 (uint32_t src, uint32_t mask, uint32_t dst)
+core_combine_over_reverse_ca_pixel_sse2 (uint32_t src,
+                                         uint32_t mask,
+                                         uint32_t dst)
 {
     __m64 d = unpack_32_1x64 (dst);
 
-	return pack_1x64_32(over_1x64 (d, expand_alpha_1x64 (d), pix_multiply_1x64 (unpack_32_1x64 (src), unpack_32_1x64 (mask))));
+    return pack_1x64_32 (
+	over_1x64 (d, expand_alpha_1x64 (d),
+		   pix_multiply_1x64 (unpack_32_1x64 (src),
+				      unpack_32_1x64 (mask))));
 }
 
 static force_inline void
-core_combine_over_reverse_ca_sse2 (uint32_t* pd, const uint32_t* ps, const uint32_t *pm, int w)
+core_combine_over_reverse_ca_sse2 (uint32_t*       pd,
+                                   const uint32_t* ps,
+                                   const uint32_t *pm,
+                                   int             w)
 {
     uint32_t s, m, d;
 
@@ -1593,12 +1789,12 @@ core_combine_over_reverse_ca_sse2 (uint32_t* pd, const uint32_t* ps, const uint3
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
 
-        *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
-        w--;
+	*pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -1608,45 +1804,54 @@ core_combine_over_reverse_ca_sse2 (uint32_t* pd, const uint32_t* ps, const uint3
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
-        cache_prefetch_next ((__m128i*)pm);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
+	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_alpha_lo, &xmm_alpha_hi);
-        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_mask_lo, &xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi);
+	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_mask_lo, &xmm_mask_hi);
 
-        over_2x128 (&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_mask_lo, &xmm_mask_hi);
+	over_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+		    &xmm_alpha_lo, &xmm_alpha_hi,
+		    &xmm_mask_lo, &xmm_mask_hi);
 
-        save_128_aligned( (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
 
-        ps += 4;
-        pd += 4;
-        pm += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
     }
 
     while (w)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
 
-        *pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
-        w--;
+	*pd++ = core_combine_over_reverse_ca_pixel_sse2 (s, m, d);
+	w--;
     }
 }
 
 static force_inline void
-core_combine_in_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
+core_combine_in_ca_sse2 (uint32_t *      pd,
+                         const uint32_t *ps,
+                         const uint32_t *pm,
+                         int             w)
 {
     uint32_t s, m, d;
 
@@ -1662,13 +1867,16 @@ core_combine_in_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, i
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
 
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)),
-                                                expand_alpha_1x64 (unpack_32_1x64 (d))));
-        w--;
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)),
+		expand_alpha_1x64 (unpack_32_1x64 (d))));
+
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -1678,46 +1886,60 @@ core_combine_in_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, i
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
-        cache_prefetch_next ((__m128i*)pm);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
+	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_alpha_lo, &xmm_alpha_hi);
-        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi);
 
-        pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_dst_lo, &xmm_dst_hi);
+	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned( (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi,
+			    &xmm_dst_lo, &xmm_dst_hi);
 
-        ps += 4;
-        pd += 4;
-        pm += 4;
-        w -= 4;
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
     }
 
     while (w)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
+
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		pix_multiply_1x64 (
+		    unpack_32_1x64 (s), unpack_32_1x64 (m)),
+		expand_alpha_1x64 (unpack_32_1x64 (d))));
 
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)),
-                                                expand_alpha_1x64 (unpack_32_1x64 (d))));
-        w--;
+	w--;
     }
 }
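
The single-pixel path above spells the component-alpha IN operator out
directly: dst = (s * m) * alpha(d), two rounded products. Using the
hypothetical mul_div255 helper from the previous sketch:

    /* one channel of IN with component alpha */
    static uint8_t
    in_ca_channel (uint8_t s, uint8_t m, uint8_t da)
    {
        return mul_div255 (mul_div255 (s, m), da);
    }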
 
 static force_inline void
-core_combine_in_reverse_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
+core_combine_in_reverse_ca_sse2 (uint32_t *      pd,
+                                 const uint32_t *ps,
+                                 const uint32_t *pm,
+                                 int             w)
 {
     uint32_t s, m, d;
 
@@ -1733,14 +1955,16 @@ core_combine_in_reverse_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
-
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (d),
-                                                pix_multiply_1x64 (unpack_32_1x64 (m),
-                                                                  expand_alpha_1x64 (unpack_32_1x64 (s)))));
-        w--;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
+
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		unpack_32_1x64 (d),
+		pix_multiply_1x64 (unpack_32_1x64 (m),
+				   expand_alpha_1x64 (unpack_32_1x64 (s)))));
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -1750,47 +1974,58 @@ core_combine_in_reverse_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
-        cache_prefetch_next ((__m128i*)pm);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
+	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
-        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
-        pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi);
+	pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi);
 
-        pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_dst_lo, &xmm_dst_hi);
+	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi,
+			    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned( (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
-        pm += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
     }
 
     while (w)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
-
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (d),
-                                                pix_multiply_1x64 (unpack_32_1x64 (m),
-                                                                  expand_alpha_1x64 (unpack_32_1x64 (s)))));
-        w--;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
+
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		unpack_32_1x64 (d),
+		pix_multiply_1x64 (unpack_32_1x64 (m),
+				   expand_alpha_1x64 (unpack_32_1x64 (s)))));
+	w--;
     }
 }
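
IN_REVERSE swaps the roles: the destination is kept and scaled by the
masked source alpha, dst = d * (m * alpha(s)). Same sketch style:

    /* one channel of IN_REVERSE with component alpha */
    static uint8_t
    in_reverse_ca_channel (uint8_t d, uint8_t m, uint8_t sa)
    {
        return mul_div255 (d, mul_div255 (m, sa));
    }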
 
 static force_inline void
-core_combine_out_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
+core_combine_out_ca_sse2 (uint32_t *      pd,
+                          const uint32_t *ps,
+                          const uint32_t *pm,
+                          int             w)
 {
     uint32_t s, m, d;
 
@@ -1806,13 +2041,16 @@ core_combine_out_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm,
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
-
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)),
-                                                negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
-        w--;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
+
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		pix_multiply_1x64 (
+		    unpack_32_1x64 (s), unpack_32_1x64 (m)),
+		negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -1822,47 +2060,61 @@ core_combine_out_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm,
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
-        cache_prefetch_next ((__m128i*)pm);
-
-        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_alpha_lo, &xmm_alpha_hi);
-        negate_2x128 (xmm_alpha_lo, xmm_alpha_hi, &xmm_alpha_lo, &xmm_alpha_hi);
-
-        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
-        pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-        save_128_aligned( (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
+	cache_prefetch_next ((__m128i*)pm);
 
-        ps += 4;
-        pd += 4;
-        pm += 4;
-        w -= 4;
+	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi);
+	negate_2x128 (xmm_alpha_lo, xmm_alpha_hi,
+		      &xmm_alpha_lo, &xmm_alpha_hi);
+
+	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_dst_lo, &xmm_dst_hi);
+	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi,
+			    &xmm_dst_lo, &xmm_dst_hi);
+
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
     }
 
     while (w)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
+
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		pix_multiply_1x64 (
+		    unpack_32_1x64 (s), unpack_32_1x64 (m)),
+		negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
 
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (m)),
-                                                negate_1x64 (expand_alpha_1x64 (unpack_32_1x64 (d)))));
-        w--;
+	w--;
     }
 }
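
OUT is IN against the complement of the destination alpha; the
negate_1x64 / negate_2x128 calls above compute that 255 - x complement:

    /* one channel of OUT: dst = (s * m) * (255 - alpha(d)) */
    static uint8_t
    out_ca_channel (uint8_t s, uint8_t m, uint8_t da)
    {
        return mul_div255 (mul_div255 (s, m), 255 - da);
    }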
 
 static force_inline void
-core_combine_out_reverse_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
+core_combine_out_reverse_ca_sse2 (uint32_t *      pd,
+                                  const uint32_t *ps,
+                                  const uint32_t *pm,
+                                  int             w)
 {
     uint32_t s, m, d;
 
@@ -1878,14 +2130,17 @@ core_combine_out_reverse_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
-
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (d),
-                                                negate_1x64 (pix_multiply_1x64 (unpack_32_1x64 (m),
-                                                                               expand_alpha_1x64 (unpack_32_1x64 (s))))));
-        w--;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
+
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		unpack_32_1x64 (d),
+		negate_1x64 (pix_multiply_1x64 (
+				 unpack_32_1x64 (m),
+				 expand_alpha_1x64 (unpack_32_1x64 (s))))));
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -1895,50 +2150,62 @@ core_combine_out_reverse_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
-        cache_prefetch_next ((__m128i*)pm);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
+	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
-        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi);
 
-        pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_mask_lo, &xmm_mask_hi);
+	pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_alpha_lo, &xmm_alpha_hi,
+			    &xmm_mask_lo, &xmm_mask_hi);
 
-        negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+	negate_2x128 (xmm_mask_lo, xmm_mask_hi,
+		      &xmm_mask_lo, &xmm_mask_hi);
 
-        pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
+	pix_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi,
+			    &xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned( (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
-        pm += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
     }
 
     while (w)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
-
-        *pd++ = pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (d),
-                                                negate_1x64 (pix_multiply_1x64 (unpack_32_1x64 (m),
-                                                                               expand_alpha_1x64 (unpack_32_1x64 (s))))));
-        w--;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
+
+	*pd++ = pack_1x64_32 (
+	    pix_multiply_1x64 (
+		unpack_32_1x64 (d),
+		negate_1x64 (pix_multiply_1x64 (
+				 unpack_32_1x64 (m),
+				 expand_alpha_1x64 (unpack_32_1x64 (s))))));
+	w--;
     }
 }
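
OUT_REVERSE likewise keeps the destination and complements the masked
source alpha:

    /* one channel of OUT_REVERSE: dst = d * (255 - m * alpha(s)) */
    static uint8_t
    out_reverse_ca_channel (uint8_t d, uint8_t m, uint8_t sa)
    {
        return mul_div255 (d, 255 - mul_div255 (m, sa));
    }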
 
 static force_inline uint32_t
-core_combine_atop_ca_pixel_sse2 (uint32_t src, uint32_t mask, uint32_t dst)
+core_combine_atop_ca_pixel_sse2 (uint32_t src,
+                                 uint32_t mask,
+                                 uint32_t dst)
 {
     __m64 m = unpack_32_1x64 (mask);
     __m64 s = unpack_32_1x64 (src);
@@ -1953,7 +2220,10 @@ core_combine_atop_ca_pixel_sse2 (uint32_t src, uint32_t mask, uint32_t dst)
 }
 
 static force_inline void
-core_combine_atop_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
+core_combine_atop_ca_sse2 (uint32_t *      pd,
+                           const uint32_t *ps,
+                           const uint32_t *pm,
+                           int             w)
 {
     uint32_t s, m, d;
 
@@ -1970,12 +2240,12 @@ core_combine_atop_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm,
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
 
-        *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
-        w--;
+	*pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -1985,52 +2255,62 @@ core_combine_atop_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm,
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
-        cache_prefetch_next ((__m128i*)pm);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
+	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
 
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
-        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
 
-        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_mask_lo, &xmm_mask_hi, &xmm_src_lo, &xmm_src_hi);
-        pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, &xmm_mask_lo, &xmm_mask_hi);
+	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_src_lo, &xmm_src_hi);
+	pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi);
 
-        negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+	negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
 
-        pix_add_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
-                              &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-                              &xmm_dst_lo, &xmm_dst_hi);
+	pix_add_multiply_2x128 (
+	    &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
+	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+	    &xmm_dst_lo, &xmm_dst_hi);
 
-        save_128_aligned( (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-        ps += 4;
-        pd += 4;
-        pm += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
     }
 
     while (w)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
 
-        *pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
-        w--;
+	*pd++ = core_combine_atop_ca_pixel_sse2 (s, m, d);
+	w--;
     }
 }
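
ATOP sums an IN term and an OUT_REVERSE term, which is what the
pix_add_multiply_2x128 call above evaluates in one step. A scalar
reading, with the sum clamped to mirror the saturating vector add:

    /* one channel of ATOP with component alpha:
     * dst = (s * m) * alpha(d) + d * (255 - m * alpha(s)),
     * every product a rounded x * y / 255 */
    static uint8_t
    atop_ca_channel (uint8_t s, uint8_t m, uint8_t d,
                     uint8_t sa, uint8_t da)
    {
        unsigned t = mul_div255 (mul_div255 (s, m), da)
                   + mul_div255 (d, 255 - mul_div255 (m, sa));
        return t > 255 ? 255 : (uint8_t)t;
    }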
 
 static force_inline uint32_t
-core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src, uint32_t mask, uint32_t dst)
+core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src,
+                                         uint32_t mask,
+                                         uint32_t dst)
 {
     __m64 m = unpack_32_1x64 (mask);
     __m64 s = unpack_32_1x64 (src);
@@ -2046,7 +2326,10 @@ core_combine_reverse_atop_ca_pixel_sse2 (uint32_t src, uint32_t mask, uint32_t d
 }
 
 static force_inline void
-core_combine_reverse_atop_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
+core_combine_reverse_atop_ca_sse2 (uint32_t *      pd,
+                                   const uint32_t *ps,
+                                   const uint32_t *pm,
+                                   int             w)
 {
     uint32_t s, m, d;
 
@@ -2063,12 +2346,12 @@ core_combine_reverse_atop_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint3
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
 
-        *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
-        w--;
+	*pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -2078,69 +2361,84 @@ core_combine_reverse_atop_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint3
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
-        cache_prefetch_next ((__m128i*)pm);
-
-        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_mask_lo, &xmm_mask_hi, &xmm_src_lo, &xmm_src_hi);
-        pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-        negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-        pix_add_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
-                              &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-                              &xmm_dst_lo, &xmm_dst_hi);
-
-        save_128_aligned( (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
+	cache_prefetch_next ((__m128i*)pm);
 
-        ps += 4;
-        pd += 4;
-        pm += 4;
-        w -= 4;
+	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_src_lo, &xmm_src_hi);
+	pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi);
+
+	negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
+		      &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+	pix_add_multiply_2x128 (
+	    &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
+	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+	    &xmm_dst_lo, &xmm_dst_hi);
+
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
     }
 
     while (w)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
 
-        *pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
-        w--;
+	*pd++ = core_combine_reverse_atop_ca_pixel_sse2 (s, m, d);
+	w--;
     }
 }
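
ATOP_REVERSE mirrors it, complementing the destination alpha instead
of the masked source alpha:

    /* one channel of ATOP_REVERSE:
     * dst = (s * m) * (255 - alpha(d)) + d * (m * alpha(s)) */
    static uint8_t
    atop_reverse_ca_channel (uint8_t s, uint8_t m, uint8_t d,
                             uint8_t sa, uint8_t da)
    {
        unsigned t = mul_div255 (mul_div255 (s, m), 255 - da)
                   + mul_div255 (d, mul_div255 (m, sa));
        return t > 255 ? 255 : (uint8_t)t;
    }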
 
 static force_inline uint32_t
-core_combine_xor_ca_pixel_sse2 (uint32_t src, uint32_t mask, uint32_t dst)
+core_combine_xor_ca_pixel_sse2 (uint32_t src,
+                                uint32_t mask,
+                                uint32_t dst)
 {
     __m64 a = unpack_32_1x64 (mask);
     __m64 s = unpack_32_1x64 (src);
     __m64 d = unpack_32_1x64 (dst);
 
-    __m64 alpha_dst = negate_1x64 (pix_multiply_1x64 (a, expand_alpha_1x64 (s)));
+    __m64 alpha_dst = negate_1x64 (pix_multiply_1x64 (
+				       a, expand_alpha_1x64 (s)));
     __m64 dest      = pix_multiply_1x64 (s, a);
     __m64 alpha_src = negate_1x64 (expand_alpha_1x64 (d));
 
     return pack_1x64_32 (pix_add_multiply_1x64 (&d,
-                                              &alpha_dst,
-                                              &dest,
-                                              &alpha_src));
+                                                &alpha_dst,
+                                                &dest,
+                                                &alpha_src));
 }
 
 static force_inline void
-core_combine_xor_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
+core_combine_xor_ca_sse2 (uint32_t *      pd,
+                          const uint32_t *ps,
+                          const uint32_t *pm,
+                          int             w)
 {
     uint32_t s, m, d;
 
@@ -2157,12 +2455,12 @@ core_combine_xor_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm,
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
 
-        *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
-        w--;
+	*pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -2172,53 +2470,66 @@ core_combine_xor_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm,
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
-        cache_prefetch_next ((__m128i*)pm);
-
-        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
-        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-        expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi);
-        expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-
-        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_mask_lo, &xmm_mask_hi, &xmm_src_lo, &xmm_src_hi);
-        pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, &xmm_alpha_src_lo, &xmm_alpha_src_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-        negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
-        negate_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-        pix_add_multiply_2x128 (&xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
-                              &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
-                              &xmm_dst_lo, &xmm_dst_hi);
-
-        save_128_aligned( (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
+	cache_prefetch_next ((__m128i*)pm);
 
-        ps += 4;
-        pd += 4;
-        pm += 4;
-        w -= 4;
+	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+
+	expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+			    &xmm_alpha_src_lo, &xmm_alpha_src_hi);
+	expand_alpha_2x128 (xmm_dst_lo, xmm_dst_hi,
+			    &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+
+	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_src_lo, &xmm_src_hi);
+	pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_alpha_src_lo, &xmm_alpha_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi);
+
+	negate_2x128 (xmm_alpha_dst_lo, xmm_alpha_dst_hi,
+		      &xmm_alpha_dst_lo, &xmm_alpha_dst_hi);
+	negate_2x128 (xmm_mask_lo, xmm_mask_hi,
+		      &xmm_mask_lo, &xmm_mask_hi);
+
+	pix_add_multiply_2x128 (
+	    &xmm_dst_lo, &xmm_dst_hi, &xmm_mask_lo, &xmm_mask_hi,
+	    &xmm_src_lo, &xmm_src_hi, &xmm_alpha_dst_lo, &xmm_alpha_dst_hi,
+	    &xmm_dst_lo, &xmm_dst_hi);
+
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
     }
 
     while (w)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
 
-        *pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
-        w--;
+	*pd++ = core_combine_xor_ca_pixel_sse2 (s, m, d);
+	w--;
     }
 }
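
XOR complements both alpha terms, which is why the vector loop above
negates both xmm_alpha_dst and xmm_mask before the add-multiply:

    /* one channel of XOR with component alpha:
     * dst = (s * m) * (255 - alpha(d)) + d * (255 - m * alpha(s)) */
    static uint8_t
    xor_ca_channel (uint8_t s, uint8_t m, uint8_t d,
                    uint8_t sa, uint8_t da)
    {
        unsigned t = mul_div255 (mul_div255 (s, m), 255 - da)
                   + mul_div255 (d, 255 - mul_div255 (m, sa));
        return t > 255 ? 255 : (uint8_t)t;
    }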
 
 static force_inline void
-core_combine_add_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm, int w)
+core_combine_add_ca_sse2 (uint32_t *      pd,
+                          const uint32_t *ps,
+                          const uint32_t *pm,
+                          int             w)
 {
     uint32_t s, m, d;
 
@@ -2233,14 +2544,15 @@ core_combine_add_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm,
 
     while (w && (unsigned long)pd & 15)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
-
-        *pd++ = pack_1x64_32 (_mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
-                                                              unpack_32_1x64 (m)),
-                                            unpack_32_1x64 (d)));
-        w--;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
+
+	*pd++ = pack_1x64_32 (
+	    _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
+					     unpack_32_1x64 (m)),
+			  unpack_32_1x64 (d)));
+	w--;
     }
 
     /* call prefetch hint to optimize cache load*/
@@ -2250,44 +2562,49 @@ core_combine_add_ca_sse2 (uint32_t *pd, const uint32_t *ps, const uint32_t *pm,
 
     while (w >= 4)
     {
-        /* fill cache line with next memory */
-        cache_prefetch_next ((__m128i*)ps);
-        cache_prefetch_next ((__m128i*)pd);
-        cache_prefetch_next ((__m128i*)pm);
+	/* fill cache line with next memory */
+	cache_prefetch_next ((__m128i*)ps);
+	cache_prefetch_next ((__m128i*)pd);
+	cache_prefetch_next ((__m128i*)pm);
 
-        xmm_src_hi = load_128_unaligned ((__m128i*)ps);
-        xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
-        xmm_dst_hi = load_128_aligned ((__m128i*)pd);
+	xmm_src_hi = load_128_unaligned ((__m128i*)ps);
+	xmm_mask_hi = load_128_unaligned ((__m128i*)pm);
+	xmm_dst_hi = load_128_aligned ((__m128i*)pd);
 
-        unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
-        unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-        unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+	unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	unpack_128_2x128 (xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
+	unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
-        pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_mask_lo, &xmm_mask_hi, &xmm_src_lo, &xmm_src_hi);
+	pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+			    &xmm_mask_lo, &xmm_mask_hi,
+			    &xmm_src_lo, &xmm_src_hi);
 
-        save_128_aligned( (__m128i*)pd, pack_2x128_128 (_mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
-                                                      _mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
+	save_128_aligned (
+	    (__m128i*)pd, pack_2x128_128 (
+		_mm_adds_epu8 (xmm_src_lo, xmm_dst_lo),
+		_mm_adds_epu8 (xmm_src_hi, xmm_dst_hi)));
 
-        ps += 4;
-        pd += 4;
-        pm += 4;
-        w -= 4;
+	ps += 4;
+	pd += 4;
+	pm += 4;
+	w -= 4;
     }
 
     while (w)
     {
-        s = *ps++;
-        m = *pm++;
-        d = *pd;
-
-        *pd++ = pack_1x64_32 (_mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
-                                                              unpack_32_1x64 (m)),
-                                            unpack_32_1x64 (d)));
-        w--;
+	s = *ps++;
+	m = *pm++;
+	d = *pd;
+
+	*pd++ = pack_1x64_32 (
+	    _mm_adds_pu8 (pix_multiply_1x64 (unpack_32_1x64 (s),
+					     unpack_32_1x64 (m)),
+			  unpack_32_1x64 (d)));
+	w--;
     }
 }
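
ADD needs no alpha terms at all; _mm_adds_pu8 / _mm_adds_epu8 supply
the clamp for free:

    /* one channel of ADD with component alpha:
     * dst = saturate (s * m / 255 + d) */
    static uint8_t
    add_ca_channel (uint8_t s, uint8_t m, uint8_t d)
    {
        unsigned t = mul_div255 (s, m) + d;
        return t > 255 ? 255 : (uint8_t)t;
    }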
 
-/* -------------------------------------------------------------------------------------------------
+/* ---------------------------------------------------
  * fb_compose_setup_sSE2
  */
 static force_inline __m64
@@ -2303,13 +2620,15 @@ create_mask_16_128 (uint16_t mask)
 }
 
 static force_inline __m64
-create_mask_2x32_64 (uint32_t mask0, uint32_t mask1)
+create_mask_2x32_64 (uint32_t mask0,
+                     uint32_t mask1)
 {
     return _mm_set_pi32 (mask0, mask1);
 }
 
 static force_inline __m128i
-create_mask_2x32_128 (uint32_t mask0, uint32_t mask1)
+create_mask_2x32_128 (uint32_t mask0,
+                      uint32_t mask1)
 {
     return _mm_set_epi32 (mask0, mask1, mask0, mask1);
 }
@@ -2317,382 +2636,484 @@ create_mask_2x32_128 (uint32_t mask0, uint32_t mask1)
 /* SSE2 code patch for fbcompose.c */
 
 static void
-sse2_combine_over_u (pixman_implementation_t *imp, pixman_op_t op,
-		  uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_over_u (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dst,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
 {
     core_combine_over_u_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_over_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			 uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_over_reverse_u (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               dst,
+                             const uint32_t *         src,
+                             const uint32_t *         mask,
+                             int                      width)
 {
     core_combine_over_reverse_u_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_in_u (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_in_u (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dst,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
 {
     core_combine_in_u_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_in_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-		       uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_in_reverse_u (pixman_implementation_t *imp,
+                           pixman_op_t              op,
+                           uint32_t *               dst,
+                           const uint32_t *         src,
+                           const uint32_t *         mask,
+                           int                      width)
 {
     core_combine_reverse_in_u_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_out_u (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_out_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dst,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     core_combine_out_u_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_out_reverse_u (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dst,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
 {
     core_combine_reverse_out_u_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_atop_u (pixman_implementation_t *imp, pixman_op_t op,
-		  uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_atop_u (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dst,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
 {
     core_combine_atop_u_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_atop_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			 uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               dst,
+                             const uint32_t *         src,
+                             const uint32_t *         mask,
+                             int                      width)
 {
     core_combine_reverse_atop_u_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_xor_u (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_xor_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dst,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     core_combine_xor_u_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_add_u (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_add_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dst,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     core_combine_add_u_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_saturate_u (pixman_implementation_t *imp, pixman_op_t op,
-		      uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_saturate_u (pixman_implementation_t *imp,
+                         pixman_op_t              op,
+                         uint32_t *               dst,
+                         const uint32_t *         src,
+                         const uint32_t *         mask,
+                         int                      width)
 {
     core_combine_saturate_u_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_src_ca (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_src_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dst,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
 {
     core_combine_src_ca_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_over_ca (pixman_implementation_t *imp, pixman_op_t op,
-		  uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_over_ca (pixman_implementation_t *imp,
+                      pixman_op_t              op,
+                      uint32_t *               dst,
+                      const uint32_t *         src,
+                      const uint32_t *         mask,
+                      int                      width)
 {
     core_combine_over_ca_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_over_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			 uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
+                              pixman_op_t              op,
+                              uint32_t *               dst,
+                              const uint32_t *         src,
+                              const uint32_t *         mask,
+                              int                      width)
 {
     core_combine_over_reverse_ca_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_in_ca (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_in_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dst,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     core_combine_in_ca_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_in_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-		       uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dst,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
 {
     core_combine_in_reverse_ca_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_out_ca (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_out_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dst,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
 {
     core_combine_out_ca_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_out_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               dst,
+                             const uint32_t *         src,
+                             const uint32_t *         mask,
+                             int                      width)
 {
     core_combine_out_reverse_ca_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_atop_ca (pixman_implementation_t *imp, pixman_op_t op,
-		  uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_atop_ca (pixman_implementation_t *imp,
+                      pixman_op_t              op,
+                      uint32_t *               dst,
+                      const uint32_t *         src,
+                      const uint32_t *         mask,
+                      int                      width)
 {
     core_combine_atop_ca_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_atop_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			 uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
+                              pixman_op_t              op,
+                              uint32_t *               dst,
+                              const uint32_t *         src,
+                              const uint32_t *         mask,
+                              int                      width)
 {
     core_combine_reverse_atop_ca_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_xor_ca (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_xor_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dst,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
 {
     core_combine_xor_ca_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-sse2_combine_add_ca (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dst, const uint32_t *src, const uint32_t *mask, int width)
+sse2_combine_add_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dst,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
 {
     core_combine_add_ca_sse2 (dst, src, mask, width);
-    _mm_empty();
+    _mm_empty ();
 }
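
All of these thin wrappers do the same two things: adapt a
core_combine_*_sse2 routine to pixman's generic combiner signature
(imp and op are accepted but unused), and finish with _mm_empty ().
The EMMS matters because the 1x64 helpers run in MMX registers, which
alias the x87 floating-point stack; skipping it would corrupt any FPU
arithmetic executed afterwards.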
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_n_8888
+/* -------------------------------------------------------------------
+ * composite_over_n_8888
  */
 
 static void
 sse2_composite_over_n_8888 (pixman_implementation_t *imp,
-			     pixman_op_t op,
-			    pixman_image_t * src_image,
-			    pixman_image_t * mask_image,
-			    pixman_image_t * dst_image,
-			    int32_t	src_x,
-			    int32_t	src_y,
-			    int32_t	mask_x,
-			    int32_t	mask_y,
-			    int32_t	dest_x,
-			    int32_t	dest_y,
-			    int32_t	width,
-			    int32_t	height)
-{
-    uint32_t	src;
-    uint32_t	*dst_line, *dst, d;
-    uint16_t	w;
-    int	dst_stride;
+                            pixman_op_t              op,
+                            pixman_image_t *         src_image,
+                            pixman_image_t *         mask_image,
+                            pixman_image_t *         dst_image,
+                            int32_t                  src_x,
+                            int32_t                  src_y,
+                            int32_t                  mask_x,
+                            int32_t                  mask_y,
+                            int32_t                  dest_x,
+                            int32_t                  dest_y,
+                            int32_t                  width,
+                            int32_t                  height)
+{
+    uint32_t src;
+    uint32_t    *dst_line, *dst, d;
+    uint16_t w;
+    int dst_stride;
     __m128i xmm_src, xmm_alpha;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     if (src == 0)
 	return;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
     xmm_src = expand_pixel_32_1x128 (src);
     xmm_alpha = expand_alpha_1x128 (xmm_src);
 
     while (height--)
     {
-        dst = dst_line;
+	dst = dst_line;
 
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)dst);
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)dst);
 
-        dst_line += dst_stride;
-        w = width;
+	dst_line += dst_stride;
+	w = width;
 
-        while (w && (unsigned long)dst & 15)
-        {
-            d = *dst;
-            *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
-                                              _mm_movepi64_pi64 (xmm_alpha),
-                                              unpack_32_1x64 (d)));
-            w--;
-        }
+	while (w && (unsigned long)dst & 15)
+	{
+	    d = *dst;
+	    *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
+	                                      _mm_movepi64_pi64 (xmm_alpha),
+	                                      unpack_32_1x64 (d)));
+	    w--;
+	}
 
-        cache_prefetch ((__m128i*)dst);
+	cache_prefetch ((__m128i*)dst);
 
-        while (w >= 4)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)dst);
+	while (w >= 4)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)dst);
 
-            xmm_dst = load_128_aligned ((__m128i*)dst);
+	    xmm_dst = load_128_aligned ((__m128i*)dst);
 
-            unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
-            over_2x128 (&xmm_src, &xmm_src, &xmm_alpha, &xmm_alpha, &xmm_dst_lo, &xmm_dst_hi);
+	    over_2x128 (&xmm_src, &xmm_src,
+			&xmm_alpha, &xmm_alpha,
+			&xmm_dst_lo, &xmm_dst_hi);
 
-            /* rebuid the 4 pixel data and save*/
-            save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	    /* rebuild the 4 pixel data and save */
+	    save_128_aligned (
+		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-            w -= 4;
-            dst += 4;
-        }
+	    w -= 4;
+	    dst += 4;
+	}
 
-        while (w)
-        {
-            d = *dst;
-            *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
-                                              _mm_movepi64_pi64 (xmm_alpha),
-                                              unpack_32_1x64 (d)));
-            w--;
-        }
+	while (w)
+	{
+	    d = *dst;
+	    *dst++ = pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
+	                                      _mm_movepi64_pi64 (xmm_alpha),
+	                                      unpack_32_1x64 (d)));
+	    w--;
+	}
 
     }
-    _mm_empty();
+    _mm_empty ();
 }
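
The function above is the template every SSE2 fast path in this file
follows: blend single pixels until dst reaches a 16-byte boundary, run
an aligned four-pixel vector body, then finish with a scalar tail.
Stripped to its control flow (process1 and process4 are placeholders
for the blend steps shown above):

    while (w && ((unsigned long)dst & 15)) /* head: align to 16 bytes */
    {
        process1 (dst++);
        w--;
    }
    while (w >= 4)                         /* body: one __m128i, 4 px */
    {
        process4 (dst);
        dst += 4;
        w -= 4;
    }
    while (w)                              /* tail: remaining 1-3 px */
    {
        process1 (dst++);
        w--;
    }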
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_n_0565
+/* ---------------------------------------------------------------------
+ * composite_over_n_0565
  */
 static void
 sse2_composite_over_n_0565 (pixman_implementation_t *imp,
-			     pixman_op_t op,
-			    pixman_image_t * src_image,
-			    pixman_image_t * mask_image,
-			    pixman_image_t * dst_image,
-			    int32_t	src_x,
-			    int32_t	src_y,
-			    int32_t	mask_x,
-			    int32_t	mask_y,
-			    int32_t	dest_x,
-			    int32_t	dest_y,
-			    int32_t	width,
-			    int32_t	height)
-{
-    uint32_t	src;
-    uint16_t	*dst_line, *dst, d;
-    uint16_t	w;
-    int	        dst_stride;
+                            pixman_op_t              op,
+                            pixman_image_t *         src_image,
+                            pixman_image_t *         mask_image,
+                            pixman_image_t *         dst_image,
+                            int32_t                  src_x,
+                            int32_t                  src_y,
+                            int32_t                  mask_x,
+                            int32_t                  mask_y,
+                            int32_t                  dest_x,
+                            int32_t                  dest_y,
+                            int32_t                  width,
+                            int32_t                  height)
+{
+    uint32_t src;
+    uint16_t    *dst_line, *dst, d;
+    uint16_t w;
+    int dst_stride;
     __m128i xmm_src, xmm_alpha;
     __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     if (src == 0)
-        return;
+	return;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
     xmm_src = expand_pixel_32_1x128 (src);
     xmm_alpha = expand_alpha_1x128 (xmm_src);
 
     while (height--)
     {
-        dst = dst_line;
+	dst = dst_line;
 
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)dst);
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)dst);
 
-        dst_line += dst_stride;
-        w = width;
+	dst_line += dst_stride;
+	w = width;
 
-        while (w && (unsigned long)dst & 15)
-        {
-            d = *dst;
+	while (w && (unsigned long)dst & 15)
+	{
+	    d = *dst;
 
-            *dst++ = pack_565_32_16 (pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
-                                                             _mm_movepi64_pi64 (xmm_alpha),
-                                                             expand565_16_1x64 (d))));
-            w--;
-        }
+	    *dst++ = pack_565_32_16 (
+		pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
+					 _mm_movepi64_pi64 (xmm_alpha),
+					 expand565_16_1x64 (d))));
+	    w--;
+	}
 
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)dst);
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)dst);
 
-        while (w >= 8)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)dst);
+	while (w >= 8)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)dst);
 
 	    xmm_dst = load_128_aligned ((__m128i*)dst);
-	    
-	    unpack_565_128_4x128 (xmm_dst, &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-	    
-            over_2x128 (&xmm_src, &xmm_src, &xmm_alpha, &xmm_alpha, &xmm_dst0, &xmm_dst1);
-            over_2x128 (&xmm_src, &xmm_src, &xmm_alpha, &xmm_alpha, &xmm_dst2, &xmm_dst3);
-
-            xmm_dst = pack_565_4x128_128 (&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-            save_128_aligned ((__m128i*)dst, xmm_dst);
-
-            dst += 8;
-            w -= 8;
-        }
-
-        while (w--)
-        {
-            d = *dst;
-            *dst++ = pack_565_32_16 (pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
-                                                             _mm_movepi64_pi64 (xmm_alpha),
-                                                             expand565_16_1x64 (d))));
-        }
+
+	    unpack_565_128_4x128 (xmm_dst,
+				  &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+
+	    over_2x128 (&xmm_src, &xmm_src,
+			&xmm_alpha, &xmm_alpha,
+			&xmm_dst0, &xmm_dst1);
+	    over_2x128 (&xmm_src, &xmm_src,
+			&xmm_alpha, &xmm_alpha,
+			&xmm_dst2, &xmm_dst3);
+
+	    xmm_dst = pack_565_4x128_128 (
+		&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+
+	    save_128_aligned ((__m128i*)dst, xmm_dst);
+
+	    dst += 8;
+	    w -= 8;
+	}
+
+	while (w--)
+	{
+	    d = *dst;
+	    *dst++ = pack_565_32_16 (
+		pack_1x64_32 (over_1x64 (_mm_movepi64_pi64 (xmm_src),
+					 _mm_movepi64_pi64 (xmm_alpha),
+					 expand565_16_1x64 (d))));
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
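
The r5g6b5 variant runs the same blend in a8r8g8b8 space, so each
128-bit vector now carries eight pixels: unpack_565_128_4x128 widens
them for the math and pack_565_4x128_128 narrows the results again.
A scalar sketch of the narrowing step, assuming pack_565_32_16 (whose
body is not part of this hunk) keeps the top 5/6/5 bits per channel:

    /* assumed scalar equivalent of pack_565_32_16 */
    static uint16_t
    pack_565 (uint32_t p)
    {
        return (uint16_t)(((p >> 8) & 0xf800) |  /* r8 -> r5 */
                          ((p >> 5) & 0x07e0) |  /* g8 -> g6 */
                          ((p >> 3) & 0x001f)); /* b8 -> b5 */
    }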
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_n_8888_8888_ca
+/* ---------------------------------------------------------------------------
+ * composite_over_n_8888_8888_ca
  */
 
 static void
 sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
-				       pixman_op_t op,
-				      pixman_image_t * src_image,
-				      pixman_image_t * mask_image,
-				      pixman_image_t * dst_image,
-				      int32_t	src_x,
-				      int32_t	src_y,
-				      int32_t	mask_x,
-				      int32_t	mask_y,
-				      int32_t	dest_x,
-				      int32_t	dest_y,
-				      int32_t	width,
-				      int32_t	height)
-{
-    uint32_t	src;
-    uint32_t	*dst_line, d;
-    uint32_t	*mask_line, m;
-    uint32_t    pack_cmp;
-    int	dst_stride, mask_stride;
+                                    pixman_op_t              op,
+                                    pixman_image_t *         src_image,
+                                    pixman_image_t *         mask_image,
+                                    pixman_image_t *         dst_image,
+                                    int32_t                  src_x,
+                                    int32_t                  src_y,
+                                    int32_t                  mask_x,
+                                    int32_t                  mask_y,
+                                    int32_t                  dest_x,
+                                    int32_t                  dest_y,
+                                    int32_t                  width,
+                                    int32_t                  height)
+{
+    uint32_t src;
+    uint32_t    *dst_line, d;
+    uint32_t    *mask_line, m;
+    uint32_t pack_cmp;
+    int dst_stride, mask_stride;
 
     __m128i xmm_src, xmm_alpha;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
@@ -2700,253 +3121,266 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 
     __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     if (src == 0)
 	return;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
-    xmm_src = _mm_unpacklo_epi8 (create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
+    xmm_src = _mm_unpacklo_epi8 (
+	create_mask_2x32_128 (src, src), _mm_setzero_si128 ());
     xmm_alpha = expand_alpha_1x128 (xmm_src);
     mmx_src   = _mm_movepi64_pi64 (xmm_src);
     mmx_alpha = _mm_movepi64_pi64 (xmm_alpha);
 
     while (height--)
     {
-        int w = width;
-        const uint32_t *pm = (uint32_t *)mask_line;
-        uint32_t *pd = (uint32_t *)dst_line;
-
-        dst_line += dst_stride;
-        mask_line += mask_stride;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)pd);
-        cache_prefetch ((__m128i*)pm);
-
-        while (w && (unsigned long)pd & 15)
-        {
-            m = *pm++;
-
-            if (m)
-            {
-                d = *pd;
-                mmx_mask = unpack_32_1x64 (m);
-                mmx_dest = unpack_32_1x64 (d);
-
-                *pd = pack_1x64_32 (in_over_1x64 (&mmx_src,
-                                                 &mmx_alpha,
-                                                 &mmx_mask,
-                                                 &mmx_dest));
-            }
-
-            pd++;
-            w--;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)pd);
-        cache_prefetch ((__m128i*)pm);
-
-        while (w >= 4)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)pd);
-            cache_prefetch_next ((__m128i*)pm);
-
-            xmm_mask = load_128_unaligned ((__m128i*)pm);
-
-            pack_cmp = _mm_movemask_epi8 (_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128()));
-
-            /* if all bits in mask are zero, pack_cmp are equal to 0xffff */
-            if (pack_cmp != 0xffff)
-            {
-                xmm_dst = load_128_aligned ((__m128i*)pd);
-
-                unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-                unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-                in_over_2x128 (&xmm_src, &xmm_src, &xmm_alpha, &xmm_alpha, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-                save_128_aligned ((__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-            }
-
-            pd += 4;
-            pm += 4;
-            w -= 4;
-        }
-
-        while (w)
-        {
-            m = *pm++;
-
-            if (m)
-            {
-                d = *pd;
-                mmx_mask = unpack_32_1x64 (m);
-                mmx_dest = unpack_32_1x64 (d);
-
-                *pd = pack_1x64_32 (in_over_1x64 (&mmx_src,
-                                                 &mmx_alpha,
-                                                 &mmx_mask,
-                                                 &mmx_dest));
-            }
-
-            pd++;
-            w--;
-        }
+	int w = width;
+	const uint32_t *pm = (uint32_t *)mask_line;
+	uint32_t *pd = (uint32_t *)dst_line;
+
+	dst_line += dst_stride;
+	mask_line += mask_stride;
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)pd);
+	cache_prefetch ((__m128i*)pm);
+
+	while (w && (unsigned long)pd & 15)
+	{
+	    m = *pm++;
+
+	    if (m)
+	    {
+		d = *pd;
+		mmx_mask = unpack_32_1x64 (m);
+		mmx_dest = unpack_32_1x64 (d);
+
+		*pd = pack_1x64_32 (in_over_1x64 (&mmx_src,
+		                                  &mmx_alpha,
+		                                  &mmx_mask,
+		                                  &mmx_dest));
+	    }
+
+	    pd++;
+	    w--;
+	}
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)pd);
+	cache_prefetch ((__m128i*)pm);
+
+	while (w >= 4)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)pd);
+	    cache_prefetch_next ((__m128i*)pm);
+
+	    xmm_mask = load_128_unaligned ((__m128i*)pm);
+
+	    pack_cmp =
+		_mm_movemask_epi8 (
+		    _mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
+
+	    /* if all bits in the mask are zero, pack_cmp is equal to 0xffff */
+	    if (pack_cmp != 0xffff)
+	    {
+		xmm_dst = load_128_aligned ((__m128i*)pd);
+
+		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+		in_over_2x128 (&xmm_src, &xmm_src,
+			       &xmm_alpha, &xmm_alpha,
+			       &xmm_mask_lo, &xmm_mask_hi,
+			       &xmm_dst_lo, &xmm_dst_hi);
+
+		save_128_aligned (
+		    (__m128i*)pd, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	    }
+
+	    pd += 4;
+	    pm += 4;
+	    w -= 4;
+	}
+
+	while (w)
+	{
+	    m = *pm++;
+
+	    if (m)
+	    {
+		d = *pd;
+		mmx_mask = unpack_32_1x64 (m);
+		mmx_dest = unpack_32_1x64 (d);
+
+		*pd = pack_1x64_32 (
+		    in_over_1x64 (&mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest));
+	    }
+
+	    pd++;
+	    w--;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
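
The pack_cmp test in the loop above is the usual SSE2 idiom for "is this
4-pixel mask block entirely zero": _mm_cmpeq_epi32 turns each zero lane
into all-ones, and _mm_movemask_epi8 gathers the top bit of all 16 bytes,
so the result is 0xffff exactly when every lane is zero and the block can
be skipped without touching dst. As a standalone helper:

    #include <emmintrin.h>

    /* Return non-zero if all four 32-bit lanes of v are zero. */
    static int
    all_lanes_zero (__m128i v)
    {
        __m128i cmp = _mm_cmpeq_epi32 (v, _mm_setzero_si128 ());

        return _mm_movemask_epi8 (cmp) == 0xffff;
    }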
 
-
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_8888_n_8888
+/* --------------------------------------------------------------------
+ * composite_over_8888_n_8888
  */
 
 static void
 sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
-				pixman_op_t op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t	src_x,
-			       int32_t	src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t     width,
-			       int32_t     height)
-{
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    uint32_t	mask;
-    uint16_t	w;
-    int	dst_stride, src_stride;
+                                 pixman_op_t              op,
+                                 pixman_image_t *         src_image,
+                                 pixman_image_t *         mask_image,
+                                 pixman_image_t *         dst_image,
+                                 int32_t                  src_x,
+                                 int32_t                  src_y,
+                                 int32_t                  mask_x,
+                                 int32_t                  mask_y,
+                                 int32_t                  dest_x,
+                                 int32_t                  dest_y,
+                                 int32_t                  width,
+                                 int32_t                  height)
+{
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    uint32_t mask;
+    uint16_t w;
+    int dst_stride, src_stride;
 
     __m128i xmm_mask;
     __m128i xmm_src, xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_alpha_lo, xmm_alpha_hi;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
     mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
 
     xmm_mask = create_mask_16_128 (mask >> 24);
 
     while (height--)
     {
-        dst = dst_line;
-        dst_line += dst_stride;
-        src = src_line;
-        src_line += src_stride;
-        w = width;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)dst);
-        cache_prefetch ((__m128i*)src);
-
-        while (w && (unsigned long)dst & 15)
-        {
-            uint32_t s = *src++;
-            uint32_t d = *dst;
-
-            __m64 ms = unpack_32_1x64 (s);
-            __m64 alpha    = expand_alpha_1x64 (ms);
-            __m64 dest     = _mm_movepi64_pi64 (xmm_mask);
-            __m64 alpha_dst = unpack_32_1x64 (d);
-
-            *dst++ = pack_1x64_32 (in_over_1x64 (&ms,
-                                                &alpha,
-                                                &dest,
-                                                &alpha_dst));
-
-            w--;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)dst);
-        cache_prefetch ((__m128i*)src);
-
-        while (w >= 4)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)dst);
-            cache_prefetch_next ((__m128i*)src);
-
-            xmm_src = load_128_unaligned ((__m128i*)src);
-            xmm_dst = load_128_aligned ((__m128i*)dst);
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)dst);
+	cache_prefetch ((__m128i*)src);
+
+	while (w && (unsigned long)dst & 15)
+	{
+	    uint32_t s = *src++;
+	    uint32_t d = *dst;
+
+	    __m64 ms = unpack_32_1x64 (s);
+	    __m64 alpha    = expand_alpha_1x64 (ms);
+	    __m64 dest     = _mm_movepi64_pi64 (xmm_mask);
+	    __m64 alpha_dst = unpack_32_1x64 (d);
+
+	    *dst++ = pack_1x64_32 (
+		in_over_1x64 (&ms, &alpha, &dest, &alpha_dst));
+
+	    w--;
+	}
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)dst);
+	cache_prefetch ((__m128i*)src);
+
+	while (w >= 4)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)dst);
+	    cache_prefetch_next ((__m128i*)src);
+
+	    xmm_src = load_128_unaligned ((__m128i*)src);
+	    xmm_dst = load_128_aligned ((__m128i*)dst);
 
-            unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-            unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-            expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
+	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+	    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+				&xmm_alpha_lo, &xmm_alpha_hi);
 
-            in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_mask, &xmm_mask, &xmm_dst_lo, &xmm_dst_hi);
+	    in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+			   &xmm_alpha_lo, &xmm_alpha_hi,
+			   &xmm_mask, &xmm_mask,
+			   &xmm_dst_lo, &xmm_dst_hi);
 
-            save_128_aligned( (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	    save_128_aligned (
+		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-            dst += 4;
-            src += 4;
-            w -= 4;
-        }
+	    dst += 4;
+	    src += 4;
+	    w -= 4;
+	}
 
-        while (w)
-        {
-            uint32_t s = *src++;
-            uint32_t d = *dst;
+	while (w)
+	{
+	    uint32_t s = *src++;
+	    uint32_t d = *dst;
 
-            __m64 ms = unpack_32_1x64 (s);
-            __m64 alpha = expand_alpha_1x64 (ms);
-            __m64 mask  = _mm_movepi64_pi64 (xmm_mask);
-            __m64 dest  = unpack_32_1x64 (d);
+	    __m64 ms = unpack_32_1x64 (s);
+	    __m64 alpha = expand_alpha_1x64 (ms);
+	    __m64 mask  = _mm_movepi64_pi64 (xmm_mask);
+	    __m64 dest  = unpack_32_1x64 (d);
 
-            *dst++ = pack_1x64_32 (in_over_1x64 (&ms,
-                                                &alpha,
-                                                &mask,
-                                                &dest));
+	    *dst++ = pack_1x64_32 (
+		in_over_1x64 (&ms, &alpha, &mask, &dest));
 
-            w--;
-        }
+	    w--;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
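
in_over_1x64 and in_over_2x128 evaluate (src IN mask) OVER dest on pixels
unpacked to 16 bits per channel. Per premultiplied 8-bit channel the
scalar equivalent is dst = s*m/255 + dst*(255 - srca*m/255)/255, with the
exact divide by 255 done by the classic add-and-fold trick. A scalar
model of the operator (not pixman's SIMD code):

    #include <stdint.h>

    /* Exact (a * b) / 255 with rounding, without a division. */
    static uint8_t
    mul_un8 (uint8_t a, uint8_t b)
    {
        uint32_t t = (uint32_t)a * b + 128;

        return (uint8_t)((t + (t >> 8)) >> 8);
    }

    /* One premultiplied channel of (src IN mask) OVER dst;
     * for valid premultiplied pixels the sum cannot overflow. */
    static uint8_t
    in_over_channel (uint8_t s, uint8_t srca, uint8_t m, uint8_t d)
    {
        uint8_t sm = mul_un8 (s, m);        /* src IN mask   */
        uint8_t am = mul_un8 (srca, m);     /* alpha IN mask */

        return sm + mul_un8 (d, 255 - am);  /* OVER dst      */
    }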
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_x888_n_8888
+/* ---------------------------------------------------------------------
+ * composite_over_x888_n_8888
  */
 static void
 sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
-				pixman_op_t op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t	src_x,
-			       int32_t	src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t     width,
-			       int32_t     height)
-{
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    uint32_t	mask;
-    int	dst_stride, src_stride;
-    uint16_t	w;
+                                 pixman_op_t              op,
+                                 pixman_image_t *         src_image,
+                                 pixman_image_t *         mask_image,
+                                 pixman_image_t *         dst_image,
+                                 int32_t                  src_x,
+                                 int32_t                  src_y,
+                                 int32_t                  mask_x,
+                                 int32_t                  mask_y,
+                                 int32_t                  dest_x,
+                                 int32_t                  dest_y,
+                                 int32_t                  width,
+                                 int32_t                  height)
+{
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    uint32_t mask;
+    int dst_stride, src_stride;
+    uint16_t w;
 
     __m128i xmm_mask, xmm_alpha;
     __m128i xmm_src, xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
     mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
 
     xmm_mask = create_mask_16_128 (mask >> 24);
@@ -2954,160 +3388,166 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
 
     while (height--)
     {
-        dst = dst_line;
-        dst_line += dst_stride;
-        src = src_line;
-        src_line += src_stride;
-        w = width;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)dst);
-        cache_prefetch ((__m128i*)src);
-
-        while (w && (unsigned long)dst & 15)
-        {
-            uint32_t s = (*src++) | 0xff000000;
-            uint32_t d = *dst;
-
-            __m64 src   = unpack_32_1x64 (s);
-            __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
-            __m64 mask  = _mm_movepi64_pi64 (xmm_mask);
-            __m64 dest  = unpack_32_1x64 (d);
-
-            *dst++ = pack_1x64_32 (in_over_1x64 (&src,
-                                                &alpha,
-                                                &mask,
-                                                &dest));
-
-            w--;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)dst);
-        cache_prefetch ((__m128i*)src);
-
-        while (w >= 4)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)dst);
-            cache_prefetch_next ((__m128i*)src);
-
-            xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000);
-            xmm_dst = load_128_aligned ((__m128i*)dst);
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)dst);
+	cache_prefetch ((__m128i*)src);
+
+	while (w && (unsigned long)dst & 15)
+	{
+	    uint32_t s = (*src++) | 0xff000000;
+	    uint32_t d = *dst;
+
+	    __m64 src   = unpack_32_1x64 (s);
+	    __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
+	    __m64 mask  = _mm_movepi64_pi64 (xmm_mask);
+	    __m64 dest  = unpack_32_1x64 (d);
+
+	    *dst++ = pack_1x64_32 (
+		in_over_1x64 (&src, &alpha, &mask, &dest));
+
+	    w--;
+	}
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)dst);
+	cache_prefetch ((__m128i*)src);
+
+	while (w >= 4)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)dst);
+	    cache_prefetch_next ((__m128i*)src);
+
+	    xmm_src = _mm_or_si128 (
+		load_128_unaligned ((__m128i*)src), mask_ff000000);
+	    xmm_dst = load_128_aligned ((__m128i*)dst);
 
-            unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-            unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
 
-            in_over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha, &xmm_alpha, &xmm_mask, &xmm_mask, &xmm_dst_lo, &xmm_dst_hi);
+	    in_over_2x128 (&xmm_src_lo, &xmm_src_hi,
+			   &xmm_alpha, &xmm_alpha,
+			   &xmm_mask, &xmm_mask,
+			   &xmm_dst_lo, &xmm_dst_hi);
 
-            save_128_aligned( (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	    save_128_aligned (
+		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
 
-            dst += 4;
-            src += 4;
-            w -= 4;
+	    dst += 4;
+	    src += 4;
+	    w -= 4;
 
-        }
+	}
 
-        while (w)
-        {
-            uint32_t s = (*src++) | 0xff000000;
-            uint32_t d = *dst;
+	while (w)
+	{
+	    uint32_t s = (*src++) | 0xff000000;
+	    uint32_t d = *dst;
 
-            __m64 src  = unpack_32_1x64 (s);
-            __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
-            __m64 mask  = _mm_movepi64_pi64 (xmm_mask);
-            __m64 dest  = unpack_32_1x64 (d);
+	    __m64 src  = unpack_32_1x64 (s);
+	    __m64 alpha = _mm_movepi64_pi64 (xmm_alpha);
+	    __m64 mask  = _mm_movepi64_pi64 (xmm_mask);
+	    __m64 dest  = unpack_32_1x64 (d);
 
-            *dst++ = pack_1x64_32 (in_over_1x64 (&src,
-                                                &alpha,
-                                                &mask,
-                                                &dest));
+	    *dst++ = pack_1x64_32 (
+		in_over_1x64 (&src, &alpha, &mask, &dest));
 
-            w--;
-        }
+	    w--;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
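
The only difference from the 8888 variant above is the source format:
x8r8g8b8 pixels carry no alpha, so the loops force them opaque by OR-ing
in 0xff000000 (mask_ff000000 in the vector path) before running the same
in_over pipeline. The scalar form of that step:

    #include <stdint.h>

    /* Treat an x8r8g8b8 pixel as fully opaque a8r8g8b8. */
    static uint32_t
    x888_to_8888 (uint32_t p)
    {
        return p | 0xff000000;
    }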
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_8888_8888
+/* --------------------------------------------------------------------
+ * composite_over_8888_8888
  */
 static void
 sse2_composite_over_8888_8888 (pixman_implementation_t *imp,
-			      pixman_op_t op,
-			     pixman_image_t * src_image,
-			     pixman_image_t * mask_image,
-			     pixman_image_t * dst_image,
-			     int32_t	src_x,
-			     int32_t	src_y,
-			     int32_t      mask_x,
-			     int32_t      mask_y,
-			     int32_t      dest_x,
-			     int32_t      dest_y,
-			     int32_t     width,
-			     int32_t     height)
-{
-    int	        dst_stride, src_stride;
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+                               pixman_op_t              op,
+                               pixman_image_t *         src_image,
+                               pixman_image_t *         mask_image,
+                               pixman_image_t *         dst_image,
+                               int32_t                  src_x,
+                               int32_t                  src_y,
+                               int32_t                  mask_x,
+                               int32_t                  mask_y,
+                               int32_t                  dest_x,
+                               int32_t                  dest_y,
+                               int32_t                  width,
+                               int32_t                  height)
+{
+    int dst_stride, src_stride;
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
     dst = dst_line;
     src = src_line;
 
     while (height--)
     {
-        core_combine_over_u_sse2 (dst, src, NULL, width);
+	core_combine_over_u_sse2 (dst, src, NULL, width);
 
-        dst += dst_stride;
-        src += src_stride;
+	dst += dst_stride;
+	src += src_stride;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_8888_0565
+/* ------------------------------------------------------------------
+ * composite_over_8888_0565
  */
 static force_inline uint16_t
-fast_composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
+composite_over_8888_0565pixel (uint32_t src, uint16_t dst)
 {
-    __m64       ms;
+    __m64 ms;
 
     ms = unpack_32_1x64 (src);
-    return pack_565_32_16( pack_1x64_32 (over_1x64 (ms,
-                                                   expand_alpha_1x64 (ms),
-                                                   expand565_16_1x64 (dst))));
+    return pack_565_32_16 (
+	pack_1x64_32 (
+	    over_1x64 (
+		ms, expand_alpha_1x64 (ms), expand565_16_1x64 (dst))));
 }
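
The 0565 paths rest on two conversions: the pack keeps the top 5/6/5 bits
of each channel, and the expand widens them back to 8 bits. A scalar
sketch of both directions, assuming the common bit-replication scheme for
the expand (so 0x1f widens to 0xff); the pack_565_*/expand565_* helpers
above do the same work on several pixels at once:

    #include <stdint.h>

    /* x8r8g8b8 -> r5g6b5: keep the top 5/6/5 bits of r/g/b. */
    static uint16_t
    pack_565 (uint32_t p)
    {
        return (uint16_t)(((p >> 8) & 0xf800) |
                          ((p >> 5) & 0x07e0) |
                          ((p >> 3) & 0x001f));
    }

    /* r5g6b5 -> x8r8g8b8: replicate high bits into the low bits. */
    static uint32_t
    expand_565 (uint16_t p)
    {
        uint32_t r = (p >> 11) & 0x1f;
        uint32_t g = (p >> 5) & 0x3f;
        uint32_t b = p & 0x1f;

        r = (r << 3) | (r >> 2);
        g = (g << 2) | (g >> 4);
        b = (b << 3) | (b >> 2);

        return (r << 16) | (g << 8) | b;
    }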
 
 static void
 sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
-			      pixman_op_t op,
-			     pixman_image_t * src_image,
-			     pixman_image_t * mask_image,
-			     pixman_image_t * dst_image,
-			     int32_t      src_x,
-			     int32_t      src_y,
-			     int32_t      mask_x,
-			     int32_t      mask_y,
-			     int32_t      dest_x,
-			     int32_t      dest_y,
-			     int32_t     width,
-			     int32_t     height)
-{
-    uint16_t	*dst_line, *dst, d;
-    uint32_t	*src_line, *src, s;
-    int	dst_stride, src_stride;
-    uint16_t	w;
+                               pixman_op_t              op,
+                               pixman_image_t *         src_image,
+                               pixman_image_t *         mask_image,
+                               pixman_image_t *         dst_image,
+                               int32_t                  src_x,
+                               int32_t                  src_y,
+                               int32_t                  mask_x,
+                               int32_t                  mask_y,
+                               int32_t                  dest_x,
+                               int32_t                  dest_y,
+                               int32_t                  width,
+                               int32_t                  height)
+{
+    uint16_t    *dst_line, *dst, d;
+    uint32_t    *src_line, *src, s;
+    int dst_stride, src_stride;
+    uint16_t w;
 
     __m128i xmm_alpha_lo, xmm_alpha_hi;
     __m128i xmm_src, xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
 #if 0
     /* FIXME
@@ -3120,102 +3560,115 @@ sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
 
     while (height--)
     {
-        dst = dst_line;
-        src = src_line;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)src);
-        cache_prefetch ((__m128i*)dst);
-
-        dst_line += dst_stride;
-        src_line += src_stride;
-        w = width;
-
-        /* Align dst on a 16-byte boundary */
-        while (w &&
-               ((unsigned long)dst & 15))
-        {
-            s = *src++;
-            d = *dst;
-
-            *dst++ = fast_composite_over_8888_0565pixel (s, d);
-            w--;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)src);
-        cache_prefetch ((__m128i*)dst);
-
-        /* It's a 8 pixel loop */
-        while (w >= 8)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)src);
-            cache_prefetch_next ((__m128i*)dst);
-
-            /* I'm loading unaligned because I'm not sure about the address alignment. */
-            xmm_src = load_128_unaligned ((__m128i*) src);
-            xmm_dst = load_128_aligned ((__m128i*) dst);
-
-            /* Unpacking */
-            unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-            unpack_565_128_4x128 (xmm_dst, &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-            expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
-
-            /* I'm loading next 4 pixels from memory before to optimze the memory read. */
-            xmm_src = load_128_unaligned ((__m128i*) (src+4));
-
-            over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_dst0, &xmm_dst1);
-
-            /* Unpacking */
-            unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-            expand_alpha_2x128 (xmm_src_lo, xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi);
-
-            over_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_alpha_lo, &xmm_alpha_hi, &xmm_dst2, &xmm_dst3);
-
-            save_128_aligned ((__m128i*)dst, pack_565_4x128_128 (&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
-            w -= 8;
-            dst += 8;
-            src += 8;
-        }
-
-        while (w--)
-        {
-            s = *src++;
-            d = *dst;
-
-            *dst++ = fast_composite_over_8888_0565pixel (s, d);
-        }
+	dst = dst_line;
+	src = src_line;
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)src);
+	cache_prefetch ((__m128i*)dst);
+
+	dst_line += dst_stride;
+	src_line += src_stride;
+	w = width;
+
+	/* Align dst on a 16-byte boundary */
+	while (w && ((unsigned long)dst & 15))
+	{
+	    s = *src++;
+	    d = *dst;
+
+	    *dst++ = composite_over_8888_0565pixel (s, d);
+	    w--;
+	}
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)src);
+	cache_prefetch ((__m128i*)dst);
+
+	/* It's an 8-pixel loop */
+	while (w >= 8)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)src);
+	    cache_prefetch_next ((__m128i*)dst);
+
+	    /* Load the source unaligned, since its address
+	     * may not be 16-byte aligned.
+	     */
+	    xmm_src = load_128_unaligned ((__m128i*) src);
+	    xmm_dst = load_128_aligned ((__m128i*) dst);
+
+	    /* Unpacking */
+	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+	    unpack_565_128_4x128 (xmm_dst,
+				  &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+	    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+				&xmm_alpha_lo, &xmm_alpha_hi);
+
+	    /* Load the next 4 source pixels early, so that the
+	     * read overlaps with the computation below.
+	     */
+	    xmm_src = load_128_unaligned ((__m128i*) (src + 4));
+
+	    over_2x128 (&xmm_src_lo, &xmm_src_hi,
+			&xmm_alpha_lo, &xmm_alpha_hi,
+			&xmm_dst0, &xmm_dst1);
+
+	    /* Unpacking */
+	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+	    expand_alpha_2x128 (xmm_src_lo, xmm_src_hi,
+				&xmm_alpha_lo, &xmm_alpha_hi);
+
+	    over_2x128 (&xmm_src_lo, &xmm_src_hi,
+			&xmm_alpha_lo, &xmm_alpha_hi,
+			&xmm_dst2, &xmm_dst3);
+
+	    save_128_aligned (
+		(__m128i*)dst, pack_565_4x128_128 (
+		    &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
+
+	    w -= 8;
+	    dst += 8;
+	    src += 8;
+	}
+
+	while (w--)
+	{
+	    s = *src++;
+	    d = *dst;
+
+	    *dst++ = composite_over_8888_0565pixel (s, d);
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
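
cache_prefetch and cache_prefetch_next, called throughout these loops, are
thin wrappers (defined earlier in pixman-sse2.c) around the SSE prefetch
hint: they ask the CPU to pull an upcoming cache line into the caches
before the corresponding loads issue. The shape of such a wrapper, as a
sketch:

    #include <xmmintrin.h>

    /* Hint the CPU to fetch the cache line at addr into all
     * cache levels; purely advisory, never faults. */
    static void
    prefetch_line (const void *addr)
    {
        _mm_prefetch ((const char *)addr, _MM_HINT_T0);
    }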
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_n_8_8888
+/* -----------------------------------------------------------------
+ * composite_over_n_8_8888
  */
 
 static void
 sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
-				   pixman_op_t op,
-				  pixman_image_t * src_image,
-				  pixman_image_t * mask_image,
-				  pixman_image_t * dst_image,
-				  int32_t      src_x,
-				  int32_t      src_y,
-				  int32_t      mask_x,
-				  int32_t      mask_y,
-				  int32_t      dest_x,
-				  int32_t      dest_y,
-				  int32_t     width,
-				  int32_t     height)
-{
-    uint32_t	src, srca;
-    uint32_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
+{
+    uint32_t src, srca;
+    uint32_t *dst_line, *dst;
+    uint8_t *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w;
     uint32_t m, d;
 
     __m128i xmm_src, xmm_alpha, xmm_def;
@@ -3224,14 +3677,16 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
 
     __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
 	return;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
     xmm_def = create_mask_2x32_128 (src, src);
     xmm_src = expand_pixel_32_1x128 (src);
@@ -3241,114 +3696,119 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
 
     while (height--)
     {
-        dst = dst_line;
-        dst_line += dst_stride;
-        mask = mask_line;
-        mask_line += mask_stride;
-        w = width;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w && (unsigned long)dst & 15)
-        {
-            uint8_t m = *mask++;
-
-            if (m)
-            {
-                d = *dst;
-                mmx_mask = expand_pixel_8_1x64 (m);
-                mmx_dest = unpack_32_1x64 (d);
-
-                *dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
-                                                  &mmx_alpha,
-                                                  &mmx_mask,
-                                                  &mmx_dest));
-            }
-
-            w--;
-            dst++;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w >= 4)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)mask);
-            cache_prefetch_next ((__m128i*)dst);
-
-            m = *((uint32_t*)mask);
-
-            if (srca == 0xff && m == 0xffffffff)
-            {
-                save_128_aligned ((__m128i*)dst, xmm_def);
-            }
-            else if (m)
-            {
-                xmm_dst = load_128_aligned ((__m128i*) dst);
-                xmm_mask = unpack_32_1x128 (m);
-                xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128());
-
-                /* Unpacking */
-                unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-                unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-                expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-                in_over_2x128 (&xmm_src, &xmm_src, &xmm_alpha, &xmm_alpha, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-                save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-            }
-
-            w -= 4;
-            dst += 4;
-            mask += 4;
-        }
-
-        while (w)
-        {
-            uint8_t m = *mask++;
-
-            if (m)
-            {
-                d = *dst;
-                mmx_mask = expand_pixel_8_1x64 (m);
-                mmx_dest = unpack_32_1x64 (d);
-
-                *dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
-                                                  &mmx_alpha,
-                                                  &mmx_mask,
-                                                  &mmx_dest));
-            }
-
-            w--;
-            dst++;
-        }
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+	w = width;
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w && (unsigned long)dst & 15)
+	{
+	    uint8_t m = *mask++;
+
+	    if (m)
+	    {
+		d = *dst;
+		mmx_mask = expand_pixel_8_1x64 (m);
+		mmx_dest = unpack_32_1x64 (d);
+
+		*dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
+		                                   &mmx_alpha,
+		                                   &mmx_mask,
+		                                   &mmx_dest));
+	    }
+
+	    w--;
+	    dst++;
+	}
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w >= 4)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)mask);
+	    cache_prefetch_next ((__m128i*)dst);
+
+	    m = *((uint32_t*)mask);
+
+	    if (srca == 0xff && m == 0xffffffff)
+	    {
+		save_128_aligned ((__m128i*)dst, xmm_def);
+	    }
+	    else if (m)
+	    {
+		xmm_dst = load_128_aligned ((__m128i*) dst);
+		xmm_mask = unpack_32_1x128 (m);
+		xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
+
+		/* Unpacking */
+		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+					&xmm_mask_lo, &xmm_mask_hi);
+
+		in_over_2x128 (&xmm_src, &xmm_src,
+			       &xmm_alpha, &xmm_alpha,
+			       &xmm_mask_lo, &xmm_mask_hi,
+			       &xmm_dst_lo, &xmm_dst_hi);
+
+		save_128_aligned (
+		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	    }
+
+	    w -= 4;
+	    dst += 4;
+	    mask += 4;
+	}
+
+	while (w)
+	{
+	    uint8_t m = *mask++;
+
+	    if (m)
+	    {
+		d = *dst;
+		mmx_mask = expand_pixel_8_1x64 (m);
+		mmx_dest = unpack_32_1x64 (d);
+
+		*dst = pack_1x64_32 (in_over_1x64 (&mmx_src,
+		                                   &mmx_alpha,
+		                                   &mmx_mask,
+		                                   &mmx_dest));
+	    }
+
+	    w--;
+	    dst++;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
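
With an a8 mask, each 8-bit coverage value has to act on all four channels
of the pixel; expand_pixel_8_1x64 does that widening on 16-bit lanes in
the SIMD path. And when the source is opaque (srca == 0xff) and all four
mask bytes read 0xff, the loop above skips the blend entirely and stores
the solid color. Roughly the scalar equivalent of the widening:

    #include <stdint.h>

    /* Replicate an 8-bit mask value into all four bytes of a
     * 32-bit word -- in effect what expand_pixel_8_1x64 does
     * on 16-bit lanes. */
    static uint32_t
    replicate_mask_8 (uint8_t m)
    {
        return (uint32_t)m * 0x01010101u;
    }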
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_n_8_8888
+/* ----------------------------------------------------------------
+ * pixman_fill_sse2
  */
 
 pixman_bool_t
 pixman_fill_sse2 (uint32_t *bits,
-		 int stride,
-		 int bpp,
-		 int x,
-		 int y,
-		 int width,
-		 int height,
-		 uint32_t data)
+                  int       stride,
+                  int       bpp,
+                  int       x,
+                  int       y,
+                  int       width,
+                  int       height,
+                  uint32_t  data)
 {
-    uint32_t	byte_width;
-    uint8_t	    *byte_line;
+    uint32_t byte_width;
+    uint8_t     *byte_line;
 
     __m128i xmm_def;
 
@@ -3360,17 +3820,17 @@ pixman_fill_sse2 (uint32_t *bits,
 
     if (bpp == 16)
     {
-        stride = stride * (int) sizeof (uint32_t) / 2;
-        byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
-        byte_width = 2 * width;
-        stride *= 2;
+	stride = stride * (int) sizeof (uint32_t) / 2;
+	byte_line = (uint8_t *)(((uint16_t *)bits) + stride * y + x);
+	byte_width = 2 * width;
+	stride *= 2;
     }
     else
     {
-        stride = stride * (int) sizeof (uint32_t) / 4;
-        byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
-        byte_width = 4 * width;
-        stride *= 4;
+	stride = stride * (int) sizeof (uint32_t) / 4;
+	byte_line = (uint8_t *)(((uint32_t *)bits) + stride * y + x);
+	byte_width = 4 * width;
+	stride *= 4;
     }
 
     cache_prefetch ((__m128i*)byte_line);
@@ -3378,258 +3838,268 @@ pixman_fill_sse2 (uint32_t *bits,
 
     while (height--)
     {
-        int w;
-        uint8_t *d = byte_line;
-        byte_line += stride;
-        w = byte_width;
-
-
-        cache_prefetch_next ((__m128i*)d);
-
-        while (w >= 2 && ((unsigned long)d & 3))
-        {
-            *(uint16_t *)d = data;
-            w -= 2;
-            d += 2;
-        }
-
-        while (w >= 4 && ((unsigned long)d & 15))
-        {
-            *(uint32_t *)d = data;
-
-            w -= 4;
-            d += 4;
-        }
-
-        cache_prefetch_next ((__m128i*)d);
-
-        while (w >= 128)
-        {
-            cache_prefetch (((__m128i*)d) + 12);
-
-            save_128_aligned ((__m128i*)(d),     xmm_def);
-            save_128_aligned ((__m128i*)(d+16),  xmm_def);
-            save_128_aligned ((__m128i*)(d+32),  xmm_def);
-            save_128_aligned ((__m128i*)(d+48),  xmm_def);
-            save_128_aligned ((__m128i*)(d+64),  xmm_def);
-            save_128_aligned ((__m128i*)(d+80),  xmm_def);
-            save_128_aligned ((__m128i*)(d+96),  xmm_def);
-            save_128_aligned ((__m128i*)(d+112), xmm_def);
-
-            d += 128;
-            w -= 128;
-        }
-
-        if (w >= 64)
-        {
-            cache_prefetch (((__m128i*)d) + 8);
-
-            save_128_aligned ((__m128i*)(d),     xmm_def);
-            save_128_aligned ((__m128i*)(d+16),  xmm_def);
-            save_128_aligned ((__m128i*)(d+32),  xmm_def);
-            save_128_aligned ((__m128i*)(d+48),  xmm_def);
-
-            d += 64;
-            w -= 64;
-        }
-
-        cache_prefetch_next ((__m128i*)d);
-
-        if (w >= 32)
-        {
-            save_128_aligned ((__m128i*)(d),     xmm_def);
-            save_128_aligned ((__m128i*)(d+16),  xmm_def);
-
-            d += 32;
-            w -= 32;
-        }
-
-        if (w >= 16)
-        {
-            save_128_aligned ((__m128i*)(d),     xmm_def);
-
-            d += 16;
-            w -= 16;
-        }
-
-        cache_prefetch_next ((__m128i*)d);
-
-        while (w >= 4)
-        {
-            *(uint32_t *)d = data;
-
-            w -= 4;
-            d += 4;
-        }
-
-        if (w >= 2)
-        {
-            *(uint16_t *)d = data;
-            w -= 2;
-            d += 2;
-        }
+	int w;
+	uint8_t *d = byte_line;
+	byte_line += stride;
+	w = byte_width;
+
+	cache_prefetch_next ((__m128i*)d);
+
+	while (w >= 2 && ((unsigned long)d & 3))
+	{
+	    *(uint16_t *)d = data;
+	    w -= 2;
+	    d += 2;
+	}
+
+	while (w >= 4 && ((unsigned long)d & 15))
+	{
+	    *(uint32_t *)d = data;
+
+	    w -= 4;
+	    d += 4;
+	}
+
+	cache_prefetch_next ((__m128i*)d);
+
+	while (w >= 128)
+	{
+	    cache_prefetch (((__m128i*)d) + 12);
+
+	    save_128_aligned ((__m128i*)(d),       xmm_def);
+	    save_128_aligned ((__m128i*)(d + 16),  xmm_def);
+	    save_128_aligned ((__m128i*)(d + 32),  xmm_def);
+	    save_128_aligned ((__m128i*)(d + 48),  xmm_def);
+	    save_128_aligned ((__m128i*)(d + 64),  xmm_def);
+	    save_128_aligned ((__m128i*)(d + 80),  xmm_def);
+	    save_128_aligned ((__m128i*)(d + 96),  xmm_def);
+	    save_128_aligned ((__m128i*)(d + 112), xmm_def);
+
+	    d += 128;
+	    w -= 128;
+	}
+
+	if (w >= 64)
+	{
+	    cache_prefetch (((__m128i*)d) + 8);
+
+	    save_128_aligned ((__m128i*)(d),       xmm_def);
+	    save_128_aligned ((__m128i*)(d + 16),  xmm_def);
+	    save_128_aligned ((__m128i*)(d + 32),  xmm_def);
+	    save_128_aligned ((__m128i*)(d + 48),  xmm_def);
+
+	    d += 64;
+	    w -= 64;
+	}
+
+	cache_prefetch_next ((__m128i*)d);
+
+	if (w >= 32)
+	{
+	    save_128_aligned ((__m128i*)(d),       xmm_def);
+	    save_128_aligned ((__m128i*)(d + 16),  xmm_def);
+
+	    d += 32;
+	    w -= 32;
+	}
+
+	if (w >= 16)
+	{
+	    save_128_aligned ((__m128i*)(d),     xmm_def);
+
+	    d += 16;
+	    w -= 16;
+	}
+
+	cache_prefetch_next ((__m128i*)d);
+
+	while (w >= 4)
+	{
+	    *(uint32_t *)d = data;
+
+	    w -= 4;
+	    d += 4;
+	}
+
+	if (w >= 2)
+	{
+	    *(uint16_t *)d = data;
+	    w -= 2;
+	    d += 2;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
     return TRUE;
 }
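
pixman_fill_sse2 converts the width to bytes, reaches 16-byte alignment
with 16- and 32-bit stores, and then steps down through 128-, 64-, 32- and
16-byte blocks of aligned 128-bit stores before a scalar tail. A compact
model of that cascade for the 32bpp case (a hypothetical helper, flattened
to a single block size):

    #include <stdint.h>
    #include <emmintrin.h>

    /* Fill w bytes at d with a 32-bit pattern; d must be 4-byte
     * aligned and w a multiple of 4. Aligned 128-bit stores do
     * the bulk of the work, as in pixman_fill_sse2. */
    static void
    fill32_sketch (uint8_t *d, int w, uint32_t data)
    {
        __m128i v = _mm_set1_epi32 ((int)data);

        while (w >= 4 && ((uintptr_t)d & 15))  /* scalar head */
        {
            *(uint32_t *)d = data;
            d += 4;
            w -= 4;
        }

        while (w >= 16)                        /* aligned body */
        {
            _mm_store_si128 ((__m128i *)d, v);
            d += 16;
            w -= 16;
        }

        while (w >= 4)                         /* scalar tail */
        {
            *(uint32_t *)d = data;
            d += 4;
            w -= 4;
        }
    }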
 
 static void
 sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
-				      pixman_op_t op,
-				     pixman_image_t * src_image,
-				     pixman_image_t * mask_image,
-				     pixman_image_t * dst_image,
-				     int32_t      src_x,
-				     int32_t      src_y,
-				     int32_t      mask_x,
-				     int32_t      mask_y,
-				     int32_t      dest_x,
-				     int32_t      dest_y,
-				     int32_t     width,
-				     int32_t     height)
-{
-    uint32_t	src, srca;
-    uint32_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
-    uint32_t    m;
+                             pixman_op_t              op,
+                             pixman_image_t *         src_image,
+                             pixman_image_t *         mask_image,
+                             pixman_image_t *         dst_image,
+                             int32_t                  src_x,
+                             int32_t                  src_y,
+                             int32_t                  mask_x,
+                             int32_t                  mask_y,
+                             int32_t                  dest_x,
+                             int32_t                  dest_y,
+                             int32_t                  width,
+                             int32_t                  height)
+{
+    uint32_t src, srca;
+    uint32_t    *dst_line, *dst;
+    uint8_t     *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w;
+    uint32_t m;
 
     __m128i xmm_src, xmm_def;
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
     {
-        pixman_fill_sse2 (dst_image->bits.bits, dst_image->bits.rowstride,
-                        PIXMAN_FORMAT_BPP (dst_image->bits.format),
-                        dest_x, dest_y, width, height, 0);
-        return;
+	pixman_fill_sse2 (dst_image->bits.bits, dst_image->bits.rowstride,
+	                  PIXMAN_FORMAT_BPP (dst_image->bits.format),
+	                  dest_x, dest_y, width, height, 0);
+	return;
     }
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
     xmm_def = create_mask_2x32_128 (src, src);
     xmm_src = expand_pixel_32_1x128 (src);
 
     while (height--)
     {
-        dst = dst_line;
-        dst_line += dst_stride;
-        mask = mask_line;
-        mask_line += mask_stride;
-        w = width;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w && (unsigned long)dst & 15)
-        {
-            uint8_t m = *mask++;
-
-            if (m)
-            {
-                *dst = pack_1x64_32 (pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
-            }
-            else
-            {
-                *dst = 0;
-            }
-
-            w--;
-            dst++;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w >= 4)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)mask);
-            cache_prefetch_next ((__m128i*)dst);
-
-            m = *((uint32_t*)mask);
-
-            if (srca == 0xff && m == 0xffffffff)
-            {
-                save_128_aligned ((__m128i*)dst, xmm_def);
-            }
-            else if (m)
-            {
-                xmm_mask = unpack_32_1x128 (m);
-                xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128());
-
-                /* Unpacking */
-                unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-                expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-                pix_multiply_2x128 (&xmm_src, &xmm_src, &xmm_mask_lo, &xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-                save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
-            }
-            else
-            {
-                save_128_aligned ((__m128i*)dst, _mm_setzero_si128());
-            }
-
-            w -= 4;
-            dst += 4;
-            mask += 4;
-        }
-
-        while (w)
-        {
-            uint8_t m = *mask++;
-
-            if (m)
-            {
-                *dst = pack_1x64_32 (pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
-            }
-            else
-            {
-                *dst = 0;
-            }
-
-            w--;
-            dst++;
-        }
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+	w = width;
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w && (unsigned long)dst & 15)
+	{
+	    uint8_t m = *mask++;
+
+	    if (m)
+	    {
+		*dst = pack_1x64_32 (
+		    pix_multiply_1x64 (
+			_mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
+	    }
+	    else
+	    {
+		*dst = 0;
+	    }
+
+	    w--;
+	    dst++;
+	}
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w >= 4)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)mask);
+	    cache_prefetch_next ((__m128i*)dst);
+
+	    m = *((uint32_t*)mask);
+
+	    if (srca == 0xff && m == 0xffffffff)
+	    {
+		save_128_aligned ((__m128i*)dst, xmm_def);
+	    }
+	    else if (m)
+	    {
+		xmm_mask = unpack_32_1x128 (m);
+		xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
+
+		/* Unpacking */
+		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+					&xmm_mask_lo, &xmm_mask_hi);
+
+		pix_multiply_2x128 (&xmm_src, &xmm_src,
+				    &xmm_mask_lo, &xmm_mask_hi,
+				    &xmm_mask_lo, &xmm_mask_hi);
+
+		save_128_aligned (
+		    (__m128i*)dst, pack_2x128_128 (xmm_mask_lo, xmm_mask_hi));
+	    }
+	    else
+	    {
+		save_128_aligned ((__m128i*)dst, _mm_setzero_si128 ());
+	    }
+
+	    w -= 4;
+	    dst += 4;
+	    mask += 4;
+	}
+
+	while (w)
+	{
+	    uint8_t m = *mask++;
+
+	    if (m)
+	    {
+		*dst = pack_1x64_32 (
+		    pix_multiply_1x64 (
+			_mm_movepi64_pi64 (xmm_src), expand_pixel_8_1x64 (m)));
+	    }
+	    else
+	    {
+		*dst = 0;
+	    }
+
+	    w--;
+	    dst++;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_n_8_0565
+/* ----------------------------------------------------------------------
+ * composite_over_n_8_0565
  */
 
 static void
 sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
-				   pixman_op_t op,
-				  pixman_image_t * src_image,
-				  pixman_image_t * mask_image,
-				  pixman_image_t * dst_image,
-				  int32_t      src_x,
-				  int32_t      src_y,
-				  int32_t      mask_x,
-				  int32_t      mask_y,
-				  int32_t      dest_x,
-				  int32_t      dest_y,
-				  int32_t     width,
-				  int32_t     height)
-{
-    uint32_t	src, srca;
-    uint16_t	*dst_line, *dst, d;
-    uint8_t	*mask_line, *mask;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
+{
+    uint32_t src, srca;
+    uint16_t    *dst_line, *dst, d;
+    uint8_t     *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w;
     uint32_t m;
     __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
@@ -3637,14 +4107,16 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
     __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
 	return;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
     xmm_src = expand_pixel_32_1x128 (src);
     xmm_alpha = expand_alpha_1x128 (xmm_src);
@@ -3653,140 +4125,154 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
 
     while (height--)
     {
-        dst = dst_line;
-        dst_line += dst_stride;
-        mask = mask_line;
-        mask_line += mask_stride;
-        w = width;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w && (unsigned long)dst & 15)
-        {
-            m = *mask++;
-
-            if (m)
-            {
-                d = *dst;
-                mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
-                mmx_dest = expand565_16_1x64 (d);
-
-                *dst = pack_565_32_16 (pack_1x64_32 (in_over_1x64 (&mmx_src,
-                                                                 &mmx_alpha,
-                                                                 &mmx_mask,
-                                                                 &mmx_dest)));
-            }
-
-            w--;
-            dst++;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w >= 8)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)mask);
-            cache_prefetch_next ((__m128i*)dst);
-
-            xmm_dst = load_128_aligned ((__m128i*) dst);
-            unpack_565_128_4x128 (xmm_dst, &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-
-            m = *((uint32_t*)mask);
-            mask += 4;
-
-            if (m)
-            {
-                xmm_mask = unpack_32_1x128 (m);
-                xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128());
-
-                /* Unpacking */
-                unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-                expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-                in_over_2x128 (&xmm_src, &xmm_src, &xmm_alpha, &xmm_alpha, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst0, &xmm_dst1);
-            }
-
-            m = *((uint32_t*)mask);
-            mask += 4;
-
-            if (m)
-            {
-                xmm_mask = unpack_32_1x128 (m);
-                xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128());
-
-                /* Unpacking */
-                unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-                expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-                in_over_2x128 (&xmm_src, &xmm_src, &xmm_alpha, &xmm_alpha, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst2, &xmm_dst3);
-            }
-
-            save_128_aligned ((__m128i*)dst, pack_565_4x128_128 (&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
-            w -= 8;
-            dst += 8;
-        }
-
-        while (w)
-        {
-            m = *mask++;
-
-            if (m)
-            {
-                d = *dst;
-                mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
-                mmx_dest = expand565_16_1x64 (d);
-
-                *dst = pack_565_32_16 (pack_1x64_32 (in_over_1x64 (&mmx_src,
-                                                                 &mmx_alpha,
-                                                                 &mmx_mask,
-                                                                 &mmx_dest)));
-            }
-
-            w--;
-            dst++;
-        }
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+	w = width;
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w && (unsigned long)dst & 15)
+	{
+	    m = *mask++;
+
+	    if (m)
+	    {
+		d = *dst;
+		mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
+		mmx_dest = expand565_16_1x64 (d);
+
+		*dst = pack_565_32_16 (
+		    pack_1x64_32 (
+			in_over_1x64 (
+			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
+	    }
+
+	    w--;
+	    dst++;
+	}
+
+	/* call prefetch hint to optimize cache load */
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w >= 8)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)mask);
+	    cache_prefetch_next ((__m128i*)dst);
+
+	    xmm_dst = load_128_aligned ((__m128i*) dst);
+	    unpack_565_128_4x128 (xmm_dst,
+				  &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+
+	    m = *((uint32_t*)mask);
+	    mask += 4;
+
+	    if (m)
+	    {
+		xmm_mask = unpack_32_1x128 (m);
+		xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
+
+		/* Unpacking */
+		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+					&xmm_mask_lo, &xmm_mask_hi);
+
+		in_over_2x128 (&xmm_src, &xmm_src,
+			       &xmm_alpha, &xmm_alpha,
+			       &xmm_mask_lo, &xmm_mask_hi,
+			       &xmm_dst0, &xmm_dst1);
+	    }
+
+	    m = *((uint32_t*)mask);
+	    mask += 4;
+
+	    if (m)
+	    {
+		xmm_mask = unpack_32_1x128 (m);
+		xmm_mask = _mm_unpacklo_epi8 (xmm_mask, _mm_setzero_si128 ());
+
+		/* Unpacking */
+		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+					&xmm_mask_lo, &xmm_mask_hi);
+		in_over_2x128 (&xmm_src, &xmm_src,
+			       &xmm_alpha, &xmm_alpha,
+			       &xmm_mask_lo, &xmm_mask_hi,
+			       &xmm_dst2, &xmm_dst3);
+	    }
+
+	    save_128_aligned (
+		(__m128i*)dst, pack_565_4x128_128 (
+		    &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
+
+	    w -= 8;
+	    dst += 8;
+	}
+
+	while (w)
+	{
+	    m = *mask++;
+
+	    if (m)
+	    {
+		d = *dst;
+		mmx_mask = expand_alpha_rev_1x64 (unpack_32_1x64 (m));
+		mmx_dest = expand565_16_1x64 (d);
+
+		*dst = pack_565_32_16 (
+		    pack_1x64_32 (
+			in_over_1x64 (
+			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
+	    }
+
+	    w--;
+	    dst++;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
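
Every fast path in the reindented file walks its scanlines with the same three-phase shape visible above: a scalar head loop until dst reaches 16-byte alignment, an aligned SSE2 body covering 8 or 16 pixels per iteration, and a scalar tail for the leftovers. A minimal sketch of that shape, assuming an a8-style byte destination and hypothetical operate_one ()/operate_sixteen () helpers standing in for the real pack/unpack/in_over machinery:

    #include <stdint.h>
    #include <emmintrin.h>

    /* Hypothetical stand-ins for the real per-pixel / per-vector work. */
    static uint8_t operate_one (uint8_t d) { return (uint8_t)(255 - d); }

    static __m128i
    operate_sixteen (__m128i d)
    {
        return _mm_sub_epi8 (_mm_set1_epi8 ((char)0xff), d);
    }

    static void
    process_scanline (uint8_t *dst, int w)
    {
        while (w && ((uintptr_t)dst & 15))       /* head: align dst */
        {
            *dst = operate_one (*dst);
            dst++; w--;
        }

        while (w >= 16)                          /* body: aligned vectors */
        {
            __m128i x = _mm_load_si128 ((__m128i *)dst);
            _mm_store_si128 ((__m128i *)dst, operate_sixteen (x));
            dst += 16; w -= 16;
        }

        while (w)                                /* tail: leftover pixels */
        {
            *dst = operate_one (*dst);
            dst++; w--;
        }
    }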
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_pixbuf_0565
+/* -----------------------------------------------------------------------
+ * composite_over_pixbuf_0565
  */
 
 static void
 sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
-				   pixman_op_t op,
-				  pixman_image_t * src_image,
-				  pixman_image_t * mask_image,
-				  pixman_image_t * dst_image,
-				  int32_t      src_x,
-				  int32_t      src_y,
-				  int32_t      mask_x,
-				  int32_t      mask_y,
-				  int32_t      dest_x,
-				  int32_t      dest_y,
-				  int32_t     width,
-				  int32_t     height)
-{
-    uint16_t	*dst_line, *dst, d;
-    uint32_t	*src_line, *src, s;
-    int		dst_stride, src_stride;
-    uint16_t	w;
-    uint32_t    opaque, zero;
+                                 pixman_op_t              op,
+                                 pixman_image_t *         src_image,
+                                 pixman_image_t *         mask_image,
+                                 pixman_image_t *         dst_image,
+                                 int32_t                  src_x,
+                                 int32_t                  src_y,
+                                 int32_t                  mask_x,
+                                 int32_t                  mask_y,
+                                 int32_t                  dest_x,
+                                 int32_t                  dest_y,
+                                 int32_t                  width,
+                                 int32_t                  height)
+{
+    uint16_t    *dst_line, *dst, d;
+    uint32_t    *src_line, *src, s;
+    int dst_stride, src_stride;
+    uint16_t w;
+    uint32_t opaque, zero;
 
     __m64 ms;
     __m128i xmm_src, xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst, xmm_dst0, xmm_dst1, xmm_dst2, xmm_dst3;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
 #if 0
     /* FIXME
@@ -3799,126 +4285,139 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 
     while (height--)
     {
-        dst = dst_line;
-        dst_line += dst_stride;
-        src = src_line;
-        src_line += src_stride;
-        w = width;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)src);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w && (unsigned long)dst & 15)
-        {
-            s = *src++;
-            d = *dst;
-
-            ms = unpack_32_1x64 (s);
-
-            *dst++ = pack_565_32_16 (pack_1x64_32 (over_rev_non_pre_1x64(ms, expand565_16_1x64 (d))));
-            w--;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)src);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w >= 8)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)src);
-            cache_prefetch_next ((__m128i*)dst);
-
-            /* First round */
-            xmm_src = load_128_unaligned((__m128i*)src);
-            xmm_dst = load_128_aligned  ((__m128i*)dst);
-
-            opaque = is_opaque (xmm_src);
-	    zero = is_zero (xmm_src);
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)src);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w && (unsigned long)dst & 15)
+	{
+	    s = *src++;
+	    d = *dst;
+
+	    ms = unpack_32_1x64 (s);
+
+	    *dst++ = pack_565_32_16 (
+		pack_1x64_32 (
+		    over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d))));
+	    w--;
+	}
+
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)src);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w >= 8)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)src);
+	    cache_prefetch_next ((__m128i*)dst);
+
+	    /* First round */
+	    xmm_src = load_128_unaligned ((__m128i*)src);
+	    xmm_dst = load_128_aligned  ((__m128i*)dst);
 
-	    unpack_565_128_4x128 (xmm_dst, &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-            unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-
-            /* preload next round*/
-            xmm_src = load_128_unaligned((__m128i*)(src+4));
-	    
-            if (opaque)
-            {
-                invert_colors_2x128 (xmm_src_lo, xmm_src_hi, &xmm_dst0, &xmm_dst1);
-            }
-            else if (!zero)
-            {
-                over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, &xmm_dst0, &xmm_dst1);
-            }
-
-            /* Second round */
 	    opaque = is_opaque (xmm_src);
 	    zero = is_zero (xmm_src);
 
-            unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-
-            if (opaque)
-            {
-                invert_colors_2x128 (xmm_src_lo, xmm_src_hi, &xmm_dst2, &xmm_dst3);
-            }
-            else if (zero)
-            {
-                over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, &xmm_dst2, &xmm_dst3);
-            }
-
-            save_128_aligned ((__m128i*)dst, pack_565_4x128_128 (&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
-            w -= 8;
-            src += 8;
-            dst += 8;
-        }
-
-        while (w)
-        {
-            s = *src++;
-            d = *dst;
-
-            ms = unpack_32_1x64 (s);
+	    unpack_565_128_4x128 (xmm_dst,
+				  &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+
+	    /* preload next round*/
+	    xmm_src = load_128_unaligned ((__m128i*)(src + 4));
+
+	    if (opaque)
+	    {
+		invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
+				     &xmm_dst0, &xmm_dst1);
+	    }
+	    else if (!zero)
+	    {
+		over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
+					&xmm_dst0, &xmm_dst1);
+	    }
+
+	    /* Second round */
+	    opaque = is_opaque (xmm_src);
+	    zero = is_zero (xmm_src);
 
-            *dst++ = pack_565_32_16 (pack_1x64_32 (over_rev_non_pre_1x64(ms, expand565_16_1x64 (d))));
-            w--;
-        }
+	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+
+	    if (opaque)
+	    {
+		invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
+				     &xmm_dst2, &xmm_dst3);
+	    }
+	    else if (!zero)
+	    {
+		over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
+					&xmm_dst2, &xmm_dst3);
+	    }
+
+	    save_128_aligned (
+		(__m128i*)dst, pack_565_4x128_128 (
+		    &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
+
+	    w -= 8;
+	    src += 8;
+	    dst += 8;
+	}
+
+	while (w)
+	{
+	    s = *src++;
+	    d = *dst;
+
+	    ms = unpack_32_1x64 (s);
+
+	    *dst++ = pack_565_32_16 (
+		pack_1x64_32 (
+		    over_rev_non_pre_1x64 (ms, expand565_16_1x64 (d))));
+	    w--;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
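
The is_opaque/is_zero tests above let the 8-pixel loop skip blending for whole groups of four source pixels: an all-opaque group needs only the channel swap (invert_colors_2x128), and an all-zero group leaves the destination untouched. Sketches of the two classifiers, assuming 32-bit pixels with alpha in the top byte, roughly matching the helpers defined earlier in pixman-sse2.c:

    #include <emmintrin.h>

    /* Classify four packed 32-bit pixels, alpha in the top byte. */
    static int
    is_opaque_sketch (__m128i x)
    {
        __m128i ff = _mm_cmpeq_epi8 (x, x);   /* all bits set */
        /* bytes 3, 7, 11, 15 are the alpha lanes -> movemask bits 0x8888 */
        return (_mm_movemask_epi8 (_mm_cmpeq_epi8 (x, ff)) & 0x8888) == 0x8888;
    }

    static int
    is_zero_sketch (__m128i x)
    {
        return _mm_movemask_epi8 (
            _mm_cmpeq_epi8 (x, _mm_setzero_si128 ())) == 0xffff;
    }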
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_over_pixbuf_8888
+/* -------------------------------------------------------------------------
+ * composite_over_pixbuf_8888
  */
 
 static void
 sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
-				   pixman_op_t op,
-				  pixman_image_t * src_image,
-				  pixman_image_t * mask_image,
-				  pixman_image_t * dst_image,
-				  int32_t      src_x,
-				  int32_t      src_y,
-				  int32_t      mask_x,
-				  int32_t      mask_y,
-				  int32_t      dest_x,
-				  int32_t      dest_y,
-				  int32_t     width,
-				  int32_t     height)
-{
-    uint32_t	*dst_line, *dst, d;
-    uint32_t	*src_line, *src, s;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-    uint32_t    opaque, zero;
+                                 pixman_op_t              op,
+                                 pixman_image_t *         src_image,
+                                 pixman_image_t *         mask_image,
+                                 pixman_image_t *         dst_image,
+                                 int32_t                  src_x,
+                                 int32_t                  src_y,
+                                 int32_t                  mask_x,
+                                 int32_t                  mask_y,
+                                 int32_t                  dest_x,
+                                 int32_t                  dest_y,
+                                 int32_t                  width,
+                                 int32_t                  height)
+{
+    uint32_t    *dst_line, *dst, d;
+    uint32_t    *src_line, *src, s;
+    int dst_stride, src_stride;
+    uint16_t w;
+    uint32_t opaque, zero;
 
     __m128i xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst_lo, xmm_dst_hi;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
 #if 0
     /* FIXME
@@ -3931,102 +4430,110 @@ sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
 
     while (height--)
     {
-        dst = dst_line;
-        dst_line += dst_stride;
-        src = src_line;
-        src_line += src_stride;
-        w = width;
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
 
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)src);
-        cache_prefetch ((__m128i*)dst);
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)src);
+	cache_prefetch ((__m128i*)dst);
 
-        while (w && (unsigned long)dst & 15)
-        {
-            s = *src++;
-            d = *dst;
+	while (w && (unsigned long)dst & 15)
+	{
+	    s = *src++;
+	    d = *dst;
 
-            *dst++ = pack_1x64_32 (over_rev_non_pre_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (d)));
+	    *dst++ = pack_1x64_32 (
+		over_rev_non_pre_1x64 (
+		    unpack_32_1x64 (s), unpack_32_1x64 (d)));
 
-            w--;
-        }
+	    w--;
+	}
 
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)src);
-        cache_prefetch ((__m128i*)dst);
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)src);
+	cache_prefetch ((__m128i*)dst);
 
-        while (w >= 4)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)src);
-            cache_prefetch_next ((__m128i*)dst);
+	while (w >= 4)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)src);
+	    cache_prefetch_next ((__m128i*)dst);
 
-            xmm_src_hi = load_128_unaligned((__m128i*)src);
+	    xmm_src_hi = load_128_unaligned ((__m128i*)src);
 
-            opaque = is_opaque (xmm_src_hi);
+	    opaque = is_opaque (xmm_src_hi);
 	    zero = is_zero (xmm_src_hi);
 
-            unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
+	    unpack_128_2x128 (xmm_src_hi, &xmm_src_lo, &xmm_src_hi);
 
-            if (opaque)
-            {
-                invert_colors_2x128( xmm_src_lo, xmm_src_hi, &xmm_dst_lo, &xmm_dst_hi);
+	    if (opaque)
+	    {
+		invert_colors_2x128 (xmm_src_lo, xmm_src_hi,
+				     &xmm_dst_lo, &xmm_dst_hi);
 
-                save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-            }
-            else if (!zero)
-            {
-                xmm_dst_hi = load_128_aligned  ((__m128i*)dst);
+		save_128_aligned (
+		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	    }
+	    else if (!zero)
+	    {
+		xmm_dst_hi = load_128_aligned  ((__m128i*)dst);
 
-                unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
+		unpack_128_2x128 (xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
 
-                over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi, &xmm_dst_lo, &xmm_dst_hi);
+		over_rev_non_pre_2x128 (xmm_src_lo, xmm_src_hi,
+					&xmm_dst_lo, &xmm_dst_hi);
 
-                save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-            }
+		save_128_aligned (
+		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	    }
 
-            w -= 4;
-            dst += 4;
-            src += 4;
-        }
+	    w -= 4;
+	    dst += 4;
+	    src += 4;
+	}
 
-        while (w)
-        {
-            s = *src++;
-            d = *dst;
+	while (w)
+	{
+	    s = *src++;
+	    d = *dst;
 
-            *dst++ = pack_1x64_32 (over_rev_non_pre_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (d)));
+	    *dst++ = pack_1x64_32 (
+		over_rev_non_pre_1x64 (
+		    unpack_32_1x64 (s), unpack_32_1x64 (d)));
 
-            w--;
-        }
+	    w--;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
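
over_rev_non_pre_1x64 and over_rev_non_pre_2x128 treat the source as non-premultiplied with red and blue swapped relative to a8r8g8b8 (the pixbuf layout), so each pixel is channel-swapped and premultiplied by its alpha before the ordinary OVER. A scalar model of one 32-bit pixel; the exact byte order here is an assumption for illustration, and mul_div_255 is the rounded x*y/255 used throughout pixman:

    #include <stdint.h>

    /* Scalar model of over_rev_non_pre: source pixels are non-premultiplied
     * with R and B swapped relative to a8r8g8b8 (assumed layout: A in the
     * top byte, then B, G, R).  Swap and premultiply, then ordinary OVER. */
    static uint8_t
    mul_div_255 (uint8_t a, uint8_t b)
    {
        uint32_t t = (uint32_t)a * b + 128;
        return (uint8_t)((t + (t >> 8)) >> 8);   /* rounded a*b/255 */
    }

    static uint32_t
    over_rev_non_pre_scalar (uint32_t s, uint32_t d)
    {
        uint8_t sa = s >> 24;
        uint8_t sr = mul_div_255 (s & 0xff, sa);          /* R from low byte */
        uint8_t sg = mul_div_255 ((s >> 8) & 0xff, sa);
        uint8_t sb = mul_div_255 ((s >> 16) & 0xff, sa);  /* B from byte 2 */

        uint8_t ia = 255 - sa;                            /* s + (1 - sa)*d */

        return ((uint32_t)(sa + mul_div_255 (d >> 24, ia))          << 24)
             | ((uint32_t)(sr + mul_div_255 ((d >> 16) & 0xff, ia)) << 16)
             | ((uint32_t)(sg + mul_div_255 ((d >> 8) & 0xff, ia))  << 8)
             |  (uint32_t)(sb + mul_div_255 (d & 0xff, ia));
    }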
 
 /* -------------------------------------------------------------------------------------------------
- * fast_composite_over_n_8888_0565_ca
+ * composite_over_n_8888_0565_ca
  */
 
 static void
 sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
-				       pixman_op_t op,
-				      pixman_image_t * src_image,
-				      pixman_image_t * mask_image,
-				      pixman_image_t * dst_image,
-				      int32_t      src_x,
-				      int32_t      src_y,
-				      int32_t      mask_x,
-				      int32_t      mask_y,
-				      int32_t      dest_x,
-				      int32_t      dest_y,
-				      int32_t     width,
-				      int32_t     height)
-{
-    uint32_t	src;
-    uint16_t	*dst_line, *dst, d;
-    uint32_t	*mask_line, *mask, m;
-    int	dst_stride, mask_stride;
+                                    pixman_op_t              op,
+                                    pixman_image_t *         src_image,
+                                    pixman_image_t *         mask_image,
+                                    pixman_image_t *         dst_image,
+                                    int32_t                  src_x,
+                                    int32_t                  src_y,
+                                    int32_t                  mask_x,
+                                    int32_t                  mask_y,
+                                    int32_t                  dest_x,
+                                    int32_t                  dest_y,
+                                    int32_t                  width,
+                                    int32_t                  height)
+{
+    uint32_t src;
+    uint16_t    *dst_line, *dst, d;
+    uint32_t    *mask_line, *mask, m;
+    int dst_stride, mask_stride;
     int w;
     uint32_t pack_cmp;
 
@@ -4036,13 +4543,15 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 
     __m64 mmx_src, mmx_alpha, mmx_mask, mmx_dest;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     if (src == 0)
-        return;
+	return;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
     xmm_src = expand_pixel_32_1x128 (src);
     xmm_alpha = expand_alpha_1x128 (xmm_src);
@@ -4051,510 +4560,559 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 
     while (height--)
     {
-        w = width;
-        mask = mask_line;
-        dst = dst_line;
-        mask_line += mask_stride;
-        dst_line += dst_stride;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w && ((unsigned long)dst & 15))
-        {
-            m = *(uint32_t *) mask;
-
-            if (m)
-            {
-                d = *dst;
-                mmx_mask = unpack_32_1x64 (m);
-                mmx_dest = expand565_16_1x64 (d);
-
-                *dst = pack_565_32_16 (pack_1x64_32 (in_over_1x64 (&mmx_src,
-                                                                 &mmx_alpha,
-                                                                 &mmx_mask,
-                                                                 &mmx_dest)));
-            }
-
-            w--;
-            dst++;
-            mask++;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w >= 8)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)mask);
-            cache_prefetch_next ((__m128i*)dst);
-
-            /* First round */
-            xmm_mask = load_128_unaligned((__m128i*)mask);
-            xmm_dst = load_128_aligned((__m128i*)dst);
-
-            pack_cmp = _mm_movemask_epi8 (_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128()));
-
-            unpack_565_128_4x128 (xmm_dst, &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
-            unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-            /* preload next round*/
-            xmm_mask = load_128_unaligned((__m128i*)(mask+4));
-            /* preload next round*/
-
-            if (pack_cmp != 0xffff)
-            {
-                in_over_2x128(&xmm_src, &xmm_src, &xmm_alpha, &xmm_alpha, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst0, &xmm_dst1);
-            }
-
-            /* Second round */
-            pack_cmp = _mm_movemask_epi8 (_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128()));
-
-            unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-
-            if (pack_cmp != 0xffff)
-            {
-                in_over_2x128(&xmm_src, &xmm_src, &xmm_alpha, &xmm_alpha, &xmm_mask_lo, &xmm_mask_hi, &xmm_dst2, &xmm_dst3);
-            }
-
-            save_128_aligned ((__m128i*)dst, pack_565_4x128_128 (&xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
-
-            w -= 8;
-            dst += 8;
-            mask += 8;
-        }
-
-        while (w)
-        {
-            m = *(uint32_t *) mask;
-
-            if (m)
-            {
-                d = *dst;
-                mmx_mask = unpack_32_1x64 (m);
-                mmx_dest = expand565_16_1x64 (d);
-
-                *dst = pack_565_32_16 (pack_1x64_32 (in_over_1x64 (&mmx_src,
-                                                                 &mmx_alpha,
-                                                                 &mmx_mask,
-                                                                 &mmx_dest)));
-            }
-
-            w--;
-            dst++;
-            mask++;
-        }
+	w = width;
+	mask = mask_line;
+	dst = dst_line;
+	mask_line += mask_stride;
+	dst_line += dst_stride;
+
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w && ((unsigned long)dst & 15))
+	{
+	    m = *(uint32_t *) mask;
+
+	    if (m)
+	    {
+		d = *dst;
+		mmx_mask = unpack_32_1x64 (m);
+		mmx_dest = expand565_16_1x64 (d);
+
+		*dst = pack_565_32_16 (
+		    pack_1x64_32 (
+			in_over_1x64 (
+			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
+	    }
+
+	    w--;
+	    dst++;
+	    mask++;
+	}
+
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w >= 8)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)mask);
+	    cache_prefetch_next ((__m128i*)dst);
+
+	    /* First round */
+	    xmm_mask = load_128_unaligned ((__m128i*)mask);
+	    xmm_dst = load_128_aligned ((__m128i*)dst);
+
+	    pack_cmp = _mm_movemask_epi8 (
+		_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
+
+	    unpack_565_128_4x128 (xmm_dst,
+				  &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3);
+	    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+	    /* preload next round */
+	    xmm_mask = load_128_unaligned ((__m128i*)(mask + 4));
+
+	    if (pack_cmp != 0xffff)
+	    {
+		in_over_2x128 (&xmm_src, &xmm_src,
+			       &xmm_alpha, &xmm_alpha,
+			       &xmm_mask_lo, &xmm_mask_hi,
+			       &xmm_dst0, &xmm_dst1);
+	    }
+
+	    /* Second round */
+	    pack_cmp = _mm_movemask_epi8 (
+		_mm_cmpeq_epi32 (xmm_mask, _mm_setzero_si128 ()));
+
+	    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+
+	    if (pack_cmp != 0xffff)
+	    {
+		in_over_2x128 (&xmm_src, &xmm_src,
+			       &xmm_alpha, &xmm_alpha,
+			       &xmm_mask_lo, &xmm_mask_hi,
+			       &xmm_dst2, &xmm_dst3);
+	    }
+
+	    save_128_aligned (
+		(__m128i*)dst, pack_565_4x128_128 (
+		    &xmm_dst0, &xmm_dst1, &xmm_dst2, &xmm_dst3));
+
+	    w -= 8;
+	    dst += 8;
+	    mask += 8;
+	}
+
+	while (w)
+	{
+	    m = *(uint32_t *) mask;
+
+	    if (m)
+	    {
+		d = *dst;
+		mmx_mask = unpack_32_1x64 (m);
+		mmx_dest = expand565_16_1x64 (d);
+
+		*dst = pack_565_32_16 (
+		    pack_1x64_32 (
+			in_over_1x64 (
+			    &mmx_src, &mmx_alpha, &mmx_mask, &mmx_dest)));
+	    }
+
+	    w--;
+	    dst++;
+	    mask++;
+	}
     }
 
     _mm_empty ();
 }
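
In the component-alpha path above, pack_cmp is the byte mask of _mm_cmpeq_epi32 against zero, so pack_cmp == 0xffff means all four 32-bit mask pixels are zero and the in_over_2x128 call can be skipped. The 0565 paths also lean on the expand565/pack_565 conversions between r5g6b5 and 8-bit channels; a scalar model of that pair, assuming the usual bit-replication expansion so that full intensity round-trips exactly:

    #include <stdint.h>

    /* Scalar model of the r5g6b5 <-> x8r8g8b8 conversions.  Expansion
     * replicates each channel's high bits into its low bits so that 0x1f
     * maps to 0xff and full intensity round-trips exactly. */
    static uint32_t
    expand565 (uint16_t p)
    {
        uint32_t r = (p >> 11) & 0x1f;
        uint32_t g = (p >> 5) & 0x3f;
        uint32_t b = p & 0x1f;

        r = (r << 3) | (r >> 2);
        g = (g << 2) | (g >> 4);
        b = (b << 3) | (b >> 2);

        return (r << 16) | (g << 8) | b;
    }

    static uint16_t
    pack565 (uint32_t p)
    {
        /* keep the top 5/6/5 bits of each 8-bit channel */
        return (uint16_t)(((p >> 8) & 0xf800) |
                          ((p >> 5) & 0x07e0) |
                          ((p >> 3) & 0x001f));
    }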
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_in_n_8_8
+/* -----------------------------------------------------------------------
+ * composite_in_n_8_8
  */
 
 static void
 sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
-			 pixman_op_t op,
-			pixman_image_t * src_image,
-			pixman_image_t * mask_image,
-			pixman_image_t * dst_image,
-			int32_t      src_x,
-			int32_t      src_y,
-			int32_t      mask_x,
-			int32_t      mask_y,
-			int32_t      dest_x,
-			int32_t      dest_y,
-			int32_t     width,
-			int32_t     height)
-{
-    uint8_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int	dst_stride, mask_stride;
-    uint16_t	w, d, m;
-    uint32_t	src;
-    uint8_t	sa;
+                         pixman_op_t              op,
+                         pixman_image_t *         src_image,
+                         pixman_image_t *         mask_image,
+                         pixman_image_t *         dst_image,
+                         int32_t                  src_x,
+                         int32_t                  src_y,
+                         int32_t                  mask_x,
+                         int32_t                  mask_y,
+                         int32_t                  dest_x,
+                         int32_t                  dest_y,
+                         int32_t                  width,
+                         int32_t                  height)
+{
+    uint8_t     *dst_line, *dst;
+    uint8_t     *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w, d, m;
+    uint32_t src;
+    uint8_t sa;
 
     __m128i xmm_alpha;
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     sa = src >> 24;
     if (sa == 0)
-        return;
+	return;
 
     xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
 
     while (height--)
     {
-        dst = dst_line;
-        dst_line += dst_stride;
-        mask = mask_line;
-        mask_line += mask_stride;
-        w = width;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w && ((unsigned long)dst & 15))
-        {
-            m = (uint32_t) *mask++;
-            d = (uint32_t) *dst;
-
-            *dst++ = (uint8_t) pack_1x64_32 (pix_multiply_1x64 (pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
-                                                               unpack_32_1x64 (d)));
-            w--;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w >= 16)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)mask);
-            cache_prefetch_next ((__m128i*)dst);
-
-            xmm_mask = load_128_unaligned((__m128i*)mask);
-            xmm_dst = load_128_aligned((__m128i*)dst);
-
-            unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-            unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-            pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, &xmm_mask_lo, &xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-            pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-            save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-            mask += 16;
-            dst += 16;
-            w -= 16;
-        }
-
-        while (w)
-        {
-            m = (uint32_t) *mask++;
-            d = (uint32_t) *dst;
-
-            *dst++ = (uint8_t) pack_1x64_32 (pix_multiply_1x64 (pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
-                                                               unpack_32_1x64 (d)));
-            w--;
-        }
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+	w = width;
+
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w && ((unsigned long)dst & 15))
+	{
+	    m = (uint32_t) *mask++;
+	    d = (uint32_t) *dst;
+
+	    *dst++ = (uint8_t) pack_1x64_32 (
+		pix_multiply_1x64 (
+		    pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_alpha),
+				       unpack_32_1x64 (m)),
+		    unpack_32_1x64 (d)));
+	    w--;
+	}
+
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w >= 16)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)mask);
+	    cache_prefetch_next ((__m128i*)dst);
+
+	    xmm_mask = load_128_unaligned ((__m128i*)mask);
+	    xmm_dst = load_128_aligned ((__m128i*)dst);
+
+	    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+	    pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
+				&xmm_mask_lo, &xmm_mask_hi,
+				&xmm_mask_lo, &xmm_mask_hi);
+
+	    pix_multiply_2x128 (&xmm_mask_lo, &xmm_mask_hi,
+				&xmm_dst_lo, &xmm_dst_hi,
+				&xmm_dst_lo, &xmm_dst_hi);
+
+	    save_128_aligned (
+		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+	    mask += 16;
+	    dst += 16;
+	    w -= 16;
+	}
+
+	while (w)
+	{
+	    m = (uint32_t) *mask++;
+	    d = (uint32_t) *dst;
+
+	    *dst++ = (uint8_t) pack_1x64_32 (
+		pix_multiply_1x64 (
+		    pix_multiply_1x64 (
+			_mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
+		    unpack_32_1x64 (d)));
+	    w--;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
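
Per pixel, sse2_composite_in_n_8_8 computes d = d * (sa * m / 255) / 255: two rounds of the rounded byte multiply that pix_multiply_1x64 implements. A scalar model:

    #include <stdint.h>

    /* d = IN (solid src alpha, a8 mask, a8 dst): two rounded multiplies. */
    static uint8_t
    mul_un8 (uint8_t a, uint8_t b)
    {
        uint32_t t = (uint32_t)a * b + 128;
        return (uint8_t)((t + (t >> 8)) >> 8);
    }

    static uint8_t
    in_n_8_8_pixel (uint8_t sa, uint8_t m, uint8_t d)
    {
        return mul_un8 (mul_un8 (sa, m), d);   /* d * (sa * m / 255) / 255 */
    }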
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_in_8_8
+/* ---------------------------------------------------------------------------
+ * composite_in_8_8
  */
 
 static void
 sse2_composite_in_8_8 (pixman_implementation_t *imp,
-		       pixman_op_t op,
-		      pixman_image_t * src_image,
-		      pixman_image_t * mask_image,
-		      pixman_image_t * dst_image,
-		      int32_t      src_x,
-		      int32_t      src_y,
-		      int32_t      mask_x,
-		      int32_t      mask_y,
-		      int32_t      dest_x,
-		      int32_t      dest_y,
-		      int32_t     width,
-		      int32_t     height)
-{
-    uint8_t	*dst_line, *dst;
-    uint8_t	*src_line, *src;
-    int	src_stride, dst_stride;
-    uint16_t	w;
-    uint32_t    s, d;
+                       pixman_op_t              op,
+                       pixman_image_t *         src_image,
+                       pixman_image_t *         mask_image,
+                       pixman_image_t *         dst_image,
+                       int32_t                  src_x,
+                       int32_t                  src_y,
+                       int32_t                  mask_x,
+                       int32_t                  mask_y,
+                       int32_t                  dest_x,
+                       int32_t                  dest_y,
+                       int32_t                  width,
+                       int32_t                  height)
+{
+    uint8_t     *dst_line, *dst;
+    uint8_t     *src_line, *src;
+    int src_stride, dst_stride;
+    uint16_t w;
+    uint32_t s, d;
 
     __m128i xmm_src, xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
 
     while (height--)
     {
-        dst = dst_line;
-        dst_line += dst_stride;
-        src = src_line;
-        src_line += src_stride;
-        w = width;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)src);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w && ((unsigned long)dst & 15))
-        {
-            s = (uint32_t) *src++;
-            d = (uint32_t) *dst;
-
-            *dst++ = (uint8_t) pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (s),unpack_32_1x64 (d)));
-            w--;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)src);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w >= 16)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)src);
-            cache_prefetch_next ((__m128i*)dst);
-
-            xmm_src = load_128_unaligned((__m128i*)src);
-            xmm_dst = load_128_aligned((__m128i*)dst);
-
-            unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-            unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-            pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi, &xmm_dst_lo, &xmm_dst_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-            save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-            src += 16;
-            dst += 16;
-            w -= 16;
-        }
-
-        while (w)
-        {
-            s = (uint32_t) *src++;
-            d = (uint32_t) *dst;
-
-            *dst++ = (uint8_t) pack_1x64_32 (pix_multiply_1x64 (unpack_32_1x64 (s),unpack_32_1x64 (d)));
-            w--;
-        }
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)src);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w && ((unsigned long)dst & 15))
+	{
+	    s = (uint32_t) *src++;
+	    d = (uint32_t) *dst;
+
+	    *dst++ = (uint8_t) pack_1x64_32 (
+		pix_multiply_1x64 (
+		    unpack_32_1x64 (s), unpack_32_1x64 (d)));
+	    w--;
+	}
+
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)src);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w >= 16)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)src);
+	    cache_prefetch_next ((__m128i*)dst);
+
+	    xmm_src = load_128_unaligned ((__m128i*)src);
+	    xmm_dst = load_128_aligned ((__m128i*)dst);
+
+	    unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+	    pix_multiply_2x128 (&xmm_src_lo, &xmm_src_hi,
+				&xmm_dst_lo, &xmm_dst_hi,
+				&xmm_dst_lo, &xmm_dst_hi);
+
+	    save_128_aligned (
+		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+	    src += 16;
+	    dst += 16;
+	    w -= 16;
+	}
+
+	while (w)
+	{
+	    s = (uint32_t) *src++;
+	    d = (uint32_t) *dst;
+
+	    *dst++ = (uint8_t) pack_1x64_32 (
+		pix_multiply_1x64 (unpack_32_1x64 (s), unpack_32_1x64 (d)));
+	    w--;
+	}
     }
 
     _mm_empty ();
 }
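
The a8 paths process sixteen pixels per 128-bit register: unpack_128_2x128 widens the bytes into two registers of 16-bit lanes (so the products fit), and pack_2x128_128 narrows back with unsigned saturation. Roughly:

    #include <emmintrin.h>

    /* Widen 16 bytes to two registers of 16-bit lanes and narrow back. */
    static void
    unpack_128_2x128_sketch (__m128i x, __m128i *lo, __m128i *hi)
    {
        __m128i zero = _mm_setzero_si128 ();
        *lo = _mm_unpacklo_epi8 (x, zero);
        *hi = _mm_unpackhi_epi8 (x, zero);
    }

    static __m128i
    pack_2x128_128_sketch (__m128i lo, __m128i hi)
    {
        return _mm_packus_epi16 (lo, hi);   /* unsigned saturation */
    }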
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_add_8888_8_8
+/* -------------------------------------------------------------------------
+ * composite_add_8888_8_8
  */
 
 static void
 sse2_composite_add_8888_8_8 (pixman_implementation_t *imp,
-				pixman_op_t op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t      src_x,
-			       int32_t      src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t     width,
-			       int32_t     height)
-{
-    uint8_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
-    uint32_t	src;
-    uint8_t	sa;
+                             pixman_op_t              op,
+                             pixman_image_t *         src_image,
+                             pixman_image_t *         mask_image,
+                             pixman_image_t *         dst_image,
+                             int32_t                  src_x,
+                             int32_t                  src_y,
+                             int32_t                  mask_x,
+                             int32_t                  mask_y,
+                             int32_t                  dest_x,
+                             int32_t                  dest_y,
+                             int32_t                  width,
+                             int32_t                  height)
+{
+    uint8_t     *dst_line, *dst;
+    uint8_t     *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w;
+    uint32_t src;
+    uint8_t sa;
     uint32_t m, d;
 
     __m128i xmm_alpha;
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     sa = src >> 24;
     if (sa == 0)
-        return;
+	return;
 
     xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src));
 
     while (height--)
     {
-        dst = dst_line;
-        dst_line += dst_stride;
-        mask = mask_line;
-        mask_line += mask_stride;
-        w = width;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w && ((unsigned long)dst & 15))
-        {
-            m = (uint32_t) *mask++;
-            d = (uint32_t) *dst;
-
-            *dst++ = (uint8_t) pack_1x64_32 (_mm_adds_pu16 (pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
-                                                                              unpack_32_1x64 (d)));
-            w--;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)mask);
-        cache_prefetch ((__m128i*)dst);
-
-        while (w >= 16)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)mask);
-            cache_prefetch_next ((__m128i*)dst);
-
-            xmm_mask = load_128_unaligned((__m128i*)mask);
-            xmm_dst = load_128_aligned((__m128i*)dst);
-
-            unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-            unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-            pix_multiply_2x128 (&xmm_alpha, &xmm_alpha, &xmm_mask_lo, &xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-            xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
-            xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
-
-            save_128_aligned ((__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-
-            mask += 16;
-            dst += 16;
-            w -= 16;
-        }
-
-        while (w)
-        {
-            m = (uint32_t) *mask++;
-            d = (uint32_t) *dst;
-
-            *dst++ = (uint8_t) pack_1x64_32 (_mm_adds_pu16 (pix_multiply_1x64 (_mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
-                                                                              unpack_32_1x64 (d)));
-            w--;
-        }
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+	w = width;
+
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w && ((unsigned long)dst & 15))
+	{
+	    m = (uint32_t) *mask++;
+	    d = (uint32_t) *dst;
+
+	    *dst++ = (uint8_t) pack_1x64_32 (
+		_mm_adds_pu16 (
+		    pix_multiply_1x64 (
+			_mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
+		    unpack_32_1x64 (d)));
+	    w--;
+	}
+
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)mask);
+	cache_prefetch ((__m128i*)dst);
+
+	while (w >= 16)
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)mask);
+	    cache_prefetch_next ((__m128i*)dst);
+
+	    xmm_mask = load_128_unaligned ((__m128i*)mask);
+	    xmm_dst = load_128_aligned ((__m128i*)dst);
+
+	    unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+	    pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
+				&xmm_mask_lo, &xmm_mask_hi,
+				&xmm_mask_lo, &xmm_mask_hi);
+
+	    xmm_dst_lo = _mm_adds_epu16 (xmm_mask_lo, xmm_dst_lo);
+	    xmm_dst_hi = _mm_adds_epu16 (xmm_mask_hi, xmm_dst_hi);
+
+	    save_128_aligned (
+		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+	    mask += 16;
+	    dst += 16;
+	    w -= 16;
+	}
+
+	while (w)
+	{
+	    m = (uint32_t) *mask++;
+	    d = (uint32_t) *dst;
+
+	    *dst++ = (uint8_t) pack_1x64_32 (
+		_mm_adds_pu16 (
+		    pix_multiply_1x64 (
+			_mm_movepi64_pi64 (xmm_alpha), unpack_32_1x64 (m)),
+		    unpack_32_1x64 (d)));
+
+	    w--;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
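
sse2_composite_add_8888_8_8 scales the mask by the solid source's alpha and adds the result into the a8 destination with unsigned saturation (_mm_adds_epu16 on the unpacked lanes, _mm_adds_pu16 in the scalar MMX step). Per pixel, a sketch:

    #include <stdint.h>

    /* Sketch: a8 dst += sa * m / 255, saturating at 255. */
    static uint8_t
    add_n_8_8_pixel (uint8_t sa, uint8_t m, uint8_t d)
    {
        uint32_t t = (uint32_t)sa * m + 128;
        uint32_t v = ((t + (t >> 8)) >> 8) + d;
        return v > 255 ? 255 : (uint8_t)v;
    }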
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_add_8000_8000
+/* ----------------------------------------------------------------------
+ * composite_add_8000_8000
  */
 
 static void
 sse2_composite_add_8000_8000 (pixman_implementation_t *imp,
-				 pixman_op_t op,
-				pixman_image_t * src_image,
-				pixman_image_t * mask_image,
-				pixman_image_t * dst_image,
-				int32_t      src_x,
-				int32_t      src_y,
-				int32_t      mask_x,
-				int32_t      mask_y,
-				int32_t      dest_x,
-				int32_t      dest_y,
-				int32_t     width,
-				int32_t     height)
-{
-    uint8_t	*dst_line, *dst;
-    uint8_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-    uint16_t	t;
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
+{
+    uint8_t     *dst_line, *dst;
+    uint8_t     *src_line, *src;
+    int dst_stride, src_stride;
+    uint16_t w;
+    uint16_t t;
+
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
     while (height--)
     {
-        dst = dst_line;
-        src = src_line;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)src);
-        cache_prefetch ((__m128i*)dst);
-
-        dst_line += dst_stride;
-        src_line += src_stride;
-        w = width;
-
-        /* Small head */
-        while (w && (unsigned long)dst & 3)
-        {
-            t = (*dst) + (*src++);
-            *dst++ = t | (0 - (t >> 8));
-            w--;
-        }
-
-        core_combine_add_u_sse2 ((uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
-
-        /* Small tail */
-        dst += w & 0xfffc;
-        src += w & 0xfffc;
-
-        w &= 3;
-
-        while (w)
-        {
-            t = (*dst) + (*src++);
-            *dst++ = t | (0 - (t >> 8));
-            w--;
-        }
+	dst = dst_line;
+	src = src_line;
+
+	/* call prefetch hint to optimize cache load*/
+	cache_prefetch ((__m128i*)src);
+	cache_prefetch ((__m128i*)dst);
+
+	dst_line += dst_stride;
+	src_line += src_stride;
+	w = width;
+
+	/* Small head */
+	while (w && (unsigned long)dst & 3)
+	{
+	    t = (*dst) + (*src++);
+	    *dst++ = t | (0 - (t >> 8));
+	    w--;
+	}
+
+	core_combine_add_u_sse2 ((uint32_t*)dst, (uint32_t*)src, NULL, w >> 2);
+
+	/* Small tail */
+	dst += w & 0xfffc;
+	src += w & 0xfffc;
+
+	w &= 3;
+
+	while (w)
+	{
+	    t = (*dst) + (*src++);
+	    *dst++ = t | (0 - (t >> 8));
+	    w--;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
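
The scalar head and tail above saturate without a branch: the sum t of two bytes is at most 510, so t >> 8 is 1 exactly when the add overflowed, 0 - (t >> 8) is then all ones, and the OR forces the low byte to 0xff. For example, 200 + 100 = 300 = 0x12c gives t >> 8 = 1, and the OR leaves the low byte at 0xff = 255. A self-contained demonstration:

    #include <stdint.h>
    #include <stdio.h>

    /* Branch-free saturating byte add: t <= 510, so t >> 8 is the carry. */
    static uint8_t
    sat_add_u8 (uint8_t a, uint8_t b)
    {
        unsigned t = (unsigned)a + b;
        return (uint8_t)(t | (0 - (t >> 8)));
    }

    int
    main (void)
    {
        printf ("%u\n", sat_add_u8 (200, 100));   /* 255, not 44 */
        printf ("%u\n", sat_add_u8 (10, 20));     /* 30 */
        return 0;
    }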
 
-/* -------------------------------------------------------------------------------------------------
- * fast_composite_add_8888_8888
+/* ---------------------------------------------------------------------
+ * composite_add_8888_8888
  */
 static void
 sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
-				 pixman_op_t 	op,
-				pixman_image_t *	src_image,
-				pixman_image_t *	mask_image,
-				pixman_image_t *	 dst_image,
-				int32_t		 src_x,
-				int32_t      src_y,
-				int32_t      mask_x,
-				int32_t      mask_y,
-				int32_t      dest_x,
-				int32_t      dest_y,
-				int32_t     width,
-				int32_t     height)
-{
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    int	dst_stride, src_stride;
-
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
+{
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
 
     while (height--)
     {
-        dst = dst_line;
-        dst_line += dst_stride;
-        src = src_line;
-        src_line += src_stride;
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
 
-        core_combine_add_u_sse2 (dst, src, NULL, width);
+	core_combine_add_u_sse2 (dst, src, NULL, width);
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
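
For packed a8r8g8b8, the ADD operator is a per-byte saturating add, so whole scanlines go straight to core_combine_add_u_sse2 with no per-pixel setup; its inner loop reduces four pixels to a single instruction, roughly:

    #include <emmintrin.h>

    /* Four a8r8g8b8 pixels at once: ADD is a per-byte saturating add. */
    static __m128i
    add_4_pixels (__m128i src, __m128i dst)
    {
        return _mm_adds_epu8 (src, dst);
    }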
 
 /* -------------------------------------------------------------------------------------------------
@@ -4563,45 +5121,48 @@ sse2_composite_add_8888_8888 (pixman_implementation_t *imp,
 
 static pixman_bool_t
 pixman_blt_sse2 (uint32_t *src_bits,
-	       uint32_t *dst_bits,
-	       int src_stride,
-	       int dst_stride,
-	       int src_bpp,
-	       int dst_bpp,
-	       int src_x, int src_y,
-	       int dst_x, int dst_y,
-	       int width, int height)
-{
-    uint8_t *	src_bytes;
-    uint8_t *	dst_bytes;
-    int		byte_width;
+                 uint32_t *dst_bits,
+                 int       src_stride,
+                 int       dst_stride,
+                 int       src_bpp,
+                 int       dst_bpp,
+                 int       src_x,
+                 int       src_y,
+                 int       dst_x,
+                 int       dst_y,
+                 int       width,
+                 int       height)
+{
+    uint8_t *   src_bytes;
+    uint8_t *   dst_bytes;
+    int byte_width;
 
     if (src_bpp != dst_bpp)
-        return FALSE;
+	return FALSE;
 
     if (src_bpp == 16)
     {
-        src_stride = src_stride * (int) sizeof (uint32_t) / 2;
-        dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
-        src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
-        dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
-        byte_width = 2 * width;
-        src_stride *= 2;
-        dst_stride *= 2;
+	src_stride = src_stride * (int) sizeof (uint32_t) / 2;
+	dst_stride = dst_stride * (int) sizeof (uint32_t) / 2;
+	src_bytes = (uint8_t *)(((uint16_t *)src_bits) + src_stride * (src_y) + (src_x));
+	dst_bytes = (uint8_t *)(((uint16_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+	byte_width = 2 * width;
+	src_stride *= 2;
+	dst_stride *= 2;
     }
     else if (src_bpp == 32)
     {
-        src_stride = src_stride * (int) sizeof (uint32_t) / 4;
-        dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
-        src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
-        dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
-        byte_width = 4 * width;
-        src_stride *= 4;
-        dst_stride *= 4;
+	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
+	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
+	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
+	dst_bytes = (uint8_t *)(((uint32_t *)dst_bits) + dst_stride * (dst_y) + (dst_x));
+	byte_width = 4 * width;
+	src_stride *= 4;
+	dst_stride *= 4;
     }
     else
     {
-        return FALSE;
+	return FALSE;
     }
 
     cache_prefetch ((__m128i*)src_bytes);
@@ -4609,345 +5170,357 @@ pixman_blt_sse2 (uint32_t *src_bits,
 
     while (height--)
     {
-        int w;
-        uint8_t *s = src_bytes;
-        uint8_t *d = dst_bytes;
-        src_bytes += src_stride;
-        dst_bytes += dst_stride;
-        w = byte_width;
-
-        cache_prefetch_next ((__m128i*)s);
-        cache_prefetch_next ((__m128i*)d);
-
-        while (w >= 2 && ((unsigned long)d & 3))
-        {
-            *(uint16_t *)d = *(uint16_t *)s;
-            w -= 2;
-            s += 2;
-            d += 2;
-        }
-
-        while (w >= 4 && ((unsigned long)d & 15))
-        {
-            *(uint32_t *)d = *(uint32_t *)s;
-
-            w -= 4;
-            s += 4;
-            d += 4;
-        }
-
-        cache_prefetch_next ((__m128i*)s);
-        cache_prefetch_next ((__m128i*)d);
-
-        while (w >= 64)
-        {
-            __m128i xmm0, xmm1, xmm2, xmm3;
-
-            /* 128 bytes ahead */
-            cache_prefetch (((__m128i*)s) + 8);
-            cache_prefetch (((__m128i*)d) + 8);
-
-            xmm0 = load_128_unaligned ((__m128i*)(s));
-            xmm1 = load_128_unaligned ((__m128i*)(s+16));
-            xmm2 = load_128_unaligned ((__m128i*)(s+32));
-            xmm3 = load_128_unaligned ((__m128i*)(s+48));
-
-            save_128_aligned ((__m128i*)(d),    xmm0);
-            save_128_aligned ((__m128i*)(d+16), xmm1);
-            save_128_aligned ((__m128i*)(d+32), xmm2);
-            save_128_aligned ((__m128i*)(d+48), xmm3);
-
-            s += 64;
-            d += 64;
-            w -= 64;
-        }
-
-        cache_prefetch_next ((__m128i*)s);
-        cache_prefetch_next ((__m128i*)d);
-
-        while (w >= 16)
-        {
-            save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );
-
-            w -= 16;
-            d += 16;
-            s += 16;
-        }
-
-        cache_prefetch_next ((__m128i*)s);
-        cache_prefetch_next ((__m128i*)d);
-
-        while (w >= 4)
-        {
-            *(uint32_t *)d = *(uint32_t *)s;
-
-            w -= 4;
-            s += 4;
-            d += 4;
-        }
-
-        if (w >= 2)
-        {
-            *(uint16_t *)d = *(uint16_t *)s;
-            w -= 2;
-            s += 2;
-            d += 2;
-        }
+	int w;
+	uint8_t *s = src_bytes;
+	uint8_t *d = dst_bytes;
+	src_bytes += src_stride;
+	dst_bytes += dst_stride;
+	w = byte_width;
+
+	cache_prefetch_next ((__m128i*)s);
+	cache_prefetch_next ((__m128i*)d);
+
+	while (w >= 2 && ((unsigned long)d & 3))
+	{
+	    *(uint16_t *)d = *(uint16_t *)s;
+	    w -= 2;
+	    s += 2;
+	    d += 2;
+	}
+
+	while (w >= 4 && ((unsigned long)d & 15))
+	{
+	    *(uint32_t *)d = *(uint32_t *)s;
+
+	    w -= 4;
+	    s += 4;
+	    d += 4;
+	}
+
+	cache_prefetch_next ((__m128i*)s);
+	cache_prefetch_next ((__m128i*)d);
+
+	while (w >= 64)
+	{
+	    __m128i xmm0, xmm1, xmm2, xmm3;
+
+	    /* 128 bytes ahead */
+	    cache_prefetch (((__m128i*)s) + 8);
+	    cache_prefetch (((__m128i*)d) + 8);
+
+	    xmm0 = load_128_unaligned ((__m128i*)(s));
+	    xmm1 = load_128_unaligned ((__m128i*)(s + 16));
+	    xmm2 = load_128_unaligned ((__m128i*)(s + 32));
+	    xmm3 = load_128_unaligned ((__m128i*)(s + 48));
+
+	    save_128_aligned ((__m128i*)(d),    xmm0);
+	    save_128_aligned ((__m128i*)(d + 16), xmm1);
+	    save_128_aligned ((__m128i*)(d + 32), xmm2);
+	    save_128_aligned ((__m128i*)(d + 48), xmm3);
+
+	    s += 64;
+	    d += 64;
+	    w -= 64;
+	}
+
+	cache_prefetch_next ((__m128i*)s);
+	cache_prefetch_next ((__m128i*)d);
+
+	while (w >= 16)
+	{
+	    save_128_aligned ((__m128i*)d, load_128_unaligned ((__m128i*)s) );
+
+	    w -= 16;
+	    d += 16;
+	    s += 16;
+	}
+
+	cache_prefetch_next ((__m128i*)s);
+	cache_prefetch_next ((__m128i*)d);
+
+	while (w >= 4)
+	{
+	    *(uint32_t *)d = *(uint32_t *)s;
+
+	    w -= 4;
+	    s += 4;
+	    d += 4;
+	}
+
+	if (w >= 2)
+	{
+	    *(uint16_t *)d = *(uint16_t *)s;
+	    w -= 2;
+	    s += 2;
+	    d += 2;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 
     return TRUE;
 }
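
pixman_blt_sse2 first converts the strides, which arrive in uint32_t units, into byte strides for the actual bpp, then copies each scanline with the usual head/body/tail shape: align the destination, move 64 bytes per iteration with unaligned loads and aligned stores, and prefetch 128 bytes ahead. A condensed sketch of one scanline, assuming the destination is at least 4-byte aligned on entry (the real code also has a 2-byte step for 16 bpp surfaces):

    #include <stdint.h>
    #include <emmintrin.h>

    /* One scanline of the blit: align the stores, then 64 bytes per
     * iteration from unaligned loads to aligned stores. */
    static void
    copy_scanline (uint8_t *d, const uint8_t *s, int w)
    {
        while (w >= 4 && ((uintptr_t)d & 15))    /* align the stores */
        {
            *(uint32_t *)d = *(const uint32_t *)s;
            d += 4; s += 4; w -= 4;
        }

        while (w >= 64)                          /* unrolled 64-byte body */
        {
            __m128i x0 = _mm_loadu_si128 ((const __m128i *)(s));
            __m128i x1 = _mm_loadu_si128 ((const __m128i *)(s + 16));
            __m128i x2 = _mm_loadu_si128 ((const __m128i *)(s + 32));
            __m128i x3 = _mm_loadu_si128 ((const __m128i *)(s + 48));

            _mm_store_si128 ((__m128i *)(d),      x0);
            _mm_store_si128 ((__m128i *)(d + 16), x1);
            _mm_store_si128 ((__m128i *)(d + 32), x2);
            _mm_store_si128 ((__m128i *)(d + 48), x3);

            s += 64; d += 64; w -= 64;
        }

        while (w >= 4)                           /* tail, 4 bytes at a time */
        {
            *(uint32_t *)d = *(const uint32_t *)s;
            d += 4; s += 4; w -= 4;
        }
    }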
 
 static void
 sse2_composite_copy_area (pixman_implementation_t *imp,
-			 pixman_op_t       op,
-			pixman_image_t *	src_image,
-			pixman_image_t *	mask_image,
-			pixman_image_t *	dst_image,
-			int32_t		src_x,
-			int32_t		src_y,
-			int32_t		mask_x,
-			int32_t		mask_y,
-			int32_t		dest_x,
-			int32_t		dest_y,
-			int32_t		width,
-			int32_t		height)
+                          pixman_op_t              op,
+                          pixman_image_t *         src_image,
+                          pixman_image_t *         mask_image,
+                          pixman_image_t *         dst_image,
+                          int32_t                  src_x,
+                          int32_t                  src_y,
+                          int32_t                  mask_x,
+                          int32_t                  mask_y,
+                          int32_t                  dest_x,
+                          int32_t                  dest_y,
+                          int32_t                  width,
+                          int32_t                  height)
 {
     pixman_blt_sse2 (src_image->bits.bits,
-		    dst_image->bits.bits,
-		    src_image->bits.rowstride,
-		    dst_image->bits.rowstride,
-		    PIXMAN_FORMAT_BPP (src_image->bits.format),
-		    PIXMAN_FORMAT_BPP (dst_image->bits.format),
-		    src_x, src_y, dest_x, dest_y, width, height);
+                     dst_image->bits.bits,
+                     src_image->bits.rowstride,
+                     dst_image->bits.rowstride,
+                     PIXMAN_FORMAT_BPP (src_image->bits.format),
+                     PIXMAN_FORMAT_BPP (dst_image->bits.format),
+                     src_x, src_y, dest_x, dest_y, width, height);
 }
 
 #if 0
 /* This code is buggy in the MMX version, and the bug was carried over to the SSE2 version */
 void
 sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
-				 pixman_op_t      op,
-				pixman_image_t * src_image,
-				pixman_image_t * mask_image,
-				pixman_image_t * dst_image,
-				int32_t      src_x,
-				int32_t      src_y,
-				int32_t      mask_x,
-				int32_t      mask_y,
-				int32_t      dest_x,
-				int32_t      dest_y,
-				int32_t     width,
-				int32_t     height)
-{
-    uint32_t	*src, *src_line, s;
+                                 pixman_op_t              op,
+                                 pixman_image_t *         src_image,
+                                 pixman_image_t *         mask_image,
+                                 pixman_image_t *         dst_image,
+                                 int32_t                  src_x,
+                                 int32_t                  src_y,
+                                 int32_t                  mask_x,
+                                 int32_t                  mask_y,
+                                 int32_t                  dest_x,
+                                 int32_t                  dest_y,
+                                 int32_t                  width,
+                                 int32_t                  height)
+{
+    uint32_t    *src, *src_line, s;
     uint32_t    *dst, *dst_line, d;
-    uint8_t	    *mask, *mask_line;
-    uint32_t    m;
-    int		 src_stride, mask_stride, dst_stride;
+    uint8_t         *mask, *mask_line;
+    uint32_t m;
+    int src_stride, mask_stride, dst_stride;
     uint16_t w;
 
     __m128i xmm_src, xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask, xmm_mask_lo, xmm_mask_hi;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
     while (height--)
     {
-        src = src_line;
-        src_line += src_stride;
-        dst = dst_line;
-        dst_line += dst_stride;
-        mask = mask_line;
-        mask_line += mask_stride;
-
-        w = width;
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)src);
-        cache_prefetch ((__m128i*)dst);
-        cache_prefetch ((__m128i*)mask);
-
-        while (w && (unsigned long)dst & 15)
-        {
-            s = 0xff000000 | *src++;
-            m = (uint32_t) *mask++;
-            d = *dst;
-
-            __m64 ms = unpack_32_1x64 (s);
-
-            if (m != 0xff)
-            {
-                ms = in_over_1x64 (ms,
-                                  mask_x00ff,
-                                  expand_alpha_rev_1x64 (unpack_32_1x64 (m)),
-                                  unpack_32_1x64 (d));
-            }
-
-            *dst++ = pack_1x64_32 (ms);
-            w--;
-        }
-
-        /* call prefetch hint to optimize cache load*/
-        cache_prefetch ((__m128i*)src);
-        cache_prefetch ((__m128i*)dst);
-        cache_prefetch ((__m128i*)mask);
-
-        while (w >= 4)
-        {
-            /* fill cache line with next memory */
-            cache_prefetch_next ((__m128i*)src);
-            cache_prefetch_next ((__m128i*)dst);
-            cache_prefetch_next ((__m128i*)mask);
-
-            m = *(uint32_t*) mask;
-            xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000);
-
-            if (m == 0xffffffff)
-            {
-                save_128_aligned ((__m128i*)dst, xmm_src);
-            }
-            else
-            {
-                xmm_dst = load_128_aligned ((__m128i*)dst);
-
-                xmm_mask = _mm_unpacklo_epi16 (unpack_32_1x128 (m), _mm_setzero_si128());
-
-                unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
-                unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
-                unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
-
-                expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi, &xmm_mask_lo, &xmm_mask_hi);
-
-                in_over_2x128 (xmm_src_lo, xmm_src_hi, mask_00ff, mask_00ff, xmm_mask_lo, xmm_mask_hi, &xmm_dst_lo, &xmm_dst_hi);
-
-                save_128_aligned( (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
-            }
-
-            src += 4;
-            dst += 4;
-            mask += 4;
-            w -= 4;
-        }
-
-        while (w)
-        {
-            m = (uint32_t) *mask++;
-
-            if (m)
-            {
-                s = 0xff000000 | *src;
-
-                if (m == 0xff)
-                {
-                    *dst = s;
-                }
-                else
-                {
-                    d = *dst;
-
-                    *dst = pack_1x64_32 (in_over_1x64 (unpack_32_1x64 (s),
-                                                      mask_x00ff,
-                                                      expand_alpha_rev_1x64 (unpack_32_1x64 (m)),
-                                                      unpack_32_1x64 (d)));
-                }
-
-            }
-
-            src++;
-            dst++;
-            w--;
-        }
+	src = src_line;
+	src_line += src_stride;
+	dst = dst_line;
+	dst_line += dst_stride;
+	mask = mask_line;
+	mask_line += mask_stride;
+
+	w = width;
+
+	/* issue a prefetch hint to optimize cache loads */
+	cache_prefetch ((__m128i*)src);
+	cache_prefetch ((__m128i*)dst);
+	cache_prefetch ((__m128i*)mask);
+
+	while (w && (unsigned long)dst & 15)
+	{
+	    s = 0xff000000 | *src++;
+	    m = (uint32_t) *mask++;
+	    d = *dst;
+
+	    __m64 ms = unpack_32_1x64 (s);
+
+	    if (m != 0xff)
+	    {
+		ms = in_over_1x64 (ms,
+		                   mask_x00ff,
+		                   expand_alpha_rev_1x64 (unpack_32_1x64 (m)),
+		                   unpack_32_1x64 (d));
+	    }
+
+	    *dst++ = pack_1x64_32 (ms);
+	    w--;
+	}
+
+	/* issue a prefetch hint to optimize cache loads */
+	cache_prefetch ((__m128i*)src);
+	cache_prefetch ((__m128i*)dst);
+	cache_prefetch ((__m128i*)mask);
+
+	while (w >= 4)
+	{
+	    /* prefetch the next cache lines */
+	    cache_prefetch_next ((__m128i*)src);
+	    cache_prefetch_next ((__m128i*)dst);
+	    cache_prefetch_next ((__m128i*)mask);
+
+	    m = *(uint32_t*) mask;
+	    xmm_src = _mm_or_si128 (load_128_unaligned ((__m128i*)src), mask_ff000000);
+
+	    if (m == 0xffffffff)
+	    {
+		save_128_aligned ((__m128i*)dst, xmm_src);
+	    }
+	    else
+	    {
+		xmm_dst = load_128_aligned ((__m128i*)dst);
+
+		xmm_mask = _mm_unpacklo_epi16 (
+		    unpack_32_1x128 (m), _mm_setzero_si128 ());
+
+		unpack_128_2x128 (xmm_src, &xmm_src_lo, &xmm_src_hi);
+		unpack_128_2x128 (xmm_mask, &xmm_mask_lo, &xmm_mask_hi);
+		unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi);
+
+		expand_alpha_rev_2x128 (xmm_mask_lo, xmm_mask_hi,
+					&xmm_mask_lo, &xmm_mask_hi);
+
+		in_over_2x128 (xmm_src_lo, xmm_src_hi,
+			       mask_00ff, mask_00ff,
+			       xmm_mask_lo, xmm_mask_hi,
+			       &xmm_dst_lo, &xmm_dst_hi);
+
+		save_128_aligned (
+		    (__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+	    }
+
+	    src += 4;
+	    dst += 4;
+	    mask += 4;
+	    w -= 4;
+	}
+
+	while (w)
+	{
+	    m = (uint32_t) *mask++;
+
+	    if (m)
+	    {
+		s = 0xff000000 | *src;
+
+		if (m == 0xff)
+		{
+		    *dst = s;
+		}
+		else
+		{
+		    d = *dst;
+
+		    *dst = pack_1x64_32 (
+			in_over_1x64 (
+			    unpack_32_1x64 (s),
+			    mask_x00ff,
+			    expand_alpha_rev_1x64 (unpack_32_1x64 (m)),
+			    unpack_32_1x64 (d)));
+		}
+
+	    }
+
+	    src++;
+	    dst++;
+	    w--;
+	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
+
 #endif
 
 static const pixman_fast_path_t sse2_fast_paths[] =
 {
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   sse2_composite_over_n_8_0565,     0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   sse2_composite_over_n_8_0565,     0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_a8r8g8b8, sse2_composite_over_n_8888,           0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_x8r8g8b8, sse2_composite_over_n_8888,           0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_r5g6b5,   sse2_composite_over_n_0565,           0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, sse2_composite_over_8888_8888,          0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, sse2_composite_over_8888_8888,          0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, sse2_composite_over_8888_8888,          0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, sse2_composite_over_8888_8888,          0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   sse2_composite_over_8888_0565,          0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   sse2_composite_over_8888_0565,          0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_over_n_8_8888,     0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, sse2_composite_over_n_8_8888,     0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, sse2_composite_over_n_8_8888,     0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, sse2_composite_over_n_8_8888,     0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   sse2_composite_over_n_8_0565,       0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   sse2_composite_over_n_8_0565,       0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_a8r8g8b8, sse2_composite_over_n_8888,         0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_x8r8g8b8, sse2_composite_over_n_8888,         0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_r5g6b5,   sse2_composite_over_n_0565,         0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, sse2_composite_over_8888_8888,      0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, sse2_composite_over_8888_8888,      0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, sse2_composite_over_8888_8888,      0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, sse2_composite_over_8888_8888,      0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   sse2_composite_over_8888_0565,      0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   sse2_composite_over_8888_0565,      0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_over_n_8_8888,       0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, sse2_composite_over_n_8_8888,       0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, sse2_composite_over_n_8_8888,       0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, sse2_composite_over_n_8_8888,       0 },
 #if 0
     /* FIXME: This code is buggy in the MMX version, and the bug was carried over when it was translated to SSE2 */
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, sse2_composite_over_x888_8_8888,       0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_over_x888_8_8888,       0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, sse2_composite_over_x888_8_8888,       0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_over_x888_8_8888,       0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, sse2_composite_over_x888_8_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_over_x888_8_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, sse2_composite_over_x888_8_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_over_x888_8_8888,    0 },
 #endif
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_over_x888_n_8888,        NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, sse2_composite_over_x888_n_8888,        NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_a8b8g8r8, sse2_composite_over_x888_n_8888,        NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, sse2_composite_over_x888_n_8888,        NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_over_8888_n_8888,        NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, sse2_composite_over_8888_n_8888,        NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_a8b8g8r8, sse2_composite_over_8888_n_8888,        NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, sse2_composite_over_8888_n_8888,        NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_over_x888_n_8888,    NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, sse2_composite_over_x888_n_8888,    NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_a8b8g8r8, sse2_composite_over_x888_n_8888,    NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, sse2_composite_over_x888_n_8888,    NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_over_8888_n_8888,    NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, sse2_composite_over_8888_n_8888,    NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_a8b8g8r8, sse2_composite_over_8888_n_8888,    NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, sse2_composite_over_8888_n_8888,    NEED_SOLID_MASK },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, sse2_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8r8g8b8, PIXMAN_r5g6b5,   sse2_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_b5g6r5,   sse2_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, sse2_composite_over_pixbuf_8888,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, sse2_composite_over_pixbuf_8888,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, sse2_composite_over_pixbuf_8888,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, sse2_composite_over_pixbuf_8888,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, sse2_composite_over_pixbuf_8888,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, sse2_composite_over_pixbuf_8888,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, sse2_composite_over_pixbuf_8888,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, sse2_composite_over_pixbuf_8888,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5,   sse2_composite_over_pixbuf_0565,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5,   sse2_composite_over_pixbuf_0565,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5,   sse2_composite_over_pixbuf_0565,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5,   sse2_composite_over_pixbuf_0565,     NEED_PIXBUF },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, sse2_composite_copy_area,               0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, sse2_composite_copy_area,               0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_a8r8g8b8, sse2_composite_over_pixbuf_8888,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_a8r8g8b8, sse2_composite_over_pixbuf_8888,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_x8r8g8b8, sse2_composite_over_pixbuf_8888,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_x8r8g8b8, sse2_composite_over_pixbuf_8888,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_a8b8g8r8, sse2_composite_over_pixbuf_8888,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, sse2_composite_over_pixbuf_8888,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_x8b8g8r8, sse2_composite_over_pixbuf_8888,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, sse2_composite_over_pixbuf_8888,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8r8g8b8, PIXMAN_r5g6b5,   sse2_composite_over_pixbuf_0565,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5,   sse2_composite_over_pixbuf_0565,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5,   sse2_composite_over_pixbuf_0565,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5,   sse2_composite_over_pixbuf_0565,    NEED_PIXBUF },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, sse2_composite_copy_area,           0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, sse2_composite_copy_area,           0 },
 
     { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_null,     PIXMAN_a8,       sse2_composite_add_8000_8000,       0 },
     { PIXMAN_OP_ADD,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, sse2_composite_add_8888_8888,       0 },
     { PIXMAN_OP_ADD,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, sse2_composite_add_8888_8888,       0 },
     { PIXMAN_OP_ADD,  PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8,       sse2_composite_add_8888_8_8,        0 },
 
-    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_src_n_8_8888,  0 },
-    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_x8r8g8b8, sse2_composite_src_n_8_8888,  0 },
-    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8b8g8r8, sse2_composite_src_n_8_8888,  0 },
-    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_x8b8g8r8, sse2_composite_src_n_8_8888,  0 },
-    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_a8r8g8b8, sse2_composite_copy_area,               0 },
-    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_a8b8g8r8, sse2_composite_copy_area,               0 },
-    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, sse2_composite_copy_area,		0 },
-    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,	PIXMAN_x8b8g8r8, sse2_composite_copy_area,		0 },
-    { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, sse2_composite_copy_area,               0 },
-    { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,     PIXMAN_x8b8g8r8, sse2_composite_copy_area,               0 },
-    { PIXMAN_OP_SRC, PIXMAN_r5g6b5,    PIXMAN_null,     PIXMAN_r5g6b5,   sse2_composite_copy_area,               0 },
-    { PIXMAN_OP_SRC, PIXMAN_b5g6r5,    PIXMAN_null,     PIXMAN_b5g6r5,   sse2_composite_copy_area,               0 },
-
-    { PIXMAN_OP_IN,  PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       sse2_composite_in_8_8,                 0 },
-    { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       sse2_composite_in_n_8_8,               0 },
+    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8r8g8b8, sse2_composite_src_n_8_8888,        0 },
+    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_x8r8g8b8, sse2_composite_src_n_8_8888,        0 },
+    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8b8g8r8, sse2_composite_src_n_8_8888,        0 },
+    { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_x8b8g8r8, sse2_composite_src_n_8_8888,        0 },
+    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_a8r8g8b8, sse2_composite_copy_area,           0 },
+    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_a8b8g8r8, sse2_composite_copy_area,           0 },
+    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, sse2_composite_copy_area,           0 },
+    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_x8b8g8r8, sse2_composite_copy_area,           0 },
+    { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, sse2_composite_copy_area,           0 },
+    { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,     PIXMAN_x8b8g8r8, sse2_composite_copy_area,           0 },
+    { PIXMAN_OP_SRC, PIXMAN_r5g6b5,    PIXMAN_null,     PIXMAN_r5g6b5,   sse2_composite_copy_area,           0 },
+    { PIXMAN_OP_SRC, PIXMAN_b5g6r5,    PIXMAN_null,     PIXMAN_b5g6r5,   sse2_composite_copy_area,           0 },
+
+    { PIXMAN_OP_IN,  PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       sse2_composite_in_8_8,              0 },
+    { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       sse2_composite_in_n_8_8,            0 },
 
     { PIXMAN_OP_NONE },
 };
 
 /*
  * Work around GCC bug causing crashes in Mozilla with SSE2
- * 
+ *
  * When using -msse, gcc generates movdqa instructions assuming that
  * the stack is 16 byte aligned. Unfortunately some applications, such
  * as Mozilla and Mono, end up aligning the stack to 4 bytes, which
@@ -4966,35 +5539,35 @@ __attribute__((__force_align_arg_pointer__))
 #endif
 static void
 sse2_composite (pixman_implementation_t *imp,
-		pixman_op_t     op,
-		pixman_image_t *src,
-		pixman_image_t *mask,
-		pixman_image_t *dest,
-		int32_t         src_x,
-		int32_t         src_y,
-		int32_t         mask_x,
-		int32_t         mask_y,
-		int32_t         dest_x,
-		int32_t         dest_y,
-		int32_t        width,
-		int32_t        height)
+                pixman_op_t              op,
+                pixman_image_t *         src,
+                pixman_image_t *         mask,
+                pixman_image_t *         dest,
+                int32_t                  src_x,
+                int32_t                  src_y,
+                int32_t                  mask_x,
+                int32_t                  mask_y,
+                int32_t                  dest_x,
+                int32_t                  dest_y,
+                int32_t                  width,
+                int32_t                  height)
 {
     if (_pixman_run_fast_path (sse2_fast_paths, imp,
-			       op, src, mask, dest,
-			       src_x, src_y,
-			       mask_x, mask_y,
-			       dest_x, dest_y,
-			       width, height))
+                               op, src, mask, dest,
+                               src_x, src_y,
+                               mask_x, mask_y,
+                               dest_x, dest_y,
+                               width, height))
     {
 	return;
     }
 
     _pixman_implementation_composite (imp->delegate, op,
-				      src, mask, dest,
-				      src_x, src_y,
-				      mask_x, mask_y,
-				      dest_x, dest_y,
-				      width, height);
+                                      src, mask, dest,
+                                      src_x, src_y,
+                                      mask_x, mask_y,
+                                      dest_x, dest_y,
+                                      width, height);
 }
 
 #if defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
@@ -5002,19 +5575,22 @@ __attribute__((__force_align_arg_pointer__))
 #endif
 static pixman_bool_t
 sse2_blt (pixman_implementation_t *imp,
-	  uint32_t *src_bits,
-	  uint32_t *dst_bits,
-	  int src_stride,
-	  int dst_stride,
-	  int src_bpp,
-	  int dst_bpp,
-	  int src_x, int src_y,
-	  int dst_x, int dst_y,
-	  int width, int height)
+          uint32_t *               src_bits,
+          uint32_t *               dst_bits,
+          int                      src_stride,
+          int                      dst_stride,
+          int                      src_bpp,
+          int                      dst_bpp,
+          int                      src_x,
+          int                      src_y,
+          int                      dst_x,
+          int                      dst_y,
+          int                      width,
+          int                      height)
 {
     if (!pixman_blt_sse2 (
-	    src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
-	    src_x, src_y, dst_x, dst_y, width, height))
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dst_x, dst_y, width, height))
 
     {
 	return _pixman_implementation_blt (
@@ -5031,14 +5607,14 @@ __attribute__((__force_align_arg_pointer__))
 #endif
 static pixman_bool_t
 sse2_fill (pixman_implementation_t *imp,
-	   uint32_t *bits,
-	   int stride,
-	   int bpp,
-	   int x,
-	   int y,
-	   int width,
-	   int height,
-	   uint32_t xor)
+           uint32_t *               bits,
+           int                      stride,
+           int                      bpp,
+           int                      x,
+           int                      y,
+           int                      width,
+           int                      height,
+           uint32_t                 xor)
 {
     if (!pixman_fill_sse2 (bits, stride, bpp, x, y, width, height, xor))
     {
@@ -5071,20 +5647,20 @@ _pixman_implementation_create_sse2 (void)
     mask_ffff = create_mask_16_128 (0xffff);
     mask_ff000000 = create_mask_2x32_128 (0xff000000, 0xff000000);
     mask_alpha = create_mask_2x32_128 (0x00ff0000, 0x00000000);
-    
+
     /* MMX constants */
     mask_x565_rgb = create_mask_2x32_64 (0x000001f0, 0x003f001f);
     mask_x565_unpack = create_mask_2x32_64 (0x00000084, 0x04100840);
-    
+
     mask_x0080 = create_mask_16_64 (0x0080);
     mask_x00ff = create_mask_16_64 (0x00ff);
     mask_x0101 = create_mask_16_64 (0x0101);
     mask_x_alpha = create_mask_2x32_64 (0x00ff0000, 0x00000000);
 
-    _mm_empty();
+    _mm_empty ();
 
     /* Set up function pointers */
-    
+
     /* SSE code patch for fbcompose.c */
     imp->combine_32[PIXMAN_OP_OVER] = sse2_combine_over_u;
     imp->combine_32[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_u;
@@ -5096,9 +5672,9 @@ _pixman_implementation_create_sse2 (void)
     imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_u;
     imp->combine_32[PIXMAN_OP_XOR] = sse2_combine_xor_u;
     imp->combine_32[PIXMAN_OP_ADD] = sse2_combine_add_u;
-    
+
     imp->combine_32[PIXMAN_OP_SATURATE] = sse2_combine_saturate_u;
-    
+
     imp->combine_32_ca[PIXMAN_OP_SRC] = sse2_combine_src_ca;
     imp->combine_32_ca[PIXMAN_OP_OVER] = sse2_combine_over_ca;
     imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = sse2_combine_over_reverse_ca;
@@ -5110,11 +5686,11 @@ _pixman_implementation_create_sse2 (void)
     imp->combine_32_ca[PIXMAN_OP_ATOP_REVERSE] = sse2_combine_atop_reverse_ca;
     imp->combine_32_ca[PIXMAN_OP_XOR] = sse2_combine_xor_ca;
     imp->combine_32_ca[PIXMAN_OP_ADD] = sse2_combine_add_ca;
-    
+
     imp->composite = sse2_composite;
     imp->blt = sse2_blt;
     imp->fill = sse2_fill;
-    
+
     return imp;
 }
 
commit 7dc3593d8172e292b39a7d6cc7772fcf9a04bb1f
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 20:31:26 2009 -0400

    Reformat and reindent pixman-solid-fill.c

diff --git a/pixman/pixman-solid-fill.c b/pixman/pixman-solid-fill.c
index 7e50970..667f1f0 100644
--- a/pixman/pixman-solid-fill.c
+++ b/pixman/pixman-solid-fill.c
@@ -25,24 +25,29 @@
 #include "pixman-private.h"
 
 static void
-solid_fill_get_scanline_32 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-			    const uint32_t *mask, uint32_t mask_bits)
+solid_fill_get_scanline_32 (pixman_image_t *image,
+                            int             x,
+                            int             y,
+                            int             width,
+                            uint32_t *      buffer,
+                            const uint32_t *mask,
+                            uint32_t        mask_bits)
 {
     uint32_t *end = buffer + width;
     register uint32_t color = ((solid_fill_t *)image)->color;
-    
+
     while (buffer < end)
 	*(buffer++) = color;
-    
+
     return;
 }
 
 static source_pict_class_t
 solid_fill_classify (pixman_image_t *image,
-		     int	     x,
-		     int	     y,
-		     int	     width,
-		     int	     height)
+                     int             x,
+                     int             y,
+                     int             width,
+                     int             height)
 {
     return (image->source.class = SOURCE_IMAGE_CLASS_HORIZONTAL);
 }
@@ -58,17 +63,17 @@ static uint32_t
 color_to_uint32 (const pixman_color_t *color)
 {
     return
-	(color->alpha >> 8 << 24) |
-	(color->red >> 8 << 16) |
+        (color->alpha >> 8 << 24) |
+        (color->red >> 8 << 16) |
         (color->green & 0xff00) |
-	(color->blue >> 8);
+        (color->blue >> 8);
 }
 
 PIXMAN_EXPORT pixman_image_t *
 pixman_image_create_solid_fill (pixman_color_t *color)
 {
-    pixman_image_t *img = _pixman_image_allocate();
-    
+    pixman_image_t *img = _pixman_image_allocate ();
+
     if (!img)
 	return NULL;
 
@@ -80,6 +85,7 @@ pixman_image_create_solid_fill (pixman_color_t *color)
     img->common.property_changed = solid_fill_property_changed;
 
     solid_fill_property_changed (img);
-    
+
     return img;
 }
+
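
The color_to_uint32 helper in this commit packs pixman's 16-bit-per-channel color into a 32-bit a8r8g8b8 pixel by keeping the high byte of each channel; note that (green & 0xff00) is simply (green >> 8) << 8 folded into a single operation. A self-contained sketch of the same packing, where color16_t is a hypothetical stand-in for pixman_color_t:

    #include <inttypes.h>
    #include <stdio.h>

    typedef struct {
        uint16_t red, green, blue, alpha;   /* 0x0000 .. 0xffff per channel */
    } color16_t;

    static uint32_t
    pack_a8r8g8b8 (const color16_t *c)
    {
        return ((uint32_t)(c->alpha >> 8) << 24) |   /* A in bits 24-31 */
               ((uint32_t)(c->red   >> 8) << 16) |   /* R in bits 16-23 */
               ((uint32_t)(c->green & 0xff00))   |   /* G in bits  8-15 */
               ((uint32_t)(c->blue  >> 8));          /* B in bits   0-7 */
    }

    int
    main (void)
    {
        color16_t opaque_orange = { 0xffff, 0x8000, 0x0000, 0xffff };

        /* Prints ffff8000: alpha ff, red ff, green 80, blue 00 */
        printf ("%08" PRIx32 "\n", pack_a8r8g8b8 (&opaque_orange));

        return 0;
    }
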
commit 3db9f5ff6e32c353cff640d3504eb54bd2a69ed3
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 20:30:44 2009 -0400

    Reformat and reindent pixman-region.c

diff --git a/pixman/pixman-region.c b/pixman/pixman-region.c
index d7b1180..7328a63 100644
--- a/pixman/pixman-region.c
+++ b/pixman/pixman-region.c
@@ -1,49 +1,48 @@
-/***********************************************************
-
-Copyright 1987, 1988, 1989, 1998  The Open Group
-
-Permission to use, copy, modify, distribute, and sell this software and its
-documentation for any purpose is hereby granted without fee, provided that
-the above copyright notice appear in all copies and that both that
-copyright notice and this permission notice appear in supporting
-documentation.
-
-The above copyright notice and this permission notice shall be included in
-all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
-OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
-AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-
-Except as contained in this notice, the name of The Open Group shall not be
-used in advertising or otherwise to promote the sale, use or other dealings
-in this Software without prior written authorization from The Open Group.
-
-Copyright 1987, 1988, 1989 by
-Digital Equipment Corporation, Maynard, Massachusetts.
-
-                        All Rights Reserved
-
-Permission to use, copy, modify, and distribute this software and its
-documentation for any purpose and without fee is hereby granted,
-provided that the above copyright notice appear in all copies and that
-both that copyright notice and this permission notice appear in
-supporting documentation, and that the name of Digital not be
-used in advertising or publicity pertaining to distribution of the
-software without specific, written prior permission.
-
-DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
-ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
-DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
-ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
-WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
-ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
-SOFTWARE.
-
-******************************************************************/
+/*
+ * Copyright 1987, 1988, 1989, 1998  The Open Group
+ * 
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation.
+ * 
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ * 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * OPEN GROUP BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ * 
+ * Except as contained in this notice, the name of The Open Group shall not be
+ * used in advertising or otherwise to promote the sale, use or other dealings
+ * in this Software without prior written authorization from The Open Group.
+ * 
+ * Copyright 1987, 1988, 1989 by
+ * Digital Equipment Corporation, Maynard, Massachusetts.
+ * 
+ *                    All Rights Reserved
+ * 
+ * Permission to use, copy, modify, and distribute this software and its
+ * documentation for any purpose and without fee is hereby granted,
+ * provided that the above copyright notice appear in all copies and that
+ * both that copyright notice and this permission notice appear in
+ * supporting documentation, and that the name of Digital not be
+ * used in advertising or publicity pertaining to distribution of the
+ * software without specific, written prior permission.
+ * 
+ * DIGITAL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
+ * ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL
+ * DIGITAL BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
+ * ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ */
 
 #include <stdlib.h>
 #include <limits.h>
@@ -53,25 +52,29 @@ SOFTWARE.
 
 #define PIXREGION_NIL(reg) ((reg)->data && !(reg)->data->numRects)
 /* not a region */
-#define PIXREGION_NAR(reg)	((reg)->data == pixman_broken_data)
+#define PIXREGION_NAR(reg)      ((reg)->data == pixman_broken_data)
 #define PIXREGION_NUMRECTS(reg) ((reg)->data ? (reg)->data->numRects : 1)
 #define PIXREGION_SIZE(reg) ((reg)->data ? (reg)->data->size : 0)
-#define PIXREGION_RECTS(reg) ((reg)->data ? (box_type_t *)((reg)->data + 1) \
-			               : &(reg)->extents)
+#define PIXREGION_RECTS(reg) \
+    ((reg)->data ? (box_type_t *)((reg)->data + 1) \
+     : &(reg)->extents)
 #define PIXREGION_BOXPTR(reg) ((box_type_t *)((reg)->data + 1))
-#define PIXREGION_BOX(reg,i) (&PIXREGION_BOXPTR(reg)[i])
-#define PIXREGION_TOP(reg) PIXREGION_BOX(reg, (reg)->data->numRects)
-#define PIXREGION_END(reg) PIXREGION_BOX(reg, (reg)->data->numRects - 1)
+#define PIXREGION_BOX(reg, i) (&PIXREGION_BOXPTR (reg)[i])
+#define PIXREGION_TOP(reg) PIXREGION_BOX (reg, (reg)->data->numRects)
+#define PIXREGION_END(reg) PIXREGION_BOX (reg, (reg)->data->numRects - 1)
 
-#define GOOD(reg) assert(PREFIX(_selfcheck) (reg))
+#define GOOD(reg) assert (PREFIX (_selfcheck) (reg))
 
-static const box_type_t PREFIX(_empty_box_) = {0, 0, 0, 0};
-static const region_data_type_t PREFIX(_empty_data_) = {0, 0};
-static const region_data_type_t PREFIX(_broken_data_) = {0, 0};
+static const box_type_t PREFIX (_empty_box_) = { 0, 0, 0, 0 };
+static const region_data_type_t PREFIX (_empty_data_) = { 0, 0 };
+static const region_data_type_t PREFIX (_broken_data_) = { 0, 0 };
 
-static box_type_t *pixman_region_empty_box = (box_type_t *)&PREFIX(_empty_box_);
-static region_data_type_t *pixman_region_empty_data = (region_data_type_t *)&PREFIX(_empty_data_);
-static region_data_type_t *pixman_broken_data = (region_data_type_t *)&PREFIX(_broken_data_);
+static box_type_t *pixman_region_empty_box =
+    (box_type_t *)&PREFIX (_empty_box_);
+static region_data_type_t *pixman_region_empty_data =
+    (region_data_type_t *)&PREFIX (_empty_data_);
+static region_data_type_t *pixman_broken_data =
+    (region_data_type_t *)&PREFIX (_broken_data_);
 
 static pixman_bool_t
 pixman_break (region_type_t *region);
@@ -117,194 +120,257 @@ pixman_break (region_type_t *region);
  *
  * Adam de Boor wrote most of the original region code.  Joel McCormack
  * substantially modified or rewrote most of the core arithmetic routines, and
- * added pixman_region_validate in order to support several speed improvements to
- * pixman_region_validate_tree.  Bob Scheifler changed the representation to be more
- * compact when empty or a single rectangle, and did a bunch of gratuitous
- * reformatting. Carl Worth did further gratuitous reformatting while re-merging
- * the server and client region code into libpixregion.
+ * added pixman_region_validate in order to support several speed improvements
+ * to pixman_region_validate_tree.  Bob Scheifler changed the representation
+ * to be more compact when empty or a single rectangle, and did a bunch of
+ * gratuitous reformatting. Carl Worth did further gratuitous reformatting
+ * while re-merging the server and client region code into libpixregion.
+ * Soren Sandmann did even more gratuitous reformatting.
  */
 
 /*  true iff two Boxes overlap */
-#define EXTENTCHECK(r1,r2) \
-      (!( ((r1)->x2 <= (r2)->x1)  || \
-          ((r1)->x1 >= (r2)->x2)  || \
-          ((r1)->y2 <= (r2)->y1)  || \
-          ((r1)->y1 >= (r2)->y2) ) )
+#define EXTENTCHECK(r1, r2)	   \
+    (!( ((r1)->x2 <= (r2)->x1)  || \
+        ((r1)->x1 >= (r2)->x2)  || \
+        ((r1)->y2 <= (r2)->y1)  || \
+        ((r1)->y1 >= (r2)->y2) ) )
 
 /* true iff (x,y) is in Box */
-#define INBOX(r,x,y) \
-      ( ((r)->x2 >  x) && \
-        ((r)->x1 <= x) && \
-        ((r)->y2 >  y) && \
-        ((r)->y1 <= y) )
+#define INBOX(r, x, y)	\
+    ( ((r)->x2 >  x) && \
+      ((r)->x1 <= x) && \
+      ((r)->y2 >  y) && \
+      ((r)->y1 <= y) )
 
 /* true iff Box r1 contains Box r2 */
-#define SUBSUMES(r1,r2) \
-      ( ((r1)->x1 <= (r2)->x1) && \
-        ((r1)->x2 >= (r2)->x2) && \
-        ((r1)->y1 <= (r2)->y1) && \
-        ((r1)->y2 >= (r2)->y2) )
+#define SUBSUMES(r1, r2)	\
+    ( ((r1)->x1 <= (r2)->x1) && \
+      ((r1)->x2 >= (r2)->x2) && \
+      ((r1)->y1 <= (r2)->y1) && \
+      ((r1)->y2 >= (r2)->y2) )
 
 static size_t
-PIXREGION_SZOF(size_t n)
+PIXREGION_SZOF (size_t n)
 {
     size_t size = n * sizeof(box_type_t);
+    
     if (n > UINT32_MAX / sizeof(box_type_t))
-        return 0;
+	return 0;
 
     if (sizeof(region_data_type_t) > UINT32_MAX - size)
-        return 0;
+	return 0;
 
     return size + sizeof(region_data_type_t);
 }
 
 static void *
-alloc_data(size_t n)
+alloc_data (size_t n)
 {
-    size_t sz = PIXREGION_SZOF(n);
+    size_t sz = PIXREGION_SZOF (n);
+
     if (!sz)
 	return NULL;
 
-    return malloc(sz);
+    return malloc (sz);
 }
 
-#define FREE_DATA(reg) if ((reg)->data && (reg)->data->size) free((reg)->data)
-
-#define RECTALLOC_BAIL(region,n,bail) \
-if (!(region)->data || (((region)->data->numRects + (n)) > (region)->data->size)) \
-    if (!pixman_rect_alloc(region, n)) { goto bail; }
-
-#define RECTALLOC(region,n) \
-if (!(region)->data || (((region)->data->numRects + (n)) > (region)->data->size)) \
-    if (!pixman_rect_alloc(region, n)) { return FALSE; }
+#define FREE_DATA(reg) if ((reg)->data && (reg)->data->size) free ((reg)->data)
 
-#define ADDRECT(next_rect,nx1,ny1,nx2,ny2)	\
-{						\
-    next_rect->x1 = nx1;			\
-    next_rect->y1 = ny1;			\
-    next_rect->x2 = nx2;			\
-    next_rect->y2 = ny2;			\
-    next_rect++;				\
-}
+#define RECTALLOC_BAIL(region, n, bail)					\
+    do									\
+    {									\
+	if (!(region)->data ||						\
+	    (((region)->data->numRects + (n)) > (region)->data->size))	\
+	{								\
+	    if (!pixman_rect_alloc (region, n))				\
+		goto bail;						\
+	}								\
+    } while (0)
 
-#define NEWRECT(region,next_rect,nx1,ny1,nx2,ny2)			\
-{									\
-    if (!(region)->data || ((region)->data->numRects == (region)->data->size))\
+#define RECTALLOC(region, n)						\
+    do									\
     {									\
-	if (!pixman_rect_alloc(region, 1))					\
-	    return FALSE;						\
-	next_rect = PIXREGION_TOP(region);					\
-    }									\
-    ADDRECT(next_rect,nx1,ny1,nx2,ny2);					\
-    region->data->numRects++;						\
-    assert(region->data->numRects<=region->data->size);			\
-}
+	if (!(region)->data ||						\
+	    (((region)->data->numRects + (n)) > (region)->data->size))	\
+	{								\
+	    if (!pixman_rect_alloc (region, n)) {			\
+		return FALSE;						\
+	    }								\
+	}								\
+    } while (0)
+
+#define ADDRECT(next_rect, nx1, ny1, nx2, ny2)      \
+    do						    \
+    {						    \
+	next_rect->x1 = nx1;                        \
+	next_rect->y1 = ny1;                        \
+	next_rect->x2 = nx2;                        \
+	next_rect->y2 = ny2;                        \
+	next_rect++;                                \
+    }						    \
+    while (0)
+
+#define NEWRECT(region, next_rect, nx1, ny1, nx2, ny2)			\
+    do									\
+    {									\
+	if (!(region)->data ||						\
+	    ((region)->data->numRects == (region)->data->size))		\
+	{								\
+	    if (!pixman_rect_alloc (region, 1))				\
+		return FALSE;						\
+	    next_rect = PIXREGION_TOP (region);				\
+	}								\
+	ADDRECT (next_rect, nx1, ny1, nx2, ny2);			\
+	region->data->numRects++;					\
+	assert (region->data->numRects <= region->data->size);		\
+    } while (0)
 
-#define DOWNSIZE(reg,numRects)						\
-    if (((numRects) < ((reg)->data->size >> 1)) && ((reg)->data->size > 50)) \
+#define DOWNSIZE(reg, numRects)						\
+    do									\
     {									\
-	region_data_type_t * new_data;				\
-	size_t data_size = PIXREGION_SZOF(numRects);			\
-	if (!data_size)							\
-	    new_data = NULL;						\
-	else								\
-	    new_data = (region_data_type_t *)realloc((reg)->data, data_size); \
-	if (new_data)							\
+	if (((numRects) < ((reg)->data->size >> 1)) &&			\
+	    ((reg)->data->size > 50))					\
 	{								\
-	    new_data->size = (numRects);					\
-	    (reg)->data = new_data;					\
+	    region_data_type_t * new_data;				\
+	    size_t data_size = PIXREGION_SZOF (numRects);		\
+									\
+	    if (!data_size)						\
+	    {								\
+		new_data = NULL;					\
+	    }								\
+	    else							\
+	    {								\
+		new_data = (region_data_type_t *)			\
+		    realloc ((reg)->data, data_size);			\
+	    }								\
+									\
+	    if (new_data)						\
+	    {								\
+		new_data->size = (numRects);				\
+		(reg)->data = new_data;					\
+	    }								\
 	}								\
-    }
+    } while (0)
 
 PIXMAN_EXPORT pixman_bool_t
-PREFIX(_equal) (reg1, reg2)
-    region_type_t * reg1;
-    region_type_t * reg2;
+PREFIX (_equal) (region_type_t *reg1, region_type_t *reg2)
 {
     int i;
     box_type_t *rects1;
     box_type_t *rects2;
 
-    if (reg1->extents.x1 != reg2->extents.x1) return FALSE;
-    if (reg1->extents.x2 != reg2->extents.x2) return FALSE;
-    if (reg1->extents.y1 != reg2->extents.y1) return FALSE;
-    if (reg1->extents.y2 != reg2->extents.y2) return FALSE;
-    if (PIXREGION_NUMRECTS(reg1) != PIXREGION_NUMRECTS(reg2)) return FALSE;
+    if (reg1->extents.x1 != reg2->extents.x1)
+	return FALSE;
+    
+    if (reg1->extents.x2 != reg2->extents.x2)
+	return FALSE;
+    
+    if (reg1->extents.y1 != reg2->extents.y1)
+	return FALSE;
+    
+    if (reg1->extents.y2 != reg2->extents.y2)
+	return FALSE;
+    
+    if (PIXREGION_NUMRECTS (reg1) != PIXREGION_NUMRECTS (reg2))
+	return FALSE;
 
-    rects1 = PIXREGION_RECTS(reg1);
-    rects2 = PIXREGION_RECTS(reg2);
-    for (i = 0; i != PIXREGION_NUMRECTS(reg1); i++) {
-	if (rects1[i].x1 != rects2[i].x1) return FALSE;
-	if (rects1[i].x2 != rects2[i].x2) return FALSE;
-	if (rects1[i].y1 != rects2[i].y1) return FALSE;
-	if (rects1[i].y2 != rects2[i].y2) return FALSE;
+    rects1 = PIXREGION_RECTS (reg1);
+    rects2 = PIXREGION_RECTS (reg2);
+    
+    for (i = 0; i != PIXREGION_NUMRECTS (reg1); i++)
+    {
+	if (rects1[i].x1 != rects2[i].x1)
+	    return FALSE;
+	
+	if (rects1[i].x2 != rects2[i].x2)
+	    return FALSE;
+	
+	if (rects1[i].y1 != rects2[i].y1)
+	    return FALSE;
+	
+	if (rects1[i].y2 != rects2[i].y2)
+	    return FALSE;
     }
+
     return TRUE;
 }
 
 int
-PREFIX(_print) (rgn)
-    region_type_t * rgn;
+PREFIX (_print) (region_type_t *rgn)
 {
     int num, size;
     int i;
     box_type_t * rects;
 
-    num = PIXREGION_NUMRECTS(rgn);
-    size = PIXREGION_SIZE(rgn);
-    rects = PIXREGION_RECTS(rgn);
-    fprintf(stderr, "num: %d size: %d\n", num, size);
-    fprintf(stderr, "extents: %d %d %d %d\n",
-	   rgn->extents.x1, rgn->extents.y1, rgn->extents.x2, rgn->extents.y2);
+    num = PIXREGION_NUMRECTS (rgn);
+    size = PIXREGION_SIZE (rgn);
+    rects = PIXREGION_RECTS (rgn);
+
+    fprintf (stderr, "num: %d size: %d\n", num, size);
+    fprintf (stderr, "extents: %d %d %d %d\n",
+             rgn->extents.x1,
+	     rgn->extents.y1,
+	     rgn->extents.x2,
+	     rgn->extents.y2);
+    
     for (i = 0; i < num; i++)
-	fprintf(stderr, "%d %d %d %d \n",
-		rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2);
-    fprintf(stderr, "\n");
+    {
+	fprintf (stderr, "%d %d %d %d \n",
+	         rects[i].x1, rects[i].y1, rects[i].x2, rects[i].y2);
+    }
+    
+    fprintf (stderr, "\n");
+
     return(num);
 }
 
 
 PIXMAN_EXPORT void
-PREFIX(_init) (region_type_t *region)
+PREFIX (_init) (region_type_t *region)
 {
     region->extents = *pixman_region_empty_box;
     region->data = pixman_region_empty_data;
 }
 
 PIXMAN_EXPORT void
-PREFIX(_init_rect) (region_type_t *region,
-		    int x, int y, unsigned int width, unsigned int height)
+PREFIX (_init_rect) (region_type_t *	region,
+                     int		x,
+		     int		y,
+		     unsigned int	width,
+		     unsigned int	height)
 {
     region->extents.x1 = x;
     region->extents.y1 = y;
     region->extents.x2 = x + width;
     region->extents.y2 = y + height;
+
     region->data = NULL;
 }
 
 PIXMAN_EXPORT void
-PREFIX(_init_with_extents) (region_type_t *region, box_type_t *extents)
+PREFIX (_init_with_extents) (region_type_t *region, box_type_t *extents)
 {
     region->extents = *extents;
+
     region->data = NULL;
 }
 
 PIXMAN_EXPORT void
-PREFIX(_fini) (region_type_t *region)
+PREFIX (_fini) (region_type_t *region)
 {
     GOOD (region);
     FREE_DATA (region);
 }
 
 PIXMAN_EXPORT int
-PREFIX(_n_rects) (region_type_t *region)
+PREFIX (_n_rects) (region_type_t *region)
 {
     return PIXREGION_NUMRECTS (region);
 }
 
 PIXMAN_EXPORT box_type_t *
-PREFIX(_rectangles) (region_type_t *region,
-				  int		    *n_rects)
+PREFIX (_rectangles) (region_type_t *region,
+                      int               *n_rects)
 {
     if (n_rects)
 	*n_rects = PIXREGION_NUMRECTS (region);
@@ -316,80 +382,109 @@ static pixman_bool_t
 pixman_break (region_type_t *region)
 {
     FREE_DATA (region);
+
     region->extents = *pixman_region_empty_box;
     region->data = pixman_broken_data;
+
     return FALSE;
 }
 
 static pixman_bool_t
-pixman_rect_alloc (region_type_t * region, int n)
+pixman_rect_alloc (region_type_t * region,
+                   int             n)
 {
     region_data_type_t *data;
 
     if (!region->data)
     {
 	n++;
-	region->data = alloc_data(n);
+	region->data = alloc_data (n);
+
 	if (!region->data)
 	    return pixman_break (region);
+
 	region->data->numRects = 1;
-	*PIXREGION_BOXPTR(region) = region->extents;
+	*PIXREGION_BOXPTR (region) = region->extents;
     }
     else if (!region->data->size)
     {
-	region->data = alloc_data(n);
+	region->data = alloc_data (n);
+
 	if (!region->data)
 	    return pixman_break (region);
+
 	region->data->numRects = 0;
     }
     else
     {
 	size_t data_size;
+
 	if (n == 1)
 	{
 	    n = region->data->numRects;
 	    if (n > 500) /* XXX pick numbers out of a hat */
 		n = 250;
 	}
+
 	n += region->data->numRects;
-	data_size = PIXREGION_SZOF(n);
+	data_size = PIXREGION_SZOF (n);
+
 	if (!data_size)
+	{
 	    data = NULL;
+	}
 	else
-	    data = (region_data_type_t *)realloc(region->data, PIXREGION_SZOF(n));
+	{
+	    data = (region_data_type_t *)
+		realloc (region->data, PIXREGION_SZOF (n));
+	}
+	
 	if (!data)
 	    return pixman_break (region);
+	
 	region->data = data;
     }
+    
     region->data->size = n;
+
     return TRUE;
 }
 
 PIXMAN_EXPORT pixman_bool_t
-PREFIX(_copy) (region_type_t *dst, region_type_t *src)
+PREFIX (_copy) (region_type_t *dst, region_type_t *src)
 {
-    GOOD(dst);
-    GOOD(src);
+    GOOD (dst);
+    GOOD (src);
+    
     if (dst == src)
 	return TRUE;
+    
     dst->extents = src->extents;
+
     if (!src->data || !src->data->size)
     {
-	FREE_DATA(dst);
+	FREE_DATA (dst);
 	dst->data = src->data;
 	return TRUE;
     }
+    
     if (!dst->data || (dst->data->size < src->data->numRects))
     {
-	FREE_DATA(dst);
-	dst->data = alloc_data(src->data->numRects);
+	FREE_DATA (dst);
+
+	dst->data = alloc_data (src->data->numRects);
+
 	if (!dst->data)
 	    return pixman_break (dst);
+
 	dst->data->size = src->data->numRects;
     }
+
     dst->data->numRects = src->data->numRects;
-    memmove((char *)PIXREGION_BOXPTR(dst),(char *)PIXREGION_BOXPTR(src),
-	  dst->data->numRects * sizeof(box_type_t));
+
+    memmove ((char *)PIXREGION_BOXPTR (dst), (char *)PIXREGION_BOXPTR (src),
+             dst->data->numRects * sizeof(box_type_t));
+
     return TRUE;
 }
 
@@ -416,20 +511,20 @@ PREFIX(_copy) (region_type_t *dst, region_type_t *src)
  *-----------------------------------------------------------------------
  */
 static inline int
-pixman_coalesce (
-    region_type_t *	region,	    	/* Region to coalesce		     */
-    int	    	  	prev_start,  	/* Index of start of previous band   */
-    int	    	  	cur_start)   	/* Index of start of current band    */
+pixman_coalesce (region_type_t * region,      /* Region to coalesce		 */
+		 int             prev_start,  /* Index of start of previous band */
+		 int             cur_start)   /* Index of start of current band  */
 {
-    box_type_t *	prev_box;   	/* Current box in previous band	     */
-    box_type_t *	cur_box;    	/* Current box in current band       */
-    int  	numRects;	/* Number rectangles in both bands   */
-    int	y2;		/* Bottom of current band	     */
+    box_type_t *prev_box;       /* Current box in previous band	     */
+    box_type_t *cur_box;        /* Current box in current band       */
+    int numRects;               /* Number rectangles in both bands   */
+    int y2;                     /* Bottom of current band	     */
+
     /*
      * Figure out how many rectangles are in the band.
      */
     numRects = cur_start - prev_start;
-    assert(numRects == region->data->numRects - cur_start);
+    assert (numRects == region->data->numRects - cur_start);
 
     if (!numRects) return cur_start;
 
@@ -437,8 +532,8 @@ pixman_coalesce (
      * The bands may only be coalesced if the bottom of the previous
      * matches the top scanline of the current.
      */
-    prev_box = PIXREGION_BOX(region, prev_start);
-    cur_box = PIXREGION_BOX(region, cur_start);
+    prev_box = PIXREGION_BOX (region, prev_start);
+    cur_box = PIXREGION_BOX (region, cur_start);
     if (prev_box->y2 != cur_box->y1) return cur_start;
 
     /*
@@ -449,14 +544,16 @@ pixman_coalesce (
      */
     y2 = cur_box->y2;
 
-    do {
-	if ((prev_box->x1 != cur_box->x1) || (prev_box->x2 != cur_box->x2)) {
+    do
+    {
+	if ((prev_box->x1 != cur_box->x1) || (prev_box->x2 != cur_box->x2))
 	    return (cur_start);
-	}
+	
 	prev_box++;
 	cur_box++;
 	numRects--;
-    } while (numRects);
+    }
+    while (numRects);
 
     /*
      * The bands may be merged, so set the bottom y of each box
@@ -464,22 +561,28 @@ pixman_coalesce (
      */
     numRects = cur_start - prev_start;
     region->data->numRects -= numRects;
-    do {
+
+    do
+    {
 	prev_box--;
 	prev_box->y2 = y2;
 	numRects--;
-    } while (numRects);
+    }
+    while (numRects);
+
     return prev_start;
 }
 
 /* Quicky macro to avoid trivial reject procedure calls to pixman_coalesce */
 
-#define COALESCE(new_reg, prev_band, cur_band)				\
-    if (cur_band - prev_band == new_reg->data->numRects - cur_band) {	\
-	prev_band = pixman_coalesce(new_reg, prev_band, cur_band);		\
-    } else {								\
-	prev_band = cur_band;						\
-    }
+#define COALESCE(new_reg, prev_band, cur_band)                          \
+    do									\
+    {									\
+	if (cur_band - prev_band == new_reg->data->numRects - cur_band)	\
+	    prev_band = pixman_coalesce (new_reg, prev_band, cur_band);	\
+	else								\
+	    prev_band = cur_band;					\
+    } while (0)
 
 /*-
  *-----------------------------------------------------------------------
@@ -497,55 +600,58 @@ pixman_coalesce (
  *
  *-----------------------------------------------------------------------
  */
-
 static inline pixman_bool_t
-pixman_region_append_non_o (
-    region_type_t *	region,
-    box_type_t *	r,
-    box_type_t *  	  	r_end,
-    int  	y1,
-    int  	y2)
+pixman_region_append_non_o (region_type_t * region,
+			    box_type_t *    r,
+			    box_type_t *    r_end,
+			    int             y1,
+			    int             y2)
 {
-    box_type_t *	next_rect;
-    int	new_rects;
+    box_type_t *next_rect;
+    int new_rects;
 
     new_rects = r_end - r;
 
-    assert(y1 < y2);
-    assert(new_rects != 0);
+    assert (y1 < y2);
+    assert (new_rects != 0);
 
     /* Make sure we have enough space for all rectangles to be added */
-    RECTALLOC(region, new_rects);
-    next_rect = PIXREGION_TOP(region);
+    RECTALLOC (region, new_rects);
+    next_rect = PIXREGION_TOP (region);
     region->data->numRects += new_rects;
-    do {
-	assert(r->x1 < r->x2);
-	ADDRECT(next_rect, r->x1, y1, r->x2, y2);
+
+    do
+    {
+	assert (r->x1 < r->x2);
+	ADDRECT (next_rect, r->x1, y1, r->x2, y2);
 	r++;
-    } while (r != r_end);
+    }
+    while (r != r_end);
 
     return TRUE;
 }
 
-#define FIND_BAND(r, r_band_end, r_end, ry1)		    \
-{							    \
-    ry1 = r->y1;					    \
-    r_band_end = r+1;					    \
-    while ((r_band_end != r_end) && (r_band_end->y1 == ry1)) {   \
-	r_band_end++;					    \
-    }							    \
-}
-
-#define	APPEND_REGIONS(new_reg, r, r_end)					\
-{									\
-    int new_rects;							\
-    if ((new_rects = r_end - r)) {					\
-	RECTALLOC_BAIL(new_reg, new_rects, bail);					\
-	memmove((char *)PIXREGION_TOP(new_reg),(char *)r, 			\
-              new_rects * sizeof(box_type_t));				\
-	new_reg->data->numRects += new_rects;				\
-    }									\
-}
+#define FIND_BAND(r, r_band_end, r_end, ry1)			     \
+    do								     \
+    {								     \
+	ry1 = r->y1;						     \
+	r_band_end = r + 1;					     \
+	while ((r_band_end != r_end) && (r_band_end->y1 == ry1)) {   \
+	    r_band_end++;					     \
+	}							     \
+    } while (0)
+
+#define APPEND_REGIONS(new_reg, r, r_end)				\
+    do									\
+    {									\
+	int new_rects;							\
+	if ((new_rects = r_end - r)) {					\
+	    RECTALLOC_BAIL (new_reg, new_rects, bail);			\
+	    memmove ((char *)PIXREGION_TOP (new_reg), (char *)r,	\
+		     new_rects * sizeof(box_type_t));			\
+	    new_reg->data->numRects += new_rects;			\
+	}								\
+    } while (0)
 
 /*-
  *-----------------------------------------------------------------------
@@ -576,53 +682,53 @@ pixman_region_append_non_o (
  *-----------------------------------------------------------------------
  */
 
-typedef pixman_bool_t (*overlap_proc_ptr)(
-    region_type_t	 *region,
-    box_type_t *r1,
-    box_type_t *r1_end,
-    box_type_t *r2,
-    box_type_t *r2_end,
-    int    	 y1,
-    int    	 y2,
-    int		 *overlap);
+typedef pixman_bool_t (*overlap_proc_ptr) (region_type_t *region,
+					   box_type_t *   r1,
+					   box_type_t *   r1_end,
+					   box_type_t *   r2,
+					   box_type_t *   r2_end,
+					   int            y1,
+					   int            y2,
+					   int *          overlap);
 
 static pixman_bool_t
-pixman_op(
-    region_type_t *new_reg,		    /* Place to store result	     */
-    region_type_t *       reg1,		    /* First region in operation     */
-    region_type_t *       reg2,		    /* 2d region in operation        */
-    overlap_proc_ptr  overlap_func,            /* Function to call for over-
-					     * lapping bands		     */
-    int	    append_non1,		    /* Append non-overlapping bands  */
-					    /* in region 1 ? */
-    int	    append_non2,		    /* Append non-overlapping bands  */
-					    /* in region 2 ? */
-    int	    *overlap)
+pixman_op (region_type_t *  new_reg,               /* Place to store result	    */
+	   region_type_t *  reg1,                  /* First region in operation     */
+	   region_type_t *  reg2,                  /* 2nd region in operation       */
+	   overlap_proc_ptr overlap_func,          /* Function to call for over-
+						    * lapping bands		    */
+	   int              append_non1,           /* Append non-overlapping bands  
+						    * in region 1 ?
+						    */
+	   int              append_non2,           /* Append non-overlapping bands
+						    * in region 2 ?
+						    */
+	   int *            overlap)
 {
-    box_type_t * r1;			    /* Pointer into first region     */
-    box_type_t * r2;			    /* Pointer into 2d region	     */
-    box_type_t *	    r1_end;		    /* End of 1st region	     */
-    box_type_t *	    r2_end;		    /* End of 2d region		     */
-    int	    ybot;		    /* Bottom of intersection	     */
-    int	    ytop;		    /* Top of intersection	     */
-    region_data_type_t *	    old_data;		    /* Old data for new_reg	     */
-    int		    prev_band;		    /* Index of start of
-					     * previous band in new_reg       */
-    int		    cur_band;		    /* Index of start of current
-					     * band in new_reg		     */
-    box_type_t * r1_band_end;		    /* End of current band in r1     */
-    box_type_t * r2_band_end;		    /* End of current band in r2     */
-    int	    top;		    /* Top of non-overlapping band   */
-    int	    bot;		    /* Bottom of non-overlapping band*/
-    int    r1y1;		    /* Temps for r1->y1 and r2->y1   */
-    int    r2y1;
-    int		    new_size;
-    int		    numRects;
+    box_type_t *r1;                 /* Pointer into first region     */
+    box_type_t *r2;                 /* Pointer into 2nd region	     */
+    box_type_t *r1_end;             /* End of 1st region	     */
+    box_type_t *r2_end;             /* End of 2nd region	     */
+    int ybot;                       /* Bottom of intersection	     */
+    int ytop;                       /* Top of intersection	     */
+    region_data_type_t *old_data;   /* Old data for new_reg	     */
+    int prev_band;                  /* Index of start of
+				     * previous band in new_reg       */
+    int cur_band;                   /* Index of start of current
+				     * band in new_reg		     */
+    box_type_t * r1_band_end;       /* End of current band in r1     */
+    box_type_t * r2_band_end;       /* End of current band in r2     */
+    int top;                        /* Top of non-overlapping band   */
+    int bot;                        /* Bottom of non-overlapping band*/
+    int r1y1;                       /* Temps for r1->y1 and r2->y1   */
+    int r2y1;
+    int new_size;
+    int numRects;
 
     /*
      * Break any region computed from a broken region
      */
-    if (PIXREGION_NAR (reg1) || PIXREGION_NAR(reg2))
+    if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2))
 	return pixman_break (new_reg);
 
     /*
@@ -633,35 +739,44 @@ pixman_op(
      * another array of rectangles for it to use.
      */
 
-    r1 = PIXREGION_RECTS(reg1);
-    new_size = PIXREGION_NUMRECTS(reg1);
+    r1 = PIXREGION_RECTS (reg1);
+    new_size = PIXREGION_NUMRECTS (reg1);
     r1_end = r1 + new_size;
-    numRects = PIXREGION_NUMRECTS(reg2);
-    r2 = PIXREGION_RECTS(reg2);
+
+    numRects = PIXREGION_NUMRECTS (reg2);
+    r2 = PIXREGION_RECTS (reg2);
     r2_end = r2 + numRects;
-    assert(r1 != r1_end);
-    assert(r2 != r2_end);
+    
+    assert (r1 != r1_end);
+    assert (r2 != r2_end);
 
     old_data = (region_data_type_t *)NULL;
+
     if (((new_reg == reg1) && (new_size > 1)) ||
-	((new_reg == reg2) && (numRects > 1)))
+        ((new_reg == reg2) && (numRects > 1)))
     {
-	old_data = new_reg->data;
-	new_reg->data = pixman_region_empty_data;
+        old_data = new_reg->data;
+        new_reg->data = pixman_region_empty_data;
     }
+
     /* guess at new size */
     if (numRects > new_size)
 	new_size = numRects;
+
     new_size <<= 1;
+
     if (!new_reg->data)
 	new_reg->data = pixman_region_empty_data;
     else if (new_reg->data->size)
 	new_reg->data->numRects = 0;
-    if (new_size > new_reg->data->size) {
-	if (!pixman_rect_alloc(new_reg, new_size)) {
-	    if (old_data)
+
+    if (new_size > new_reg->data->size)
+    {
+        if (!pixman_rect_alloc (new_reg, new_size))
+        {
+            if (old_data)
 		free (old_data);
-	    return FALSE;
+            return FALSE;
 	}
     }
 
@@ -670,7 +785,7 @@ pixman_op(
      * In the upcoming loop, ybot and ytop serve different functions depending
      * on whether the band being handled is an overlapping or non-overlapping
      * band.
-     * 	In the case of a non-overlapping band (only one of the regions
+     *  In the case of a non-overlapping band (only one of the regions
      * has points in the band), ybot is the bottom of the most recent
      * intersection and thus clips the top of the rectangles in that band.
      * ytop is the top of the next intersection between the two regions and
@@ -679,7 +794,7 @@ pixman_op(
      * the top of the rectangles of both regions and ybot clips the bottoms.
      */
 
-    ybot = MIN(r1->y1, r2->y1);
+    ybot = MIN (r1->y1, r2->y1);
 
     /*
      * prev_band serves to mark the start of the previous band so rectangles
@@ -692,21 +807,22 @@ pixman_op(
      */
     prev_band = 0;
 
-    do {
-	/*
+    do
+    {
+        /*
 	 * This algorithm proceeds one source-band (as opposed to a
 	 * destination band, which is determined by where the two regions
 	 * intersect) at a time. r1_band_end and r2_band_end serve to mark the
 	 * rectangle after the last one in the current band for their
 	 * respective regions.
 	 */
-	assert(r1 != r1_end);
-	assert(r2 != r2_end);
+        assert (r1 != r1_end);
+        assert (r2 != r2_end);
 
-	FIND_BAND(r1, r1_band_end, r1_end, r1y1);
-	FIND_BAND(r2, r2_band_end, r2_end, r2y1);
+        FIND_BAND (r1, r1_band_end, r1_end, r1y1);
+        FIND_BAND (r2, r2_band_end, r2_end, r2y1);
 
-	/*
+        /*
 	 * First handle the band that doesn't intersect, if any.
 	 *
 	 * Note that attention is restricted to one band in the
@@ -714,58 +830,79 @@ pixman_op(
 	 * bands between the current position and the next place it overlaps
 	 * the other, this entire loop will be passed through n times.
 	 */
-	if (r1y1 < r2y1) {
-	    if (append_non1) {
-		top = MAX(r1y1, ybot);
-		bot = MIN(r1->y2, r2y1);
-		if (top != bot)	{
-		    cur_band = new_reg->data->numRects;
-		    if (!pixman_region_append_non_o(new_reg, r1, r1_band_end, top, bot))
+        if (r1y1 < r2y1)
+        {
+            if (append_non1)
+            {
+                top = MAX (r1y1, ybot);
+                bot = MIN (r1->y2, r2y1);
+                if (top != bot)
+                {
+                    cur_band = new_reg->data->numRects;
+                    if (!pixman_region_append_non_o (new_reg, r1, r1_band_end, top, bot))
 			goto bail;
-		    COALESCE(new_reg, prev_band, cur_band);
+                    COALESCE (new_reg, prev_band, cur_band);
 		}
 	    }
-	    ytop = r2y1;
-	} else if (r2y1 < r1y1) {
-	    if (append_non2) {
-		top = MAX(r2y1, ybot);
-		bot = MIN(r2->y2, r1y1);
-		if (top != bot) {
-		    cur_band = new_reg->data->numRects;
-		    if (!pixman_region_append_non_o(new_reg, r2, r2_band_end, top, bot))
+            ytop = r2y1;
+	}
+        else if (r2y1 < r1y1)
+        {
+            if (append_non2)
+            {
+                top = MAX (r2y1, ybot);
+                bot = MIN (r2->y2, r1y1);
+		
+                if (top != bot)
+                {
+                    cur_band = new_reg->data->numRects;
+
+                    if (!pixman_region_append_non_o (new_reg, r2, r2_band_end, top, bot))
 			goto bail;
-		    COALESCE(new_reg, prev_band, cur_band);
+
+                    COALESCE (new_reg, prev_band, cur_band);
 		}
 	    }
-	    ytop = r1y1;
-	} else {
-	    ytop = r1y1;
+            ytop = r1y1;
+	}
+        else
+        {
+            ytop = r1y1;
 	}
 
-	/*
+        /*
 	 * Now see if we've hit an intersecting band. The two bands only
 	 * intersect if ybot > ytop
 	 */
-	ybot = MIN(r1->y2, r2->y2);
-	if (ybot > ytop) {
-	    cur_band = new_reg->data->numRects;
-	    if (!(* overlap_func)(new_reg,
-				 r1, r1_band_end,
-				 r2, r2_band_end,
-				 ytop, ybot,
-				 overlap))
+        ybot = MIN (r1->y2, r2->y2);
+        if (ybot > ytop)
+        {
+            cur_band = new_reg->data->numRects;
+
+            if (!(*overlap_func)(new_reg,
+                                 r1, r1_band_end,
+                                 r2, r2_band_end,
+                                 ytop, ybot,
+                                 overlap))
+	    {
 		goto bail;
-	    COALESCE(new_reg, prev_band, cur_band);
+	    }
+	    
+            COALESCE (new_reg, prev_band, cur_band);
 	}
 
-	/*
+        /*
 	 * If we've finished with a band (y2 == ybot) we skip forward
 	 * in the region to the next band.
 	 */
-	if (r1->y2 == ybot) r1 = r1_band_end;
-	if (r2->y2 == ybot) r2 = r2_band_end;
+        if (r1->y2 == ybot)
+	    r1 = r1_band_end;
 
-    } while (r1 != r1_end && r2 != r2_end);
+        if (r2->y2 == ybot)
+	    r2 = r2_band_end;
+
+    }
+    while (r1 != r1_end && r2 != r2_end);
 
     /*
      * Deal with whichever region (if any) still has rectangles left.
@@ -775,55 +912,70 @@ pixman_op(
      * regardless of how many bands, into one final append to the list.
      */
 
-    if ((r1 != r1_end) && append_non1) {
-	/* Do first non_overlap1Func call, which may be able to coalesce */
-	FIND_BAND(r1, r1_band_end, r1_end, r1y1);
-	cur_band = new_reg->data->numRects;
-	if (!pixman_region_append_non_o(new_reg,
-				      r1, r1_band_end,
-				      MAX(r1y1, ybot), r1->y2))
+    if ((r1 != r1_end) && append_non1)
+    {
+        /* Do first non_overlap1Func call, which may be able to coalesce */
+        FIND_BAND (r1, r1_band_end, r1_end, r1y1);
+	
+        cur_band = new_reg->data->numRects;
+	
+        if (!pixman_region_append_non_o (new_reg,
+                                         r1, r1_band_end,
+                                         MAX (r1y1, ybot), r1->y2))
+	{
 	    goto bail;
-	COALESCE(new_reg, prev_band, cur_band);
-	/* Just append the rest of the boxes  */
-	APPEND_REGIONS(new_reg, r1_band_end, r1_end);
+	}
+	
+        COALESCE (new_reg, prev_band, cur_band);
+
+        /* Just append the rest of the boxes  */
+        APPEND_REGIONS (new_reg, r1_band_end, r1_end);
+    }
+    else if ((r2 != r2_end) && append_non2)
+    {
+        /* Do first non_overlap2Func call, which may be able to coalesce */
+        FIND_BAND (r2, r2_band_end, r2_end, r2y1);
 
-    } else if ((r2 != r2_end) && append_non2) {
-	/* Do first non_overlap2Func call, which may be able to coalesce */
-	FIND_BAND(r2, r2_band_end, r2_end, r2y1);
 	cur_band = new_reg->data->numRects;
-	if (!pixman_region_append_non_o(new_reg,
-				      r2, r2_band_end,
-				      MAX(r2y1, ybot), r2->y2))
+
+        if (!pixman_region_append_non_o (new_reg,
+                                         r2, r2_band_end,
+                                         MAX (r2y1, ybot), r2->y2))
+	{
 	    goto bail;
-	COALESCE(new_reg, prev_band, cur_band);
-	/* Append rest of boxes */
-	APPEND_REGIONS(new_reg, r2_band_end, r2_end);
+	}
+
+        COALESCE (new_reg, prev_band, cur_band);
+
+        /* Append rest of boxes */
+        APPEND_REGIONS (new_reg, r2_band_end, r2_end);
     }
 
     if (old_data)
-	free(old_data);
+	free (old_data);
 
     if (!(numRects = new_reg->data->numRects))
     {
-	FREE_DATA(new_reg);
-	new_reg->data = pixman_region_empty_data;
+        FREE_DATA (new_reg);
+        new_reg->data = pixman_region_empty_data;
     }
     else if (numRects == 1)
     {
-	new_reg->extents = *PIXREGION_BOXPTR(new_reg);
-	FREE_DATA(new_reg);
-	new_reg->data = (region_data_type_t *)NULL;
+        new_reg->extents = *PIXREGION_BOXPTR (new_reg);
+        FREE_DATA (new_reg);
+        new_reg->data = (region_data_type_t *)NULL;
     }
     else
     {
-	DOWNSIZE(new_reg, numRects);
+        DOWNSIZE (new_reg, numRects);
     }
 
     return TRUE;
 
 bail:
     if (old_data)
-	free(old_data);
+	free (old_data);
+
     return pixman_break (new_reg);
 }
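
To make the ybot/ytop bookkeeping concrete, here is a minimal standalone
sketch (plain ints and printf, not pixman's region types) of how one pass of
the loop above carves the y axis into a non-overlapping band and an
overlapping band; only the region-1 branch of the symmetric logic is shown:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int
main (void)
{
    /* y extents of the current band from each region */
    int r1y1 = 0, r1y2 = 10;
    int r2y1 = 4, r2y2 = 14;

    int ybot = MIN (r1y1, r2y1);  /* bottom of the previous intersection */
    int ytop;

    if (r1y1 < r2y1)
    {
        /* region 1 sticks out on top: a non-overlapping band */
        int top = MAX (r1y1, ybot);
        int bot = MIN (r1y2, r2y1);

        if (top != bot)
            printf ("non-overlapping band of region 1: [%d, %d)\n", top, bot);

        ytop = r2y1;
    }
    else
    {
        ytop = r1y1;
    }

    /* the bands intersect only if ybot > ytop */
    ybot = MIN (r1y2, r2y2);
    if (ybot > ytop)
        printf ("overlapping band: [%d, %d)\n", ytop, ybot);

    return 0;
}

Run on these numbers it reports [0, 4) as region 1's private band and
[4, 10) as the band both regions share.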
 
@@ -831,8 +983,8 @@ bail:
  *-----------------------------------------------------------------------
  * pixman_set_extents --
  *	Reset the extents of a region to what they should be. Called by
- *	pixman_region_subtract and pixman_region_intersect as they can't figure it out along the
- *	way or do so easily, as pixman_region_union can.
+ *	pixman_region_subtract and pixman_region_intersect as they can't
+ *      figure it out along the way or do so easily, as pixman_region_union can.
  *
  * Results:
  *	None.
@@ -849,15 +1001,16 @@ pixman_set_extents (region_type_t *region)
 
     if (!region->data)
 	return;
+
     if (!region->data->size)
     {
-	region->extents.x2 = region->extents.x1;
-	region->extents.y2 = region->extents.y1;
-	return;
+        region->extents.x2 = region->extents.x1;
+        region->extents.y2 = region->extents.y1;
+        return;
     }
 
-    box = PIXREGION_BOXPTR(region);
-    box_end = PIXREGION_END(region);
+    box = PIXREGION_BOXPTR (region);
+    box_end = PIXREGION_END (region);
 
     /*
      * Since box is the first rectangle in the region, it must have the
@@ -871,16 +1024,18 @@ pixman_set_extents (region_type_t *region)
     region->extents.x2 = box_end->x2;
     region->extents.y2 = box_end->y2;
 
-    assert(region->extents.y1 < region->extents.y2);
-    while (box <= box_end) {
-	if (box->x1 < region->extents.x1)
+    assert (region->extents.y1 < region->extents.y2);
+
+    while (box <= box_end)
+    {
+        if (box->x1 < region->extents.x1)
 	    region->extents.x1 = box->x1;
-	if (box->x2 > region->extents.x2)
+        if (box->x2 > region->extents.x2)
 	    region->extents.x2 = box->x2;
-	box++;
-    };
+        box++;
+    }
 
-    assert(region->extents.x1 < region->extents.x2);
+    assert (region->extents.x1 < region->extents.x2);
 }
 
 /*======================================================================
@@ -902,124 +1057,144 @@ pixman_set_extents (region_type_t *region)
 /*ARGSUSED*/
 static pixman_bool_t
 pixman_region_intersect_o (region_type_t *region,
-			  box_type_t    *r1,
-			  box_type_t    *r1_end,
-			  box_type_t    *r2,
-			  box_type_t    *r2_end,
-			  int    	     y1,
-			  int    	     y2,
-			  int		    *overlap)
+                           box_type_t *   r1,
+                           box_type_t *   r1_end,
+                           box_type_t *   r2,
+                           box_type_t *   r2_end,
+                           int            y1,
+                           int            y2,
+                           int *          overlap)
 {
-    int  	x1;
-    int  	x2;
-    box_type_t *	next_rect;
+    int x1;
+    int x2;
+    box_type_t *        next_rect;
 
-    next_rect = PIXREGION_TOP(region);
+    next_rect = PIXREGION_TOP (region);
 
-    assert(y1 < y2);
-    assert(r1 != r1_end && r2 != r2_end);
+    assert (y1 < y2);
+    assert (r1 != r1_end && r2 != r2_end);
 
-    do {
-	x1 = MAX(r1->x1, r2->x1);
-	x2 = MIN(r1->x2, r2->x2);
+    do
+    {
+        x1 = MAX (r1->x1, r2->x1);
+        x2 = MIN (r1->x2, r2->x2);
 
-	/*
+        /*
 	 * If there's any overlap between the two rectangles, add that
 	 * overlap to the new region.
 	 */
-	if (x1 < x2)
-	    NEWRECT(region, next_rect, x1, y1, x2, y2);
+        if (x1 < x2)
+	    NEWRECT (region, next_rect, x1, y1, x2, y2);
 
-	/*
+        /*
 	 * Advance the pointer(s) with the leftmost right side, since the next
 	 * rectangle on that list may still overlap the other region's
 	 * current rectangle.
 	 */
-	if (r1->x2 == x2) {
-	    r1++;
+        if (r1->x2 == x2)
+        {
+            r1++;
 	}
-	if (r2->x2 == x2) {
-	    r2++;
+        if (r2->x2 == x2)
+        {
+            r2++;
 	}
-    } while ((r1 != r1_end) && (r2 != r2_end));
+    }
+    while ((r1 != r1_end) && (r2 != r2_end));
 
     return TRUE;
 }
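
Within one band, the sweep above is a classic merge over two x-sorted
interval lists. A self-contained sketch, with an illustrative span_t and
made-up data standing in for box_type_t and real bands:

#include <stdio.h>

typedef struct { int x1, x2; } span_t;

int
main (void)
{
    span_t a[] = { { 0, 4 }, { 6, 10 } };
    span_t b[] = { { 2, 7 }, { 9, 12 } };
    span_t *r1 = a, *r1_end = a + 2;
    span_t *r2 = b, *r2_end = b + 2;

    do
    {
        int x1 = r1->x1 > r2->x1 ? r1->x1 : r2->x1;     /* MAX */
        int x2 = r1->x2 < r2->x2 ? r1->x2 : r2->x2;     /* MIN */

        if (x1 < x2)
            printf ("intersection span: [%d, %d)\n", x1, x2);

        /* advance whichever list owns the leftmost right edge */
        if (r1->x2 == x2)
            r1++;
        if (r2->x2 == x2)
            r2++;
    }
    while (r1 != r1_end && r2 != r2_end);

    return 0;
}

It prints [2, 4), [6, 7) and [9, 10), the pairwise overlaps of the two
lists.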
 
 PIXMAN_EXPORT pixman_bool_t
-PREFIX(_intersect) (region_type_t * 	new_reg,
-			 region_type_t * 	reg1,
-			 region_type_t *	reg2)
+PREFIX (_intersect) (region_type_t *     new_reg,
+                     region_type_t *        reg1,
+                     region_type_t *        reg2)
 {
-    GOOD(reg1);
-    GOOD(reg2);
-    GOOD(new_reg);
-   /* check for trivial reject */
-    if (PIXREGION_NIL(reg1)  || PIXREGION_NIL(reg2) ||
-	!EXTENTCHECK(&reg1->extents, &reg2->extents))
-    {
-	/* Covers about 20% of all cases */
-	FREE_DATA(new_reg);
-	new_reg->extents.x2 = new_reg->extents.x1;
-	new_reg->extents.y2 = new_reg->extents.y1;
-	if (PIXREGION_NAR(reg1) || PIXREGION_NAR(reg2))
-	{
-	    new_reg->data = pixman_broken_data;
-	    return FALSE;
+    GOOD (reg1);
+    GOOD (reg2);
+    GOOD (new_reg);
+
+    /* check for trivial reject */
+    if (PIXREGION_NIL (reg1) || PIXREGION_NIL (reg2) ||
+        !EXTENTCHECK (&reg1->extents, &reg2->extents))
+    {
+        /* Covers about 20% of all cases */
+        FREE_DATA (new_reg);
+        new_reg->extents.x2 = new_reg->extents.x1;
+        new_reg->extents.y2 = new_reg->extents.y1;
+        if (PIXREGION_NAR (reg1) || PIXREGION_NAR (reg2))
+        {
+            new_reg->data = pixman_broken_data;
+            return FALSE;
 	}
-	else
+        else
+	{
 	    new_reg->data = pixman_region_empty_data;
+	}
     }
     else if (!reg1->data && !reg2->data)
     {
-	/* Covers about 80% of cases that aren't trivially rejected */
-	new_reg->extents.x1 = MAX(reg1->extents.x1, reg2->extents.x1);
-	new_reg->extents.y1 = MAX(reg1->extents.y1, reg2->extents.y1);
-	new_reg->extents.x2 = MIN(reg1->extents.x2, reg2->extents.x2);
-	new_reg->extents.y2 = MIN(reg1->extents.y2, reg2->extents.y2);
-	FREE_DATA(new_reg);
+        /* Covers about 80% of cases that aren't trivially rejected */
+        new_reg->extents.x1 = MAX (reg1->extents.x1, reg2->extents.x1);
+        new_reg->extents.y1 = MAX (reg1->extents.y1, reg2->extents.y1);
+        new_reg->extents.x2 = MIN (reg1->extents.x2, reg2->extents.x2);
+        new_reg->extents.y2 = MIN (reg1->extents.y2, reg2->extents.y2);
+
+        FREE_DATA (new_reg);
+
 	new_reg->data = (region_data_type_t *)NULL;
     }
-    else if (!reg2->data && SUBSUMES(&reg2->extents, &reg1->extents))
+    else if (!reg2->data && SUBSUMES (&reg2->extents, &reg1->extents))
     {
-	return PREFIX(_copy) (new_reg, reg1);
+        return PREFIX (_copy) (new_reg, reg1);
     }
-    else if (!reg1->data && SUBSUMES(&reg1->extents, &reg2->extents))
+    else if (!reg1->data && SUBSUMES (&reg1->extents, &reg2->extents))
     {
-	return PREFIX(_copy) (new_reg, reg2);
+        return PREFIX (_copy) (new_reg, reg2);
     }
     else if (reg1 == reg2)
     {
-	return PREFIX(_copy) (new_reg, reg1);
+        return PREFIX (_copy) (new_reg, reg1);
     }
     else
     {
-	/* General purpose intersection */
-	int overlap; /* result ignored */
-	if (!pixman_op(new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, FALSE,
-			&overlap))
+        /* General purpose intersection */
+        int overlap; /* result ignored */
+
+        if (!pixman_op (new_reg, reg1, reg2, pixman_region_intersect_o, FALSE, FALSE,
+                        &overlap))
+	{
 	    return FALSE;
-	pixman_set_extents(new_reg);
+	}
+	
+        pixman_set_extents (new_reg);
     }
 
-    GOOD(new_reg);
+    GOOD (new_reg);
     return(TRUE);
 }
 
-#define MERGERECT(r)						\
-{								\
-    if (r->x1 <= x2) {						\
-	/* Merge with current rectangle */			\
-	if (r->x1 < x2) *overlap = TRUE;				\
-	if (x2 < r->x2) x2 = r->x2;				\
-    } else {							\
-	/* Add current rectangle, start new one */		\
-	NEWRECT(region, next_rect, x1, y1, x2, y2);		\
-	x1 = r->x1;						\
-	x2 = r->x2;						\
-    }								\
-    r++;							\
-}
+#define MERGERECT(r)							\
+    do									\
+    {									\
+        if (r->x1 <= x2)						\
+	{								\
+            /* Merge with current rectangle */				\
+            if (r->x1 < x2)						\
+		*overlap = TRUE;					\
+									\
+            if (x2 < r->x2)						\
+		x2 = r->x2;						\
+	}								\
+	else								\
+	{								\
+            /* Add current rectangle, start new one */			\
+            NEWRECT (region, next_rect, x1, y1, x2, y2);		\
+            x1 = r->x1;							\
+            x2 = r->x2;							\
+	}								\
+        r++;								\
+    } while (0)
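
Seen in isolation, MERGERECT implements a one-band union: spans arrive
sorted by x1, and each either widens the span being accumulated or flushes
it and starts a new one. A minimal sketch with an illustrative span_t and
made-up data:

#include <stdio.h>

typedef struct { int x1, x2; } span_t;

int
main (void)
{
    span_t in[] = { { 0, 3 }, { 2, 5 }, { 7, 9 } };  /* sorted by x1 */
    int n = sizeof (in) / sizeof (in[0]);
    int x1 = in[0].x1;
    int x2 = in[0].x2;
    int i;

    for (i = 1; i < n; i++)
    {
        if (in[i].x1 <= x2)
        {
            /* merge with the current span (overlap or abutment) */
            if (in[i].x2 > x2)
                x2 = in[i].x2;
        }
        else
        {
            /* flush the current span, start a new one */
            printf ("union span: [%d, %d)\n", x1, x2);
            x1 = in[i].x1;
            x2 = in[i].x2;
        }
    }
    printf ("union span: [%d, %d)\n", x1, x2);

    return 0;
}

The three input spans collapse to [0, 5) and [7, 9).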
 
 /*======================================================================
  *	    Region Union
@@ -1041,61 +1216,65 @@ PREFIX(_intersect) (region_type_t * 	new_reg,
  *-----------------------------------------------------------------------
  */
 static pixman_bool_t
-pixman_region_union_o (
-    region_type_t	 *region,
-    box_type_t *r1,
-    box_type_t *r1_end,
-    box_type_t *r2,
-    box_type_t *r2_end,
-    int	  y1,
-    int	  y2,
-    int		  *overlap)
+pixman_region_union_o (region_type_t *region,
+		       box_type_t *   r1,
+		       box_type_t *   r1_end,
+		       box_type_t *   r2,
+		       box_type_t *   r2_end,
+		       int            y1,
+		       int            y2,
+		       int *          overlap)
 {
-    box_type_t *     next_rect;
-    int        x1;     /* left and right side of current union */
-    int        x2;
+    box_type_t *next_rect;
+    int x1;            /* left and right side of current union */
+    int x2;
 
     assert (y1 < y2);
-    assert(r1 != r1_end && r2 != r2_end);
+    assert (r1 != r1_end && r2 != r2_end);
 
-    next_rect = PIXREGION_TOP(region);
+    next_rect = PIXREGION_TOP (region);
 
     /* Start off current rectangle */
     if (r1->x1 < r2->x1)
     {
-	x1 = r1->x1;
-	x2 = r1->x2;
-	r1++;
+        x1 = r1->x1;
+        x2 = r1->x2;
+        r1++;
     }
     else
     {
-	x1 = r2->x1;
-	x2 = r2->x2;
-	r2++;
+        x1 = r2->x1;
+        x2 = r2->x2;
+        r2++;
     }
     while (r1 != r1_end && r2 != r2_end)
     {
-	if (r1->x1 < r2->x1) MERGERECT(r1) else MERGERECT(r2);
+        if (r1->x1 < r2->x1)
+	    MERGERECT (r1);
+	else
+	    MERGERECT (r2);
     }
 
     /* Finish off whoever (if any) is left */
     if (r1 != r1_end)
     {
-	do
-	{
-	    MERGERECT(r1);
-	} while (r1 != r1_end);
+        do
+        {
+            MERGERECT (r1);
+	}
+        while (r1 != r1_end);
     }
     else if (r2 != r2_end)
     {
-	do
-	{
-	    MERGERECT(r2);
-	} while (r2 != r2_end);
+        do
+        {
+            MERGERECT (r2);
+	}
+        while (r2 != r2_end);
     }
 
     /* Add current rectangle */
-    NEWRECT(region, next_rect, x1, y1, x2, y2);
+    NEWRECT (region, next_rect, x1, y1, x2, y2);
 
     return TRUE;
 }
@@ -1104,99 +1283,110 @@ pixman_region_union_o (
  * single rectangle
  */
 PIXMAN_EXPORT pixman_bool_t
-PREFIX(_union_rect) (region_type_t *dest,
-			  region_type_t *source,
-			  int x, int y,
-			  unsigned int width, unsigned int height)
+PREFIX (_union_rect) (region_type_t *dest,
+                      region_type_t *source,
+                      int            x,
+		      int            y,
+                      unsigned int   width,
+		      unsigned int   height)
 {
     region_type_t region;
 
     if (!width || !height)
-	return PREFIX(_copy) (dest, source);
-    region.data = NULL;
+	return PREFIX (_copy) (dest, source);
+    
     region.extents.x1 = x;
     region.extents.y1 = y;
     region.extents.x2 = x + width;
     region.extents.y2 = y + height;
 
-    return PREFIX(_union) (dest, source, &region);
+    region.data = NULL;
+
+    return PREFIX (_union) (dest, source, &region);
 }
 
 PIXMAN_EXPORT pixman_bool_t
-PREFIX(_union) (region_type_t *new_reg,
-		     region_type_t *reg1,
-		     region_type_t *reg2)
+PREFIX (_union) (region_type_t *new_reg,
+                 region_type_t *reg1,
+                 region_type_t *reg2)
 {
     int overlap; /* result ignored */
 
     /* Return TRUE if some overlap
      * between reg1, reg2
      */
-    GOOD(reg1);
-    GOOD(reg2);
-    GOOD(new_reg);
+    GOOD (reg1);
+    GOOD (reg2);
+    GOOD (new_reg);
+
     /*  checks all the simple cases */
 
     /*
      * Region 1 and 2 are the same
      */
     if (reg1 == reg2)
-    {
-	return PREFIX(_copy) (new_reg, reg1);
-    }
+        return PREFIX (_copy) (new_reg, reg1);
 
     /*
      * Region 1 is empty
      */
-    if (PIXREGION_NIL(reg1))
+    if (PIXREGION_NIL (reg1))
     {
-	if (PIXREGION_NAR(reg1))
+        if (PIXREGION_NAR (reg1))
 	    return pixman_break (new_reg);
+
         if (new_reg != reg2)
-	    return PREFIX(_copy) (new_reg, reg2);
-        return TRUE;
+	    return PREFIX (_copy) (new_reg, reg2);
+
+	return TRUE;
     }
 
     /*
      * Region 2 is empty
      */
-    if (PIXREGION_NIL(reg2))
+    if (PIXREGION_NIL (reg2))
     {
-	if (PIXREGION_NAR(reg2))
+        if (PIXREGION_NAR (reg2))
 	    return pixman_break (new_reg);
-        if (new_reg != reg1)
-	    return PREFIX(_copy) (new_reg, reg1);
-        return TRUE;
+
+	if (new_reg != reg1)
+	    return PREFIX (_copy) (new_reg, reg1);
+
+	return TRUE;
     }
 
     /*
      * Region 1 completely subsumes region 2
      */
-    if (!reg1->data && SUBSUMES(&reg1->extents, &reg2->extents))
+    if (!reg1->data && SUBSUMES (&reg1->extents, &reg2->extents))
     {
         if (new_reg != reg1)
-	    return PREFIX(_copy) (new_reg, reg1);
-        return TRUE;
+	    return PREFIX (_copy) (new_reg, reg1);
+
+	return TRUE;
     }
 
     /*
      * Region 2 completely subsumes region 1
      */
-    if (!reg2->data && SUBSUMES(&reg2->extents, &reg1->extents))
+    if (!reg2->data && SUBSUMES (&reg2->extents, &reg1->extents))
     {
         if (new_reg != reg2)
-	    return PREFIX(_copy) (new_reg, reg2);
-        return TRUE;
+	    return PREFIX (_copy) (new_reg, reg2);
+
+	return TRUE;
     }
 
-    if (!pixman_op(new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE, &overlap))
+    if (!pixman_op (new_reg, reg1, reg2, pixman_region_union_o, TRUE, TRUE, &overlap))
 	return FALSE;
 
-    new_reg->extents.x1 = MIN(reg1->extents.x1, reg2->extents.x1);
-    new_reg->extents.y1 = MIN(reg1->extents.y1, reg2->extents.y1);
-    new_reg->extents.x2 = MAX(reg1->extents.x2, reg2->extents.x2);
-    new_reg->extents.y2 = MAX(reg1->extents.y2, reg2->extents.y2);
-    GOOD(new_reg);
+    new_reg->extents.x1 = MIN (reg1->extents.x1, reg2->extents.x1);
+    new_reg->extents.y1 = MIN (reg1->extents.y1, reg2->extents.y1);
+    new_reg->extents.x2 = MAX (reg1->extents.x2, reg2->extents.x2);
+    new_reg->extents.y2 = MAX (reg1->extents.y2, reg2->extents.y2);
+    
+    GOOD (new_reg);
+
     return TRUE;
 }
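
For reference, a hedged usage sketch of this entry point as it appears in
the 16-bit flavor of the API (pixman_region_union and friends from
pixman.h):

#include <stdio.h>
#include <pixman.h>

int
main (void)
{
    pixman_region16_t a, b, u;

    pixman_region_init_rect (&a, 0, 0, 10, 10);
    pixman_region_init_rect (&b, 5, 5, 10, 10);
    pixman_region_init (&u);

    if (pixman_region_union (&u, &a, &b))
    {
        /* two overlapping squares yield three y-x bands */
        printf ("%d rectangles in the union\n",
                pixman_region_n_rects (&u));
    }

    pixman_region_fini (&a);
    pixman_region_fini (&b);
    pixman_region_fini (&u);

    return 0;
}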
 
@@ -1204,71 +1394,83 @@ PREFIX(_union) (region_type_t *new_reg,
  *	    Batch Rectangle Union
  *====================================================================*/
 
-#define EXCHANGE_RECTS(a, b) \
-{			    \
-    box_type_t     t;	    \
-    t = rects[a];	    \
-    rects[a] = rects[b];    \
-    rects[b] = t;	    \
-}
+#define EXCHANGE_RECTS(a, b)	\
+    {                           \
+        box_type_t t;		\
+        t = rects[a];           \
+        rects[a] = rects[b];    \
+        rects[b] = t;           \
+    }
 
 static void
-quick_sort_rects(
-    box_type_t     rects[],
+quick_sort_rects (
+    box_type_t rects[],
     int        numRects)
 {
-    int	y1;
-    int	x1;
-    int        i, j;
+    int y1;
+    int x1;
+    int i, j;
     box_type_t *r;
 
     /* Always called with numRects > 1 */
 
     do
     {
-	if (numRects == 2)
-	{
-	    if (rects[0].y1 > rects[1].y1 ||
-		    (rects[0].y1 == rects[1].y1 && rects[0].x1 > rects[1].x1))
-		EXCHANGE_RECTS(0, 1);
-	    return;
+        if (numRects == 2)
+        {
+            if (rects[0].y1 > rects[1].y1 ||
+                (rects[0].y1 == rects[1].y1 && rects[0].x1 > rects[1].x1))
+	    {
+		EXCHANGE_RECTS (0, 1);
+	    }
+
+            return;
 	}
 
-	/* Choose partition element, stick in location 0 */
-        EXCHANGE_RECTS(0, numRects >> 1);
-	y1 = rects[0].y1;
-	x1 = rects[0].x1;
+        /* Choose partition element, stick in location 0 */
+        EXCHANGE_RECTS (0, numRects >> 1);
+        y1 = rects[0].y1;
+        x1 = rects[0].x1;
 
         /* Partition array */
         i = 0;
         j = numRects;
+
         do
-	{
-	    r = &(rects[i]);
-	    do
-	    {
-		r++;
-		i++;
-            } while (i != numRects &&
-		     (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1)));
+        {
+            r = &(rects[i]);
+            do
+            {
+                r++;
+                i++;
+	    }
+            while (i != numRects &&
+                   (r->y1 < y1 || (r->y1 == y1 && r->x1 < x1)));
+
 	    r = &(rects[j]);
-	    do
-	    {
-		r--;
-		j--;
-            } while (y1 < r->y1 || (y1 == r->y1 && x1 < r->x1));
+            do
+            {
+                r--;
+                j--;
+	    }
+            while (y1 < r->y1 || (y1 == r->y1 && x1 < r->x1));
+	    
             if (i < j)
-		EXCHANGE_RECTS(i, j);
-        } while (i < j);
+		EXCHANGE_RECTS (i, j);
+	}
+        while (i < j);
 
         /* Move partition element back to middle */
-        EXCHANGE_RECTS(0, j);
+        EXCHANGE_RECTS (0, j);
+
+        /* Recurse */
+        if (numRects - j - 1 > 1)
+	    quick_sort_rects (&rects[j + 1], numRects - j - 1);
 
-	/* Recurse */
-        if (numRects-j-1 > 1)
-	    quick_sort_rects(&rects[j+1], numRects-j-1);
         numRects = j;
-    } while (numRects > 1);
+    }
+    while (numRects > 1);
 }
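
The ordering quick_sort_rects establishes is plain lexicographic (y1, x1).
An equivalent formulation using the C library's qsort, with a simplified
box_t standing in for box_type_t:

#include <stdio.h>
#include <stdlib.h>

typedef struct { int x1, y1, x2, y2; } box_t;

static int
compare_boxes (const void *pa, const void *pb)
{
    const box_t *a = pa;
    const box_t *b = pb;

    if (a->y1 != b->y1)
        return a->y1 - b->y1;   /* topmost band first */

    return a->x1 - b->x1;       /* then left to right within a band */
}

int
main (void)
{
    box_t rects[] = {
        { 5, 10, 8, 12 }, { 0, 10, 3, 12 }, { 2, 0, 4, 5 }
    };
    int i;

    qsort (rects, 3, sizeof (box_t), compare_boxes);

    for (i = 0; i < 3; i++)
        printf ("(%d, %d)\n", rects[i].x1, rects[i].y1);

    return 0;
}

The boxes come out as (2, 0), (0, 10), (5, 10), which is exactly the
ascending (y1, x1) order that Step 1 of validate relies on.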
 
 /*-
@@ -1306,61 +1508,66 @@ quick_sort_rects(
 
 static pixman_bool_t
 validate (region_type_t * badreg,
-	  int *overlap)
+          int *           overlap)
 {
     /* Descriptor for regions under construction  in Step 2. */
-    typedef struct {
-	region_type_t   reg;
-	int	    prev_band;
-	int	    cur_band;
+    typedef struct
+    {
+        region_type_t reg;
+        int prev_band;
+        int cur_band;
     } region_info_t;
 
     region_info_t stack_regions[64];
 
-	     int	numRects;   /* Original numRects for badreg	    */
-	     region_info_t *ri;	    /* Array of current regions		    */
-    	     int	num_ri;      /* Number of entries used in ri	    */
-	     int	size_ri;	    /* Number of entries available in ri    */
-	     int	i;	    /* Index into rects			    */
-    int	j;	    /* Index into ri			    */
-    region_info_t *rit;       /* &ri[j]				    */
-    region_type_t *  reg;        /* ri[j].reg			    */
-    box_type_t *	box;	    /* Current box in rects		    */
-    box_type_t *	ri_box;      /* Last box in ri[j].reg		    */
-    region_type_t *  hreg;       /* ri[j_half].reg			    */
+    int numRects;                   /* Original numRects for badreg	    */
+    region_info_t *ri;              /* Array of current regions		    */
+    int num_ri;                     /* Number of entries used in ri	    */
+    int size_ri;                    /* Number of entries available in ri    */
+    int i;                          /* Index into rects			    */
+    int j;                          /* Index into ri			    */
+    region_info_t *rit;             /* &ri[j]				    */
+    region_type_t *reg;             /* ri[j].reg			    */
+    box_type_t *box;                /* Current box in rects		    */
+    box_type_t *ri_box;             /* Last box in ri[j].reg		    */
+    region_type_t *hreg;            /* ri[j_half].reg			    */
     pixman_bool_t ret = TRUE;
 
     *overlap = FALSE;
     if (!badreg->data)
     {
-	GOOD(badreg);
-	return TRUE;
+        GOOD (badreg);
+        return TRUE;
     }
+    
     numRects = badreg->data->numRects;
     if (!numRects)
     {
-	if (PIXREGION_NAR(badreg))
+        if (PIXREGION_NAR (badreg))
 	    return FALSE;
-	GOOD(badreg);
-	return TRUE;
+        GOOD (badreg);
+        return TRUE;
     }
+    
     if (badreg->extents.x1 < badreg->extents.x2)
     {
-	if ((numRects) == 1)
-	{
-	    FREE_DATA(badreg);
-	    badreg->data = (region_data_type_t *) NULL;
+        if ((numRects) == 1)
+        {
+            FREE_DATA (badreg);
+            badreg->data = (region_data_type_t *) NULL;
 	}
-	else
-	{
-	    DOWNSIZE(badreg, numRects);
+        else
+        {
+            DOWNSIZE (badreg, numRects);
 	}
-	GOOD(badreg);
+
+        GOOD (badreg);
+
 	return TRUE;
     }
 
     /* Step 1: Sort the rects array into ascending (y1, x1) order */
-    quick_sort_rects(PIXREGION_BOXPTR(badreg), numRects);
+    quick_sort_rects (PIXREGION_BOXPTR (badreg), numRects);
 
     /* Step 2: Scatter the sorted array into the minimum number of regions */
 
@@ -1372,142 +1579,176 @@ validate (region_type_t * badreg,
     ri[0].prev_band = 0;
     ri[0].cur_band = 0;
     ri[0].reg = *badreg;
-    box = PIXREGION_BOXPTR(&ri[0].reg);
+    box = PIXREGION_BOXPTR (&ri[0].reg);
     ri[0].reg.extents = *box;
     ri[0].reg.data->numRects = 1;
     badreg->extents = *pixman_region_empty_box;
     badreg->data = pixman_region_empty_data;
 
     /* Now scatter rectangles into the minimum set of valid regions.  If the
-       next rectangle to be added to a region would force an existing rectangle
-       in the region to be split up in order to maintain y-x banding, just
-       forget it.  Try the next region.  If it doesn't fit cleanly into any
-       region, make a new one. */
+     * next rectangle to be added to a region would force an existing rectangle
+     * in the region to be split up in order to maintain y-x banding, just
+     * forget it.  Try the next region.  If it doesn't fit cleanly into any
+     * region, make a new one.
+     */
 
     for (i = numRects; --i > 0;)
     {
-	box++;
-	/* Look for a region to append box to */
-	for (j = num_ri, rit = ri; --j >= 0; rit++)
-	{
-	    reg = &rit->reg;
-	    ri_box = PIXREGION_END(reg);
-
-	    if (box->y1 == ri_box->y1 && box->y2 == ri_box->y2)
-	    {
-		/* box is in same band as ri_box.  Merge or append it */
-		if (box->x1 <= ri_box->x2)
-		{
-		    /* Merge it with ri_box */
-		    if (box->x1 < ri_box->x2) *overlap = TRUE;
-		    if (box->x2 > ri_box->x2) ri_box->x2 = box->x2;
+        box++;
+        /* Look for a region to append box to */
+        for (j = num_ri, rit = ri; --j >= 0; rit++)
+        {
+            reg = &rit->reg;
+            ri_box = PIXREGION_END (reg);
+
+            if (box->y1 == ri_box->y1 && box->y2 == ri_box->y2)
+            {
+                /* box is in same band as ri_box.  Merge or append it */
+                if (box->x1 <= ri_box->x2)
+                {
+                    /* Merge it with ri_box */
+                    if (box->x1 < ri_box->x2)
+			*overlap = TRUE;
+
+                    if (box->x2 > ri_box->x2)
+			ri_box->x2 = box->x2;
 		}
-		else
-		{
-		    RECTALLOC_BAIL(reg, 1, bail);
-		    *PIXREGION_TOP(reg) = *box;
-		    reg->data->numRects++;
+                else
+                {
+                    RECTALLOC_BAIL (reg, 1, bail);
+                    *PIXREGION_TOP (reg) = *box;
+                    reg->data->numRects++;
 		}
-		goto next_rect;   /* So sue me */
+		
+                goto next_rect;   /* So sue me */
 	    }
-	    else if (box->y1 >= ri_box->y2)
-	    {
-		/* Put box into new band */
-		if (reg->extents.x2 < ri_box->x2) reg->extents.x2 = ri_box->x2;
-		if (reg->extents.x1 > box->x1)   reg->extents.x1 = box->x1;
-		COALESCE(reg, rit->prev_band, rit->cur_band);
-		rit->cur_band = reg->data->numRects;
-		RECTALLOC_BAIL(reg, 1, bail);
-		*PIXREGION_TOP(reg) = *box;
-		reg->data->numRects++;
-		goto next_rect;
+            else if (box->y1 >= ri_box->y2)
+            {
+                /* Put box into new band */
+                if (reg->extents.x2 < ri_box->x2)
+		    reg->extents.x2 = ri_box->x2;
+		
+                if (reg->extents.x1 > box->x1)
+		    reg->extents.x1 = box->x1;
+		
+                COALESCE (reg, rit->prev_band, rit->cur_band);
+                rit->cur_band = reg->data->numRects;
+                RECTALLOC_BAIL (reg, 1, bail);
+                *PIXREGION_TOP (reg) = *box;
+                reg->data->numRects++;
+
+                goto next_rect;
 	    }
-	    /* Well, this region was inappropriate.  Try the next one. */
+            /* Well, this region was inappropriate.  Try the next one. */
 	} /* for j */
 
-	/* Uh-oh.  No regions were appropriate.  Create a new one. */
-	if (size_ri == num_ri)
-	{
-	    size_t data_size;
-	    
-	    /* Oops, allocate space for new region information */
-	    size_ri <<= 1;
+        /* Uh-oh.  No regions were appropriate.  Create a new one. */
+        if (size_ri == num_ri)
+        {
+            size_t data_size;
+
+            /* Oops, allocate space for new region information */
+            size_ri <<= 1;
 
             data_size = size_ri * sizeof(region_info_t);
             if (data_size / size_ri != sizeof(region_info_t))
-                goto bail;
-	    if (ri == stack_regions) {
-		rit = malloc (data_size);
-		if (!rit)
+		goto bail;
+
+            if (ri == stack_regions)
+            {
+                rit = malloc (data_size);
+                if (!rit)
 		    goto bail;
-		memcpy (rit, ri, num_ri * sizeof (region_info_t));
-	    } else {
-		rit = (region_info_t *) realloc(ri, data_size);
-		if (!rit)
+                memcpy (rit, ri, num_ri * sizeof (region_info_t));
+	    }
+            else
+            {
+                rit = (region_info_t *) realloc (ri, data_size);
+                if (!rit)
 		    goto bail;
 	    }
-	    ri = rit;
-	    rit = &ri[num_ri];
+            ri = rit;
+            rit = &ri[num_ri];
 	}
-	num_ri++;
-	rit->prev_band = 0;
-	rit->cur_band = 0;
-	rit->reg.extents = *box;
-	rit->reg.data = (region_data_type_t *)NULL;
-	if (!pixman_rect_alloc(&rit->reg, (i+num_ri) / num_ri)) /* MUST force allocation */
+        num_ri++;
+        rit->prev_band = 0;
+        rit->cur_band = 0;
+        rit->reg.extents = *box;
+        rit->reg.data = (region_data_type_t *)NULL;
+
+	/* MUST force allocation */
+        if (!pixman_rect_alloc (&rit->reg, (i + num_ri) / num_ri))
 	    goto bail;
-next_rect: ;
+	
+    next_rect: ;
     } /* for i */
 
     /* Make a final pass over each region in order to COALESCE and set
-       extents.x2 and extents.y2 */
-
+     * extents.x2 and extents.y2
+     */
     for (j = num_ri, rit = ri; --j >= 0; rit++)
     {
-	reg = &rit->reg;
-	ri_box = PIXREGION_END(reg);
-	reg->extents.y2 = ri_box->y2;
-	if (reg->extents.x2 < ri_box->x2) reg->extents.x2 = ri_box->x2;
-	COALESCE(reg, rit->prev_band, rit->cur_band);
+        reg = &rit->reg;
+        ri_box = PIXREGION_END (reg);
+        reg->extents.y2 = ri_box->y2;
+
+        if (reg->extents.x2 < ri_box->x2)
+	    reg->extents.x2 = ri_box->x2;
+	
+        COALESCE (reg, rit->prev_band, rit->cur_band);
+
 	if (reg->data->numRects == 1) /* keep unions happy below */
-	{
-	    FREE_DATA(reg);
-	    reg->data = (region_data_type_t *)NULL;
+        {
+            FREE_DATA (reg);
+            reg->data = (region_data_type_t *)NULL;
 	}
     }
 
     /* Step 3: Union all regions into a single region */
     while (num_ri > 1)
     {
-	int half = num_ri/2;
-	for (j = num_ri & 1; j < (half + (num_ri & 1)); j++)
-	{
-	    reg = &ri[j].reg;
-	    hreg = &ri[j+half].reg;
-	    if (!pixman_op(reg, reg, hreg, pixman_region_union_o, TRUE, TRUE, overlap))
+        int half = num_ri / 2;
+        for (j = num_ri & 1; j < (half + (num_ri & 1)); j++)
+        {
+            reg = &ri[j].reg;
+            hreg = &ri[j + half].reg;
+
+            if (!pixman_op (reg, reg, hreg, pixman_region_union_o, TRUE, TRUE, overlap))
 		ret = FALSE;
-	    if (hreg->extents.x1 < reg->extents.x1)
+
+            if (hreg->extents.x1 < reg->extents.x1)
 		reg->extents.x1 = hreg->extents.x1;
-	    if (hreg->extents.y1 < reg->extents.y1)
+
+            if (hreg->extents.y1 < reg->extents.y1)
 		reg->extents.y1 = hreg->extents.y1;
-	    if (hreg->extents.x2 > reg->extents.x2)
+
+            if (hreg->extents.x2 > reg->extents.x2)
 		reg->extents.x2 = hreg->extents.x2;
-	    if (hreg->extents.y2 > reg->extents.y2)
+
+            if (hreg->extents.y2 > reg->extents.y2)
 		reg->extents.y2 = hreg->extents.y2;
-	    FREE_DATA(hreg);
+
+            FREE_DATA (hreg);
 	}
-	num_ri -= half;
+
+        num_ri -= half;
+
 	if (!ret)
 	    goto bail;
     }
+
     *badreg = ri[0].reg;
+
     if (ri != stack_regions)
-	free(ri);
-    GOOD(badreg);
+	free (ri);
+
+    GOOD (badreg);
     return ret;
+
 bail:
     for (i = 0; i < num_ri; i++)
-	FREE_DATA(&ri[i].reg);
+	FREE_DATA (&ri[i].reg);
+
     if (ri != stack_regions)
 	free (ri);
 
@@ -1515,7 +1756,7 @@ bail:
 }
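
Step 3 above merges the scattered regions pairwise, halving num_ri on every
pass, so n regions cost roughly log2(n) rounds of pixman_op rather than
n - 1 sequential unions. A small arithmetic illustration of the pass
structure:

#include <stdio.h>

int
main (void)
{
    int num_ri = 13;    /* regions produced by the scatter step */
    int passes = 0;

    while (num_ri > 1)
    {
        int half = num_ri / 2;

        /* in each pass, region j absorbs region j + half */
        num_ri -= half;
        passes++;
    }

    printf ("merged in %d passes\n", passes);   /* prints 4 */

    return 0;
}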
 
 /*======================================================================
- * 	    	  Region Subtraction
+ *                Region Subtraction
  *====================================================================*/
 
 /*-
@@ -1534,109 +1775,113 @@ bail:
  */
 /*ARGSUSED*/
 static pixman_bool_t
-pixman_region_subtract_o (
-    region_type_t *	region,
-    box_type_t *	r1,
-    box_type_t *  	  	r1_end,
-    box_type_t *	r2,
-    box_type_t *  	  	r2_end,
-    int  	y1,
-    int  	y2,
-    int		*overlap)
+pixman_region_subtract_o (region_type_t * region,
+			  box_type_t *    r1,
+			  box_type_t *    r1_end,
+			  box_type_t *    r2,
+			  box_type_t *    r2_end,
+			  int             y1,
+			  int             y2,
+			  int *           overlap)
 {
-    box_type_t *	next_rect;
-    int  	x1;
+    box_type_t *        next_rect;
+    int x1;
 
     x1 = r1->x1;
 
-    assert(y1<y2);
-    assert(r1 != r1_end && r2 != r2_end);
+    assert (y1 < y2);
+    assert (r1 != r1_end && r2 != r2_end);
 
-    next_rect = PIXREGION_TOP(region);
+    next_rect = PIXREGION_TOP (region);
 
     do
     {
-	if (r2->x2 <= x1)
-	{
-	    /*
+        if (r2->x2 <= x1)
+        {
+            /*
 	     * Subtrahend entirely to left of minuend: go to next subtrahend.
 	     */
-	    r2++;
+            r2++;
 	}
-	else if (r2->x1 <= x1)
-	{
-	    /*
+        else if (r2->x1 <= x1)
+        {
+            /*
 	     * Subtrahend precedes minuend: nuke left edge of minuend.
 	     */
-	    x1 = r2->x2;
-	    if (x1 >= r1->x2)
-	    {
-		/*
+            x1 = r2->x2;
+            if (x1 >= r1->x2)
+            {
+                /*
 		 * Minuend completely covered: advance to next minuend and
 		 * reset left fence to edge of new minuend.
 		 */
-		r1++;
-		if (r1 != r1_end)
+                r1++;
+                if (r1 != r1_end)
 		    x1 = r1->x1;
 	    }
-	    else
-	    {
-		/*
+            else
+            {
+                /*
 		 * Subtrahend now used up since it doesn't extend beyond
 		 * minuend
 		 */
-		r2++;
+                r2++;
 	    }
 	}
-	else if (r2->x1 < r1->x2)
-	{
-	    /*
+        else if (r2->x1 < r1->x2)
+        {
+            /*
 	     * Left part of subtrahend covers part of minuend: add uncovered
 	     * part of minuend to region and skip to next subtrahend.
 	     */
-	    assert(x1<r2->x1);
-	    NEWRECT(region, next_rect, x1, y1, r2->x1, y2);
+            assert (x1 < r2->x1);
+            NEWRECT (region, next_rect, x1, y1, r2->x1, y2);
 
-	    x1 = r2->x2;
-	    if (x1 >= r1->x2)
-	    {
-		/*
+            x1 = r2->x2;
+            if (x1 >= r1->x2)
+            {
+                /*
 		 * Minuend used up: advance to new...
 		 */
-		r1++;
-		if (r1 != r1_end)
+                r1++;
+                if (r1 != r1_end)
 		    x1 = r1->x1;
 	    }
-	    else
-	    {
-		/*
+            else
+            {
+                /*
 		 * Subtrahend used up
 		 */
-		r2++;
+                r2++;
 	    }
 	}
-	else
-	{
-	    /*
+        else
+        {
+            /*
 	     * Minuend used up: add any remaining piece before advancing.
 	     */
-	    if (r1->x2 > x1)
-		NEWRECT(region, next_rect, x1, y1, r1->x2, y2);
-	    r1++;
+            if (r1->x2 > x1)
+		NEWRECT (region, next_rect, x1, y1, r1->x2, y2);
+
+            r1++;
+
 	    if (r1 != r1_end)
 		x1 = r1->x1;
 	}
-    } while ((r1 != r1_end) && (r2 != r2_end));
+    }
+    while ((r1 != r1_end) && (r2 != r2_end));
 
     /*
      * Add remaining minuend rectangles to region.
      */
     while (r1 != r1_end)
     {
-	assert(x1<r1->x2);
-	NEWRECT(region, next_rect, x1, y1, r1->x2, y2);
-	r1++;
-	if (r1 != r1_end)
+        assert (x1 < r1->x2);
+
+        NEWRECT (region, next_rect, x1, y1, r1->x2, y2);
+
+        r1++;
+        if (r1 != r1_end)
 	    x1 = r1->x1;
     }
     return TRUE;
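
One band of this subtraction is again a two-pointer sweep, emitting the
pieces of the minuend that the subtrahend leaves uncovered. A standalone
sketch with an illustrative span_t and made-up data:

#include <stdio.h>

typedef struct { int x1, x2; } span_t;

int
main (void)
{
    span_t m[] = { { 0, 10 } };                 /* minuend */
    span_t s[] = { { 2, 4 }, { 6, 8 } };        /* subtrahend */
    span_t *r1 = m, *r1_end = m + 1;
    span_t *r2 = s, *r2_end = s + 2;
    int x1 = r1->x1;    /* left fence of the uncovered part */

    while (r1 != r1_end && r2 != r2_end)
    {
        if (r2->x2 <= x1)
        {
            r2++;                       /* subtrahend entirely to the left */
        }
        else if (r2->x1 <= x1)
        {
            x1 = r2->x2;                /* clip the minuend's left edge */
            if (x1 >= r1->x2)
            {
                if (++r1 != r1_end)     /* minuend fully covered */
                    x1 = r1->x1;
            }
            else
            {
                r2++;
            }
        }
        else if (r2->x1 < r1->x2)
        {
            printf ("kept: [%d, %d)\n", x1, r2->x1);
            x1 = r2->x2;
            if (x1 >= r1->x2)
            {
                if (++r1 != r1_end)
                    x1 = r1->x1;
            }
            else
            {
                r2++;
            }
        }
        else
        {
            if (r1->x2 > x1)            /* remainder of this minuend */
                printf ("kept: [%d, %d)\n", x1, r1->x2);
            if (++r1 != r1_end)
                x1 = r1->x1;
        }
    }

    while (r1 != r1_end)                /* leftover minuend spans */
    {
        printf ("kept: [%d, %d)\n", x1, r1->x2);
        if (++r1 != r1_end)
            x1 = r1->x1;
    }

    return 0;
}

Subtracting [2, 4) and [6, 8) from [0, 10) keeps [0, 2), [4, 6) and
[8, 10).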
@@ -1657,36 +1902,39 @@ pixman_region_subtract_o (
  *-----------------------------------------------------------------------
  */
 PIXMAN_EXPORT pixman_bool_t
-PREFIX(_subtract) (region_type_t *	reg_d,
-		       region_type_t * 	reg_m,
-		       region_type_t *	reg_s)
+PREFIX (_subtract) (region_type_t *reg_d,
+                    region_type_t *reg_m,
+                    region_type_t *reg_s)
 {
     int overlap; /* result ignored */
 
-    GOOD(reg_m);
-    GOOD(reg_s);
-    GOOD(reg_d);
-   /* check for trivial rejects */
-    if (PIXREGION_NIL(reg_m) || PIXREGION_NIL(reg_s) ||
-	!EXTENTCHECK(&reg_m->extents, &reg_s->extents))
+    GOOD (reg_m);
+    GOOD (reg_s);
+    GOOD (reg_d);
+    
+    /* check for trivial rejects */
+    if (PIXREGION_NIL (reg_m) || PIXREGION_NIL (reg_s) ||
+        !EXTENTCHECK (&reg_m->extents, &reg_s->extents))
     {
-	if (PIXREGION_NAR (reg_s))
+        if (PIXREGION_NAR (reg_s))
 	    return pixman_break (reg_d);
-	return PREFIX(_copy) (reg_d, reg_m);
+	
+        return PREFIX (_copy) (reg_d, reg_m);
     }
     else if (reg_m == reg_s)
     {
-	FREE_DATA(reg_d);
-	reg_d->extents.x2 = reg_d->extents.x1;
-	reg_d->extents.y2 = reg_d->extents.y1;
-	reg_d->data = pixman_region_empty_data;
-	return TRUE;
+        FREE_DATA (reg_d);
+        reg_d->extents.x2 = reg_d->extents.x1;
+        reg_d->extents.y2 = reg_d->extents.y1;
+        reg_d->data = pixman_region_empty_data;
+
+        return TRUE;
     }
 
     /* Add those rectangles in region 1 that aren't in region 2,
        do yucky subtraction for overlaps, and
        just throw away rectangles in region 2 that aren't in region 1 */
-    if (!pixman_op(reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE, &overlap))
+    if (!pixman_op (reg_d, reg_m, reg_s, pixman_region_subtract_o, TRUE, FALSE, &overlap))
 	return FALSE;
 
     /*
@@ -1696,8 +1944,8 @@ PREFIX(_subtract) (region_type_t *	reg_d,
      * way there's no checking against rectangles that will be nuked
      * due to coalescing, so we have to examine fewer rectangles.
      */
-    pixman_set_extents(reg_d);
-    GOOD(reg_d);
+    pixman_set_extents (reg_d);
+    GOOD (reg_d);
     return TRUE;
 }
 
@@ -1721,33 +1969,37 @@ PREFIX(_subtract) (region_type_t *	reg_d,
  *-----------------------------------------------------------------------
  */
 pixman_bool_t
-PIXMAN_EXPORT PREFIX(_inverse) (region_type_t * 	  new_reg,       /* Destination region */
-		      region_type_t * 	  reg1,         /* Region to invert */
-		      box_type_t *     	  inv_rect) 	/* Bounding box for inversion */
+PIXMAN_EXPORT PREFIX (_inverse) (region_type_t *new_reg,  /* Destination region */
+                                 region_type_t *reg1,     /* Region to invert */
+                                 box_type_t *   inv_rect) /* Bounding box for inversion */
 {
-    region_type_t	  inv_reg;   	/* Quick and dirty region made from the
-				 * bounding box */
-    int	  overlap;	/* result ignored */
+    region_type_t inv_reg; /* Quick and dirty region made from the
+			    * bounding box */
+    int overlap;           /* result ignored */
 
-    GOOD(reg1);
-    GOOD(new_reg);
-   /* check for trivial rejects */
-    if (PIXREGION_NIL(reg1) || !EXTENTCHECK(inv_rect, &reg1->extents))
+    GOOD (reg1);
+    GOOD (new_reg);
+    
+    /* check for trivial rejects */
+    if (PIXREGION_NIL (reg1) || !EXTENTCHECK (inv_rect, &reg1->extents))
     {
-	if (PIXREGION_NAR(reg1))
+        if (PIXREGION_NAR (reg1))
 	    return pixman_break (new_reg);
-	new_reg->extents = *inv_rect;
-	FREE_DATA(new_reg);
-	new_reg->data = (region_data_type_t *)NULL;
+	
+        new_reg->extents = *inv_rect;
+        FREE_DATA (new_reg);
+        new_reg->data = (region_data_type_t *)NULL;
+	
         return TRUE;
     }
 
     /* Add those rectangles in region 1 that aren't in region 2,
-       do yucky substraction for overlaps, and
-       just throw away rectangles in region 2 that aren't in region 1 */
+     * do yucky subtraction for overlaps, and
+     * just throw away rectangles in region 2 that aren't in region 1
+     */
     inv_reg.extents = *inv_rect;
     inv_reg.data = (region_data_type_t *)NULL;
-    if (!pixman_op(new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE, &overlap))
+    if (!pixman_op (new_reg, &inv_reg, reg1, pixman_region_subtract_o, TRUE, FALSE, &overlap))
 	return FALSE;
 
     /*
@@ -1757,8 +2009,8 @@ PIXMAN_EXPORT PREFIX(_inverse) (region_type_t * 	  new_reg,       /* Destination
      * way there's no checking against rectangles that will be nuked
      * due to coalescing, so we have to examine fewer rectangles.
      */
-    pixman_set_extents(new_reg);
-    GOOD(new_reg);
+    pixman_set_extents (new_reg);
+    GOOD (new_reg);
     return TRUE;
 }
 
@@ -1780,28 +2032,29 @@ PIXMAN_EXPORT PREFIX(_inverse) (region_type_t * 	  new_reg,       /* Destination
  */
 
 pixman_region_overlap_t
-PIXMAN_EXPORT PREFIX(_contains_rectangle) (region_type_t *  region,
-				 box_type_t *     prect)
+PIXMAN_EXPORT PREFIX (_contains_rectangle) (region_type_t *  region,
+                                            box_type_t *     prect)
 {
-    int	x;
-    int	y;
     box_type_t *     pbox;
     box_type_t *     pbox_end;
-    int			part_in, part_out;
-    int			numRects;
+    int part_in, part_out;
+    int numRects;
+    int x, y;
+
+    GOOD (region);
+
+    numRects = PIXREGION_NUMRECTS (region);
 
-    GOOD(region);
-    numRects = PIXREGION_NUMRECTS(region);
     /* useful optimization */
-    if (!numRects || !EXTENTCHECK(&region->extents, prect))
-        return(PIXMAN_REGION_OUT);
+    if (!numRects || !EXTENTCHECK (&region->extents, prect))
+	return(PIXMAN_REGION_OUT);
 
     if (numRects == 1)
     {
-	/* We know that it must be PIXMAN_REGION_IN or PIXMAN_REGION_PART */
-	if (SUBSUMES(&region->extents, prect))
+        /* We know that it must be PIXMAN_REGION_IN or PIXMAN_REGION_PART */
+        if (SUBSUMES (&region->extents, prect))
 	    return(PIXMAN_REGION_IN);
-	else
+        else
 	    return(PIXMAN_REGION_PART);
     }
 
@@ -1813,279 +2066,318 @@ PIXMAN_EXPORT PREFIX(_contains_rectangle) (region_type_t *  region,
     y = prect->y1;
 
     /* can stop when both part_out and part_in are TRUE, or we reach prect->y2 */
-    for (pbox = PIXREGION_BOXPTR(region), pbox_end = pbox + numRects;
+    for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects;
          pbox != pbox_end;
          pbox++)
     {
 
         if (pbox->y2 <= y)
-           continue;    /* getting up to speed or skipping remainder of band */
+	    continue;   /* getting up to speed or skipping remainder of band */
 
         if (pbox->y1 > y)
         {
-           part_out = TRUE;      /* missed part of rectangle above */
-           if (part_in || (pbox->y1 >= prect->y2))
-              break;
-           y = pbox->y1;        /* x guaranteed to be == prect->x1 */
-        }
+            part_out = TRUE;     /* missed part of rectangle above */
+            if (part_in || (pbox->y1 >= prect->y2))
+		break;
+            y = pbox->y1;       /* x guaranteed to be == prect->x1 */
+	}
 
         if (pbox->x2 <= x)
-           continue;            /* not far enough over yet */
+	    continue;           /* not far enough over yet */
 
         if (pbox->x1 > x)
         {
-           part_out = TRUE;      /* missed part of rectangle to left */
-           if (part_in)
-              break;
-        }
+            part_out = TRUE;     /* missed part of rectangle to left */
+            if (part_in)
+		break;
+	}
 
         if (pbox->x1 < prect->x2)
         {
             part_in = TRUE;      /* definitely overlap */
             if (part_out)
-               break;
-        }
+		break;
+	}
 
         if (pbox->x2 >= prect->x2)
         {
-           y = pbox->y2;        /* finished with this band */
-           if (y >= prect->y2)
-              break;
-           x = prect->x1;       /* reset x out to left again */
-        }
-	else
-	{
-	    /*
+            y = pbox->y2;       /* finished with this band */
+            if (y >= prect->y2)
+		break;
+            x = prect->x1;      /* reset x out to left again */
+	}
+        else
+        {
+            /*
 	     * Because boxes in a band are maximal width, if the first box
 	     * to overlap the rectangle doesn't completely cover it in that
 	     * band, the rectangle must be partially out, since some of it
 	     * will be uncovered in that band. part_in will have been set true
 	     * by now...
 	     */
-	    part_out = TRUE;
-	    break;
+            part_out = TRUE;
+            break;
 	}
     }
 
     if (part_in)
     {
-	if (y < prect->y2)
+        if (y < prect->y2)
 	    return PIXMAN_REGION_PART;
-	else
+        else
 	    return PIXMAN_REGION_IN;
     }
     else
     {
-	return PIXMAN_REGION_OUT;
+        return PIXMAN_REGION_OUT;
     }
 }
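
A hedged usage sketch of the three-way containment test, again assuming the
pixman_region16_t flavor of the API from pixman.h:

#include <stdio.h>
#include <pixman.h>

int
main (void)
{
    pixman_region16_t region;
    pixman_box16_t box = { 5, 5, 15, 15 };      /* x1, y1, x2, y2 */

    pixman_region_init_rect (&region, 0, 0, 10, 10);

    switch (pixman_region_contains_rectangle (&region, &box))
    {
    case PIXMAN_REGION_IN:
        printf ("box is fully inside\n");
        break;
    case PIXMAN_REGION_OUT:
        printf ("box is fully outside\n");
        break;
    case PIXMAN_REGION_PART:
        printf ("box is partially inside\n");   /* this case here */
        break;
    }

    pixman_region_fini (&region);

    return 0;
}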
 
 /* PREFIX(_translate) (region, x, y)
-   translates in place
-*/
+ * translates in place
+ */
 
 PIXMAN_EXPORT void
-PREFIX(_translate) (region_type_t * region, int x, int y)
+PREFIX (_translate) (region_type_t *region, int x, int y)
 {
     int x1, x2, y1, y2;
     int nbox;
     box_type_t * pbox;
 
-    GOOD(region);
+    GOOD (region);
     region->extents.x1 = x1 = region->extents.x1 + x;
     region->extents.y1 = y1 = region->extents.y1 + y;
     region->extents.x2 = x2 = region->extents.x2 + x;
     region->extents.y2 = y2 = region->extents.y2 + y;
-    if (((x1 - SHRT_MIN)|(y1 - SHRT_MIN)|(SHRT_MAX - x2)|(SHRT_MAX - y2)) >= 0)
+    
+    if (((x1 - SHRT_MIN) | (y1 - SHRT_MIN) | (SHRT_MAX - x2) | (SHRT_MAX - y2)) >= 0)
     {
-	if (region->data && (nbox = region->data->numRects))
-	{
-	    for (pbox = PIXREGION_BOXPTR(region); nbox--; pbox++)
-	    {
-		pbox->x1 += x;
-		pbox->y1 += y;
-		pbox->x2 += x;
-		pbox->y2 += y;
+        if (region->data && (nbox = region->data->numRects))
+        {
+            for (pbox = PIXREGION_BOXPTR (region); nbox--; pbox++)
+            {
+                pbox->x1 += x;
+                pbox->y1 += y;
+                pbox->x2 += x;
+                pbox->y2 += y;
 	    }
 	}
-	return;
+        return;
     }
-    if (((x2 - SHRT_MIN)|(y2 - SHRT_MIN)|(SHRT_MAX - x1)|(SHRT_MAX - y1)) <= 0)
+
+    if (((x2 - SHRT_MIN) | (y2 - SHRT_MIN) | (SHRT_MAX - x1) | (SHRT_MAX - y1)) <= 0)
     {
-	region->extents.x2 = region->extents.x1;
-	region->extents.y2 = region->extents.y1;
-	FREE_DATA(region);
-	region->data = pixman_region_empty_data;
-	return;
+        region->extents.x2 = region->extents.x1;
+        region->extents.y2 = region->extents.y1;
+        FREE_DATA (region);
+        region->data = pixman_region_empty_data;
+        return;
     }
+
     if (x1 < SHRT_MIN)
 	region->extents.x1 = SHRT_MIN;
     else if (x2 > SHRT_MAX)
 	region->extents.x2 = SHRT_MAX;
+
     if (y1 < SHRT_MIN)
 	region->extents.y1 = SHRT_MIN;
     else if (y2 > SHRT_MAX)
 	region->extents.y2 = SHRT_MAX;
+
     if (region->data && (nbox = region->data->numRects))
     {
-	box_type_t * pbox_out;
+        box_type_t * pbox_out;
 
-	for (pbox_out = pbox = PIXREGION_BOXPTR(region); nbox--; pbox++)
-	{
-	    pbox_out->x1 = x1 = pbox->x1 + x;
-	    pbox_out->y1 = y1 = pbox->y1 + y;
-	    pbox_out->x2 = x2 = pbox->x2 + x;
-	    pbox_out->y2 = y2 = pbox->y2 + y;
-	    if (((x2 - SHRT_MIN)|(y2 - SHRT_MIN)|
-		 (SHRT_MAX - x1)|(SHRT_MAX - y1)) <= 0)
-	    {
-		region->data->numRects--;
-		continue;
+        for (pbox_out = pbox = PIXREGION_BOXPTR (region); nbox--; pbox++)
+        {
+            pbox_out->x1 = x1 = pbox->x1 + x;
+            pbox_out->y1 = y1 = pbox->y1 + y;
+            pbox_out->x2 = x2 = pbox->x2 + x;
+            pbox_out->y2 = y2 = pbox->y2 + y;
+
+            if (((x2 - SHRT_MIN) | (y2 - SHRT_MIN) |
+                 (SHRT_MAX - x1) | (SHRT_MAX - y1)) <= 0)
+            {
+                region->data->numRects--;
+                continue;
 	    }
-	    if (x1 < SHRT_MIN)
+
+            if (x1 < SHRT_MIN)
 		pbox_out->x1 = SHRT_MIN;
-	    else if (x2 > SHRT_MAX)
+            else if (x2 > SHRT_MAX)
 		pbox_out->x2 = SHRT_MAX;
-	    if (y1 < SHRT_MIN)
+
+            if (y1 < SHRT_MIN)
 		pbox_out->y1 = SHRT_MIN;
-	    else if (y2 > SHRT_MAX)
+            else if (y2 > SHRT_MAX)
 		pbox_out->y2 = SHRT_MAX;
-	    pbox_out++;
+
+            pbox_out++;
 	}
-	if (pbox_out != pbox)
-	{
-	    if (region->data->numRects == 1)
+
+        if (pbox_out != pbox)
+        {
+            if (region->data->numRects == 1)
+            {
+                region->extents = *PIXREGION_BOXPTR (region);
+                FREE_DATA (region);
+                region->data = (region_data_type_t *)NULL;
+	    }
+            else
 	    {
-		region->extents = *PIXREGION_BOXPTR(region);
-		FREE_DATA(region);
-		region->data = (region_data_type_t *)NULL;
+		pixman_set_extents (region);
 	    }
-	    else
-		pixman_set_extents(region);
 	}
     }
 }
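
(Aside on the overflow tests in the function above: they pack four range checks into one branch. If none of the differences (x1 - SHRT_MIN), (y1 - SHRT_MIN), (SHRT_MAX - x2), (SHRT_MAX - y2) is negative, ORing them together leaves the sign bit clear, so a single >= 0 comparison proves the translated extents still fit in 16-bit coordinate space. A minimal standalone sketch of the same trick, assuming the inputs started within range so the subtractions cannot overflow int; the helper name is hypothetical:

    #include <limits.h>

    /* Nonzero when [x1, x2] x [y1, y2] lies entirely within the signed
     * 16-bit range.  ORing the four differences accumulates their sign
     * bits, so one comparison covers all four tests. */
    static int
    box_fits_in_short (int x1, int y1, int x2, int y2)
    {
        return ((x1 - SHRT_MIN) | (y1 - SHRT_MIN) |
                (SHRT_MAX - x2) | (SHRT_MAX - y2)) >= 0;
    }
)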
 
 PIXMAN_EXPORT void
-PREFIX(_reset) (region_type_t *region, box_type_t *box)
+PREFIX (_reset) (region_type_t *region, box_type_t *box)
 {
-    GOOD(region);
-    assert(box->x1<=box->x2);
-    assert(box->y1<=box->y2);
+    GOOD (region);
+
+    assert (box->x1 <= box->x2);
+    assert (box->y1 <= box->y2);
+
     region->extents = *box;
-    FREE_DATA(region);
+
+    FREE_DATA (region);
+
     region->data = (region_data_type_t *)NULL;
 }
 
 /* box is "return" value */
 PIXMAN_EXPORT int
-PREFIX(_contains_point) (region_type_t * region,
-			     int x, int y,
-			     box_type_t * box)
+PREFIX (_contains_point) (region_type_t * region,
+                          int x, int y,
+                          box_type_t * box)
 {
     box_type_t *pbox, *pbox_end;
     int numRects;
 
-    GOOD(region);
-    numRects = PIXREGION_NUMRECTS(region);
-    if (!numRects || !INBOX(&region->extents, x, y))
-        return(FALSE);
+    GOOD (region);
+    numRects = PIXREGION_NUMRECTS (region);
+
+    if (!numRects || !INBOX (&region->extents, x, y))
+	return(FALSE);
+
     if (numRects == 1)
     {
         if (box)
 	    *box = region->extents;
 
-	return(TRUE);
+        return(TRUE);
     }
-    for (pbox = PIXREGION_BOXPTR(region), pbox_end = pbox + numRects;
+
+    for (pbox = PIXREGION_BOXPTR (region), pbox_end = pbox + numRects;
 	 pbox != pbox_end;
 	 pbox++)
     {
         if (y >= pbox->y2)
-	   continue;		/* not there yet */
-	if ((y < pbox->y1) || (x < pbox->x1))
-	   break;		/* missed it */
-	if (x >= pbox->x2)
-	   continue;		/* not there yet */
+	    continue;           /* not there yet */
+
+        if ((y < pbox->y1) || (x < pbox->x1))
+	    break;              /* missed it */
+
+        if (x >= pbox->x2)
+	    continue;           /* not there yet */
 
         if (box)
 	    *box = *pbox;
 
-	return(TRUE);
+        return(TRUE);
     }
+
     return(FALSE);
 }
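
(The scan above relies on the region's boxes being stored in y-x banded order, which is why the loop can stop as soon as it passes the band that could contain (x, y). A hedged usage sketch against the public 16-bit entry points, with error handling omitted:

    pixman_region16_t region;
    pixman_box16_t hit;

    pixman_region_init_rect (&region, 10, 10, 20, 20); /* x, y, w, h */

    if (pixman_region_contains_point (&region, 15, 15, &hit))
    {
        /* hit now holds the box containing the point */
    }

    pixman_region_fini (&region);
)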
 
 PIXMAN_EXPORT int
-PREFIX(_not_empty) (region_type_t * region)
+PREFIX (_not_empty) (region_type_t * region)
 {
-    GOOD(region);
-    return(!PIXREGION_NIL(region));
+    GOOD (region);
+
+    return(!PIXREGION_NIL (region));
 }
 
 PIXMAN_EXPORT box_type_t *
-PREFIX(_extents) (region_type_t * region)
+PREFIX (_extents) (region_type_t * region)
 {
-    GOOD(region);
+    GOOD (region);
+
     return(&region->extents);
 }
 
 /*
-    Clip a list of scanlines to a region.  The caller has allocated the
-    space.  FSorted is non-zero if the scanline origins are in ascending
-    order.
-    returns the number of new, clipped scanlines.
-*/
+ * Clip a list of scanlines to a region.  The caller has allocated the
+ * space.  FSorted is non-zero if the scanline origins are in ascending order.
+ *
+ * returns the number of new, clipped scanlines.
+ */
 
 PIXMAN_EXPORT pixman_bool_t
-PREFIX(_selfcheck) (reg)
-    region_type_t * reg;
+PREFIX (_selfcheck) (region_type_t *reg)
 {
     int i, numRects;
 
     if ((reg->extents.x1 > reg->extents.x2) ||
-	(reg->extents.y1 > reg->extents.y2))
+        (reg->extents.y1 > reg->extents.y2))
+    {
 	return FALSE;
-    numRects = PIXREGION_NUMRECTS(reg);
+    }
+
+    numRects = PIXREGION_NUMRECTS (reg);
     if (!numRects)
+    {
 	return ((reg->extents.x1 == reg->extents.x2) &&
-		(reg->extents.y1 == reg->extents.y2) &&
-		(reg->data->size || (reg->data == pixman_region_empty_data)));
+	        (reg->extents.y1 == reg->extents.y2) &&
+	        (reg->data->size || (reg->data == pixman_region_empty_data)));
+    }
     else if (numRects == 1)
+    {
 	return (!reg->data);
+    }
     else
     {
-	box_type_t * pbox_p, * pbox_n;
-	box_type_t box;
+        box_type_t * pbox_p, * pbox_n;
+        box_type_t box;
 
-	pbox_p = PIXREGION_RECTS(reg);
-	box = *pbox_p;
-	box.y2 = pbox_p[numRects-1].y2;
-	pbox_n = pbox_p + 1;
-	for (i = numRects; --i > 0; pbox_p++, pbox_n++)
-	{
-	    if ((pbox_n->x1 >= pbox_n->x2) ||
-		(pbox_n->y1 >= pbox_n->y2))
+        pbox_p = PIXREGION_RECTS (reg);
+        box = *pbox_p;
+        box.y2 = pbox_p[numRects - 1].y2;
+        pbox_n = pbox_p + 1;
+
+        for (i = numRects; --i > 0; pbox_p++, pbox_n++)
+        {
+            if ((pbox_n->x1 >= pbox_n->x2) ||
+                (pbox_n->y1 >= pbox_n->y2))
+	    {
 		return FALSE;
-	    if (pbox_n->x1 < box.x1)
-	        box.x1 = pbox_n->x1;
-	    if (pbox_n->x2 > box.x2)
+	    }
+
+            if (pbox_n->x1 < box.x1)
+		box.x1 = pbox_n->x1;
+	    
+            if (pbox_n->x2 > box.x2)
 		box.x2 = pbox_n->x2;
-	    if ((pbox_n->y1 < pbox_p->y1) ||
-		((pbox_n->y1 == pbox_p->y1) &&
-		 ((pbox_n->x1 < pbox_p->x2) || (pbox_n->y2 != pbox_p->y2))))
+	    
+            if ((pbox_n->y1 < pbox_p->y1) ||
+                ((pbox_n->y1 == pbox_p->y1) &&
+                 ((pbox_n->x1 < pbox_p->x2) || (pbox_n->y2 != pbox_p->y2))))
+	    {
 		return FALSE;
+	    }
 	}
-	return ((box.x1 == reg->extents.x1) &&
-		(box.x2 == reg->extents.x2) &&
-		(box.y1 == reg->extents.y1) &&
-		(box.y2 == reg->extents.y2));
+
+        return ((box.x1 == reg->extents.x1) &&
+                (box.x2 == reg->extents.x2) &&
+                (box.y1 == reg->extents.y1) &&
+                (box.y2 == reg->extents.y2));
     }
 }
 
 PIXMAN_EXPORT pixman_bool_t
-PREFIX(_init_rects) (region_type_t *region,
-		     box_type_t *boxes, int count)
+PREFIX (_init_rects) (region_type_t *region,
+                      box_type_t *boxes, int count)
 {
     box_type_t *rects;
     int displacement;
@@ -2093,16 +2385,17 @@ PREFIX(_init_rects) (region_type_t *region,
 
     /* if it's 1, then we just want to set the extents, so call
      * the existing method. */
-    if (count == 1) {
-       PREFIX(_init_rect) (region,
-                               boxes[0].x1,
-                               boxes[0].y1,
-                               boxes[0].x2 - boxes[0].x1,
-                               boxes[0].y2 - boxes[0].y1);
-       return TRUE;
+    if (count == 1)
+    {
+        PREFIX (_init_rect) (region,
+                             boxes[0].x1,
+                             boxes[0].y1,
+                             boxes[0].x2 - boxes[0].x1,
+                             boxes[0].y2 - boxes[0].y1);
+        return TRUE;
     }
 
-    PREFIX(_init) (region);
+    PREFIX (_init) (region);
 
     /* if it's 0, don't call pixman_rect_alloc -- 0 rectangles is
      * a special case, and causing pixman_rect_alloc would cause
@@ -2110,27 +2403,27 @@ PREFIX(_init_rects) (region_type_t *region,
      * static pixman_region_empty_data data).
      */
     if (count == 0)
-        return TRUE;
+	return TRUE;
 
-    if (!pixman_rect_alloc(region, count))
+    if (!pixman_rect_alloc (region, count))
 	return FALSE;
 
-    rects = PIXREGION_RECTS(region);
-    
+    rects = PIXREGION_RECTS (region);
+
     /* Copy in the rects */
     memcpy (rects, boxes, sizeof(box_type_t) * count);
     region->data->numRects = count;
 
     /* Eliminate empty and malformed rectangles */
     displacement = 0;
-    
+
     for (i = 0; i < count; ++i)
     {
-	box_type_t *box = &rects[i];
+        box_type_t *box = &rects[i];
 
-	if (box->x1 >= box->x2 || box->y1 >= box->y2)
+        if (box->x1 >= box->x2 || box->y1 >= box->y2)
 	    displacement++;
-	else if (displacement)
+        else if (displacement)
 	    rects[i - displacement] = rects[i];
     }
 
@@ -2141,25 +2434,26 @@ PREFIX(_init_rects) (region_type_t *region,
      */
     if (region->data->numRects == 0)
     {
-	FREE_DATA (region);
-	PREFIX(_init) (region);
-	
-	return TRUE;
+        FREE_DATA (region);
+        PREFIX (_init) (region);
+
+        return TRUE;
     }
 
     if (region->data->numRects == 1)
     {
-	region->extents = rects[0];
+        region->extents = rects[0];
 
-	FREE_DATA (region);
-	region->data = NULL;
+        FREE_DATA (region);
+        region->data = NULL;
 
-	GOOD (region);
-	
-	return TRUE;
+        GOOD (region);
+
+        return TRUE;
     }
 
     /* Validate */
     region->extents.x1 = region->extents.x2 = 0;
+
     return validate (region, &i);
 }
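
(For callers, the net effect of _init_rects is: one box sets the extents directly, zero boxes yield the empty region, and anything else is copied in, scrubbed of degenerate boxes, and passed through validate() to restore the banding invariant. A hedged usage sketch with the 32-bit variant:

    pixman_region32_t region;
    pixman_box32_t boxes[2] = {
        { 0, 0, 10, 10 },    /* x1, y1, x2, y2 */
        { 5, 5, 20, 20 }     /* overlaps the first; validate() merges */
    };

    if (pixman_region32_init_rects (&region, boxes, 2))
    {
        /* ... use the region ... */
        pixman_region32_fini (&region);
    }
)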
commit 317df68e94498b6a287eb736a6e5991e8b7d2d78
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 19:59:17 2009 -0400

    Reindent and reformat pixman-radial-gradient.c

diff --git a/pixman/pixman-radial-gradient.c b/pixman/pixman-radial-gradient.c
index 870c4ce..06d6ef9 100644
--- a/pixman/pixman-radial-gradient.c
+++ b/pixman/pixman-radial-gradient.c
@@ -32,8 +32,13 @@
 #include "pixman-private.h"
 
 static void
-radial_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-				 const uint32_t *mask, uint32_t mask_bits)
+radial_gradient_get_scanline_32 (pixman_image_t *image,
+                                 int             x,
+                                 int             y,
+                                 int             width,
+                                 uint32_t *      buffer,
+                                 const uint32_t *mask,
+                                 uint32_t        mask_bits)
 {
     /*
      * In the radial gradient problem we are given two circles (c₁,r₁) and
@@ -153,8 +158,8 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width,
     gradient_t *gradient = (gradient_t *)image;
     source_image_t *source = (source_image_t *)image;
     radial_gradient_t *radial = (radial_gradient_t *)image;
-    uint32_t       *end = buffer + width;
-    pixman_gradient_walker_t  walker;
+    uint32_t *end = buffer + width;
+    pixman_gradient_walker_t walker;
     pixman_bool_t affine = TRUE;
     double cx = 1.;
     double cy = 0.;
@@ -162,29 +167,37 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width,
     double rx = x + 0.5;
     double ry = y + 0.5;
     double rz = 1.;
-    
+
     _pixman_gradient_walker_init (&walker, gradient, source->common.repeat);
-    
-    if (source->common.transform) {
+
+    if (source->common.transform)
+    {
 	pixman_vector_t v;
 	/* reference point is the center of the pixel */
-	v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
-	v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
+	v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
+	v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
 	v.vector[2] = pixman_fixed_1;
+	
 	if (!pixman_transform_point_3d (source->common.transform, &v))
 	    return;
+
+	cx = source->common.transform->matrix[0][0] / 65536.;
+	cy = source->common.transform->matrix[1][0] / 65536.;
+	cz = source->common.transform->matrix[2][0] / 65536.;
 	
-	cx = source->common.transform->matrix[0][0]/65536.;
-	cy = source->common.transform->matrix[1][0]/65536.;
-	cz = source->common.transform->matrix[2][0]/65536.;
-	rx = v.vector[0]/65536.;
-	ry = v.vector[1]/65536.;
-	rz = v.vector[2]/65536.;
-	affine = source->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1;
+	rx = v.vector[0] / 65536.;
+	ry = v.vector[1] / 65536.;
+	rz = v.vector[2] / 65536.;
+
+	affine =
+	    source->common.transform->matrix[2][0] == 0 &&
+	    v.vector[2] == pixman_fixed_1;
     }
-    
-    if (affine) {
-	while (buffer < end) {
+
+    if (affine)
+    {
+	while (buffer < end)
+	{
 	    if (!mask || *mask++ & mask_bits)
 	    {
 		double pdx, pdy;
@@ -194,34 +207,37 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width,
 		double c1y = radial->c1.y / 65536.0;
 		double r1  = radial->c1.radius / 65536.0;
 		pixman_fixed_48_16_t t;
-		
+
 		pdx = rx - c1x;
 		pdy = ry - c1y;
-		
-		B = -2 * (  pdx * radial->cdx
-			    + pdy * radial->cdy
-			    + r1 * radial->dr);
-		C = (pdx * pdx + pdy * pdy - r1 * r1);
-		
+
+		B = -2 * (pdx * radial->cdx +
+			  pdy * radial->cdy +
+			  r1 * radial->dr);
+		C = pdx * pdx + pdy * pdy - r1 * r1;
+
 		det = (B * B) - (4 * radial->A * C);
 		if (det < 0.0)
 		    det = 0.0;
-		
+
 		if (radial->A < 0)
-		    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
+		    t = (pixman_fixed_48_16_t) ((-B - sqrt (det)) / (2.0 * radial->A) * 65536);
 		else
-		    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
-		
-		*(buffer) = _pixman_gradient_walker_pixel (&walker, t);
+		    t = (pixman_fixed_48_16_t) ((-B + sqrt (det)) / (2.0 * radial->A) * 65536);
+
+		*buffer = _pixman_gradient_walker_pixel (&walker, t);
 	    }
 	    ++buffer;
-	    
+
 	    rx += cx;
 	    ry += cy;
 	}
-    } else {
+    }
+    else
+    {
 	/* projective */
-	while (buffer < end) {
+	while (buffer < end)
+	{
 	    if (!mask || *mask++ & mask_bits)
 	    {
 		double pdx, pdy;
@@ -232,41 +248,44 @@ radial_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width,
 		double r1  = radial->c1.radius / 65536.0;
 		pixman_fixed_48_16_t t;
 		double x, y;
-		
-		if (rz != 0) {
-		    x = rx/rz;
-		    y = ry/rz;
-		} else {
+
+		if (rz != 0)
+		{
+		    x = rx / rz;
+		    y = ry / rz;
+		}
+		else
+		{
 		    x = y = 0.;
 		}
-		
+
 		pdx = x - c1x;
 		pdy = y - c1y;
-		
-		B = -2 * (  pdx * radial->cdx
-			    + pdy * radial->cdy
-			    + r1 * radial->dr);
+
+		B = -2 * (pdx * radial->cdx +
+			  pdy * radial->cdy +
+			  r1 * radial->dr);
 		C = (pdx * pdx + pdy * pdy - r1 * r1);
-		
+
 		det = (B * B) - (4 * radial->A * C);
 		if (det < 0.0)
 		    det = 0.0;
-		
+
 		if (radial->A < 0)
-		    t = (pixman_fixed_48_16_t) ((- B - sqrt(det)) / (2.0 * radial->A) * 65536);
+		    t = (pixman_fixed_48_16_t) ((-B - sqrt (det)) / (2.0 * radial->A) * 65536);
 		else
-		    t = (pixman_fixed_48_16_t) ((- B + sqrt(det)) / (2.0 * radial->A) * 65536);
-		
-		*(buffer) = _pixman_gradient_walker_pixel (&walker, t);
+		    t = (pixman_fixed_48_16_t) ((-B + sqrt (det)) / (2.0 * radial->A) * 65536);
+
+		*buffer = _pixman_gradient_walker_pixel (&walker, t);
 	    }
-	    ++buffer;
 	    
+	    ++buffer;
+
 	    rx += cx;
 	    ry += cy;
 	    rz += cz;
 	}
     }
-    
 }
 
 static void
@@ -277,33 +296,33 @@ radial_gradient_property_changed (pixman_image_t *image)
 }
 
 PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_radial_gradient (pixman_point_fixed_t         *inner,
-				     pixman_point_fixed_t         *outer,
-				     pixman_fixed_t                inner_radius,
-				     pixman_fixed_t                outer_radius,
-				     const pixman_gradient_stop_t *stops,
-				     int                           n_stops)
+pixman_image_create_radial_gradient (pixman_point_fixed_t *        inner,
+                                     pixman_point_fixed_t *        outer,
+                                     pixman_fixed_t                inner_radius,
+                                     pixman_fixed_t                outer_radius,
+                                     const pixman_gradient_stop_t *stops,
+                                     int                           n_stops)
 {
     pixman_image_t *image;
     radial_gradient_t *radial;
-    
+
     return_val_if_fail (n_stops >= 2, NULL);
-    
-    image = _pixman_image_allocate();
-    
+
+    image = _pixman_image_allocate ();
+
     if (!image)
 	return NULL;
-    
+
     radial = &image->radial;
-    
+
     if (!_pixman_init_gradient (&radial->common, stops, n_stops))
     {
 	free (image);
 	return NULL;
     }
-    
+
     image->type = RADIAL;
-    
+
     radial->c1.x = inner->x;
     radial->c1.y = inner->y;
     radial->c1.radius = inner_radius;
@@ -313,14 +332,14 @@ pixman_image_create_radial_gradient (pixman_point_fixed_t         *inner,
     radial->cdx = pixman_fixed_to_double (radial->c2.x - radial->c1.x);
     radial->cdy = pixman_fixed_to_double (radial->c2.y - radial->c1.y);
     radial->dr = pixman_fixed_to_double (radial->c2.radius - radial->c1.radius);
-    radial->A = (radial->cdx * radial->cdx
-		 + radial->cdy * radial->cdy
-		 - radial->dr  * radial->dr);
-    
+    radial->A = (radial->cdx * radial->cdx +
+		 radial->cdy * radial->cdy -
+		 radial->dr  * radial->dr);
+
     image->common.property_changed = radial_gradient_property_changed;
-    
+
     radial_gradient_property_changed (image);
-    
+
     return image;
 }
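
(In scalar form, the affine inner loop above solves one quadratic per pixel: with P the sample point, c1/r1 the inner circle, and (cdx, cdy, dr) the center and radius deltas, A = cdx² + cdy² - dr² is fixed per gradient, while B and C vary per pixel and the root is taken on the side matching the sign of A. A minimal sketch of just that computation, in plain doubles with no fixed point; the function name is hypothetical:

    #include <math.h>

    /* Gradient parameter t for sample point (px, py), following the
     * quadratic set up in radial_gradient_get_scanline_32 above. */
    static double
    radial_t (double px, double py,
              double c1x, double c1y, double r1,
              double cdx, double cdy, double dr,
              double A)                /* cdx*cdx + cdy*cdy - dr*dr */
    {
        double pdx = px - c1x;
        double pdy = py - c1y;
        double B = -2 * (pdx * cdx + pdy * cdy + r1 * dr);
        double C = pdx * pdx + pdy * pdy - r1 * r1;
        double det = B * B - 4 * A * C;

        if (det < 0.0)
            det = 0.0;

        if (A < 0)
            return (-B - sqrt (det)) / (2.0 * A);
        else
            return (-B + sqrt (det)) / (2.0 * A);
    }
)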
 
commit 8820c81b50299f13791594fe6ddd01d536745231
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 19:37:45 2009 -0400

    Reindent and reformat pixman-mmx.c

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index fbc7322..713b9f0 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -101,40 +101,40 @@ typedef struct
 } mmx_data_t;
 
 #if defined(_MSC_VER)
-# define MMXDATA_INIT(field, val) { val##UI64 }
-#elif defined(M64_MEMBER)	/* __m64 is a struct, not an integral type */
-# define MMXDATA_INIT(field, val) field =   { val##ULL }
-#else				/* __m64 is an integral type */
-# define MMXDATA_INIT(field, val) field =   val##ULL
+# define MMXDATA_INIT(field, val) { val ## UI64 }
+#elif defined(M64_MEMBER)       /* __m64 is a struct, not an integral type */
+# define MMXDATA_INIT(field, val) field =   { val ## ULL }
+#else                           /* __m64 is an integral type */
+# define MMXDATA_INIT(field, val) field =   val ## ULL
 #endif
 
 static const mmx_data_t c =
 {
-    MMXDATA_INIT(.mmx_4x00ff,			0x00ff00ff00ff00ff),
-    MMXDATA_INIT(.mmx_4x0080,			0x0080008000800080),
-    MMXDATA_INIT(.mmx_565_rgb,			0x000001f0003f001f),
-    MMXDATA_INIT(.mmx_565_unpack_multiplier,	0x0000008404100840),
-    MMXDATA_INIT(.mmx_565_r,			0x000000f800000000),
-    MMXDATA_INIT(.mmx_565_g,			0x0000000000fc0000),
-    MMXDATA_INIT(.mmx_565_b,			0x00000000000000f8),
-    MMXDATA_INIT(.mmx_mask_0,			0xffffffffffff0000),
-    MMXDATA_INIT(.mmx_mask_1,			0xffffffff0000ffff),
-    MMXDATA_INIT(.mmx_mask_2,			0xffff0000ffffffff),
-    MMXDATA_INIT(.mmx_mask_3,			0x0000ffffffffffff),
-    MMXDATA_INIT(.mmx_full_alpha,		0x00ff000000000000),
-    MMXDATA_INIT(.mmx_ffff0000ffff0000,		0xffff0000ffff0000),
-    MMXDATA_INIT(.mmx_0000ffff00000000,		0x0000ffff00000000),
-    MMXDATA_INIT(.mmx_000000000000ffff,		0x000000000000ffff),
+    MMXDATA_INIT (.mmx_4x00ff,                   0x00ff00ff00ff00ff),
+    MMXDATA_INIT (.mmx_4x0080,                   0x0080008000800080),
+    MMXDATA_INIT (.mmx_565_rgb,                  0x000001f0003f001f),
+    MMXDATA_INIT (.mmx_565_unpack_multiplier,    0x0000008404100840),
+    MMXDATA_INIT (.mmx_565_r,                    0x000000f800000000),
+    MMXDATA_INIT (.mmx_565_g,                    0x0000000000fc0000),
+    MMXDATA_INIT (.mmx_565_b,                    0x00000000000000f8),
+    MMXDATA_INIT (.mmx_mask_0,                   0xffffffffffff0000),
+    MMXDATA_INIT (.mmx_mask_1,                   0xffffffff0000ffff),
+    MMXDATA_INIT (.mmx_mask_2,                   0xffff0000ffffffff),
+    MMXDATA_INIT (.mmx_mask_3,                   0x0000ffffffffffff),
+    MMXDATA_INIT (.mmx_full_alpha,               0x00ff000000000000),
+    MMXDATA_INIT (.mmx_ffff0000ffff0000,         0xffff0000ffff0000),
+    MMXDATA_INIT (.mmx_0000ffff00000000,         0x0000ffff00000000),
+    MMXDATA_INIT (.mmx_000000000000ffff,         0x000000000000ffff),
 };
 
 #ifdef __GNUC__
 #    ifdef __ICC
-#        define MC(x)  M64(c.mmx_##x)
+#        define MC(x)  M64 (c.mmx_ ## x)
 #    else
-#        define MC(x) ((__m64)c.mmx_##x)
+#        define MC(x) ((__m64)c.mmx_ ## x)
 #    endif
 #else
-#    define MC(x) c.mmx_##x
+#    define MC(x) c.mmx_ ## x
 #endif
 
 static force_inline __m64
@@ -142,12 +142,12 @@ M64 (uint64_t x)
 {
 #ifdef __ICC
     return _mm_cvtsi64_m64 (x);
-#elif defined M64_MEMBER	/* __m64 is a struct, not an integral type */
+#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
     __m64 res;
 
     res.M64_MEMBER = x;
     return res;
-#else				/* __m64 is an integral type */
+#else                           /* __m64 is an integral type */
     return (__m64)x;
 #endif
 }
@@ -157,16 +157,17 @@ UINT64 (__m64 x)
 {
 #ifdef __ICC
     return _mm_cvtm64_si64 (x);
-#elif defined M64_MEMBER	/* __m64 is a struct, not an integral type */
+#elif defined M64_MEMBER        /* __m64 is a struct, not an integral type */
     uint64_t res = x.M64_MEMBER;
     return res;
-#else				/* __m64 is an integral type */
+#else                           /* __m64 is an integral type */
     return (uint64_t)x;
 #endif
 }
 
 static force_inline __m64
-shift (__m64 v, int s)
+shift (__m64 v,
+       int   s)
 {
     if (s > 0)
 	return _mm_slli_si64 (v, s);
@@ -179,7 +180,7 @@ shift (__m64 v, int s)
 static force_inline __m64
 negate (__m64 mask)
 {
-    return _mm_xor_si64 (mask, MC(4x00ff));
+    return _mm_xor_si64 (mask, MC (4x00ff));
 }
 
 static force_inline __m64
@@ -188,7 +189,7 @@ pix_multiply (__m64 a, __m64 b)
     __m64 res;
 
     res = _mm_mullo_pi16 (a, b);
-    res = _mm_adds_pu16 (res, MC(4x0080));
+    res = _mm_adds_pu16 (res, MC (4x0080));
     res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
     res = _mm_srli_pi16 (res, 8);
 
@@ -198,7 +199,7 @@ pix_multiply (__m64 a, __m64 b)
 static force_inline __m64
 pix_add (__m64 a, __m64 b)
 {
-    return  _mm_adds_pu8 (a, b);
+    return _mm_adds_pu8 (a, b);
 }
 
 static force_inline __m64
@@ -239,9 +240,9 @@ invert_colors (__m64 pixel)
 
     x = y = z = pixel;
 
-    x = _mm_and_si64 (x, MC(ffff0000ffff0000));
-    y = _mm_and_si64 (y, MC(000000000000ffff));
-    z = _mm_and_si64 (z, MC(0000ffff00000000));
+    x = _mm_and_si64 (x, MC (ffff0000ffff0000));
+    y = _mm_and_si64 (y, MC (000000000000ffff));
+    z = _mm_and_si64 (z, MC (0000ffff00000000));
 
     y = shift (y, 32);
     z = shift (z, -32);
@@ -253,23 +254,24 @@ invert_colors (__m64 pixel)
 }
 
 static force_inline __m64
-over (__m64 src, __m64 srca, __m64 dest)
+over (__m64 src,
+      __m64 srca,
+      __m64 dest)
 {
-    return  _mm_adds_pu8 (src, pix_multiply(dest, negate(srca)));
+    return _mm_adds_pu8 (src, pix_multiply (dest, negate (srca)));
 }
 
 static force_inline __m64
 over_rev_non_pre (__m64 src, __m64 dest)
 {
     __m64 srca = expand_alpha (src);
-    __m64 srcfaaa = _mm_or_si64 (srca, MC(full_alpha));
+    __m64 srcfaaa = _mm_or_si64 (srca, MC (full_alpha));
 
-    return over(pix_multiply(invert_colors(src), srcfaaa), srca, dest);
+    return over (pix_multiply (invert_colors (src), srcfaaa), srca, dest);
 }
 
 static force_inline __m64
-in (__m64 src,
-    __m64 mask)
+in (__m64 src, __m64 mask)
 {
     return pix_multiply (src, mask);
 }
@@ -277,28 +279,29 @@ in (__m64 src,
 static force_inline __m64
 in_over_full_src_alpha (__m64 src, __m64 mask, __m64 dest)
 {
-    src = _mm_or_si64 (src, MC(full_alpha));
+    src = _mm_or_si64 (src, MC (full_alpha));
 
-    return over(in (src, mask), mask, dest);
+    return over (in (src, mask), mask, dest);
 }
 
 #ifndef _MSC_VER
 static force_inline __m64
-in_over (__m64 src,
-	 __m64 srca,
-	 __m64 mask,
-	 __m64 dest)
+in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
 {
-    return over(in(src, mask), pix_multiply(srca, mask), dest);
+    return over (in (src, mask), pix_multiply (srca, mask), dest);
 }
+
 #else
-#define in_over(src, srca, mask, dest) over(in(src, mask), pix_multiply(srca, mask), dest)
+
+#define in_over(src, srca, mask, dest)					\
+    over (in (src, mask), pix_multiply (srca, mask), dest)
+
 #endif
 
 static force_inline __m64
 load8888 (uint32_t v)
 {
-    return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64());
+    return _mm_unpacklo_pi8 (_mm_cvtsi32_si64 (v), _mm_setzero_si64 ());
 }
 
 static force_inline __m64
@@ -310,7 +313,7 @@ pack8888 (__m64 lo, __m64 hi)
 static force_inline uint32_t
 store8888 (__m64 v)
 {
-    return _mm_cvtsi64_si32(pack8888(v, _mm_setzero_si64()));
+    return _mm_cvtsi64_si32 (pack8888 (v, _mm_setzero_si64 ()));
 }
 
 /* Expand 16 bits positioned at @pos (0-3) of a mmx register into
@@ -341,9 +344,9 @@ expand565 (__m64 pixel, int pos)
 
     p = _mm_or_si64 (t1, p);
     p = _mm_or_si64 (t2, p);
-    p = _mm_and_si64 (p, MC(565_rgb));
+    p = _mm_and_si64 (p, MC (565_rgb));
 
-    pixel = _mm_mullo_pi16 (p, MC(565_unpack_multiplier));
+    pixel = _mm_mullo_pi16 (p, MC (565_unpack_multiplier));
     return _mm_srli_pi16 (pixel, 8);
 }
 
@@ -351,15 +354,15 @@ static force_inline __m64
 expand8888 (__m64 in, int pos)
 {
     if (pos == 0)
-	return _mm_unpacklo_pi8 (in, _mm_setzero_si64());
+	return _mm_unpacklo_pi8 (in, _mm_setzero_si64 ());
     else
-	return _mm_unpackhi_pi8 (in, _mm_setzero_si64());
+	return _mm_unpackhi_pi8 (in, _mm_setzero_si64 ());
 }
 
 static force_inline __m64
 expandx888 (__m64 in, int pos)
 {
-    return _mm_or_si64 (expand8888 (in, pos), MC(full_alpha));
+    return _mm_or_si64 (expand8888 (in, pos), MC (full_alpha));
 }
 
 static force_inline __m64
@@ -369,22 +372,22 @@ pack_565 (__m64 pixel, __m64 target, int pos)
     __m64 t = target;
     __m64 r, g, b;
 
-    r = _mm_and_si64 (p, MC(565_r));
-    g = _mm_and_si64 (p, MC(565_g));
-    b = _mm_and_si64 (p, MC(565_b));
+    r = _mm_and_si64 (p, MC (565_r));
+    g = _mm_and_si64 (p, MC (565_g));
+    b = _mm_and_si64 (p, MC (565_b));
 
-    r = shift (r, - (32 - 8) + pos * 16);
-    g = shift (g, - (16 - 3) + pos * 16);
-    b = shift (b, - (0  + 3) + pos * 16);
+    r = shift (r, -(32 - 8) + pos * 16);
+    g = shift (g, -(16 - 3) + pos * 16);
+    b = shift (b, -(0  + 3) + pos * 16);
 
     if (pos == 0)
-	t = _mm_and_si64 (t, MC(mask_0));
+	t = _mm_and_si64 (t, MC (mask_0));
     else if (pos == 1)
-	t = _mm_and_si64 (t, MC(mask_1));
+	t = _mm_and_si64 (t, MC (mask_1));
     else if (pos == 2)
-	t = _mm_and_si64 (t, MC(mask_2));
+	t = _mm_and_si64 (t, MC (mask_2));
     else if (pos == 3)
-	t = _mm_and_si64 (t, MC(mask_3));
+	t = _mm_and_si64 (t, MC (mask_3));
 
     p = _mm_or_si64 (r, t);
     p = _mm_or_si64 (g, p);
@@ -393,26 +396,30 @@ pack_565 (__m64 pixel, __m64 target, int pos)
 }
 
 #ifndef _MSC_VER
+
 static force_inline __m64
 pix_add_mul (__m64 x, __m64 a, __m64 y, __m64 b)
 {
     x = _mm_mullo_pi16 (x, a);
     y = _mm_mullo_pi16 (y, b);
-    x = _mm_adds_pu16 (x, MC(4x0080));
+    x = _mm_adds_pu16 (x, MC (4x0080));
     x = _mm_adds_pu16 (x, y);
     x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8));
     x = _mm_srli_pi16 (x, 8);
 
     return x;
 }
+
 #else
-#define pix_add_mul(x, a, y, b) \
-( x = _mm_mullo_pi16 (x, a), \
-  y = _mm_mullo_pi16 (y, b), \
-  x = _mm_adds_pu16 (x, MC(4x0080)), \
-  x = _mm_adds_pu16 (x, y), \
-  x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8)), \
-  _mm_srli_pi16 (x, 8) )
+
+#define pix_add_mul(x, a, y, b)	 \
+    ( x = _mm_mullo_pi16 (x, a), \
+      y = _mm_mullo_pi16 (y, b), \
+      x = _mm_adds_pu16 (x, MC (4x0080)), \
+      x = _mm_adds_pu16 (x, y), \
+      x = _mm_adds_pu16 (x, _mm_srli_pi16 (x, 8)), \
+      _mm_srli_pi16 (x, 8) )
+
 #endif
 
 /* --------------- MMX code patch for fbcompose.c --------------------- */
@@ -437,511 +444,678 @@ combine (const uint32_t *src, const uint32_t *mask)
 }
 
 static void
-mmx_combine_over_u (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_over_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     const uint32_t *end = dest + width;
 
-    while (dest < end) {
+    while (dest < end)
+    {
 	uint32_t ssrc = combine (src, mask);
 	uint32_t a = ssrc >> 24;
-	if (a == 0xff) {
+
+	if (a == 0xff)
+	{
 	    *dest = ssrc;
-	} else if (ssrc) {
+	}
+	else if (ssrc)
+	{
 	    __m64 s, sa;
-	    s = load8888(ssrc);
-	    sa = expand_alpha(s);
-	    *dest = store8888(over(s, sa, load8888(*dest)));
+	    s = load8888 (ssrc);
+	    sa = expand_alpha (s);
+	    *dest = store8888 (over (s, sa, load8888 (*dest)));
 	}
+
 	++dest;
 	++src;
 	if (mask)
 	    ++mask;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_over_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_over_reverse_u (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dest,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
 {
     const uint32_t *end = dest + width;
 
-    while (dest < end) {
+    while (dest < end)
+    {
 	__m64 d, da;
 	uint32_t s = combine (src, mask);
-	d = load8888(*dest);
-	da = expand_alpha(d);
-	*dest = store8888(over (d, da, load8888(s)));
-        ++dest;
-        ++src;
+	
+	d = load8888 (*dest);
+	da = expand_alpha (d);
+	*dest = store8888 (over (d, da, load8888 (s)));
+
+	++dest;
+	++src;
 	if (mask)
 	    mask++;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_in_u (pixman_implementation_t *imp, pixman_op_t op,
-	       uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_in_u (pixman_implementation_t *imp,
+                  pixman_op_t              op,
+                  uint32_t *               dest,
+                  const uint32_t *         src,
+                  const uint32_t *         mask,
+                  int                      width)
 {
     const uint32_t *end = dest + width;
 
-    while (dest < end) {
-        __m64 x, a;
-        x = load8888 (combine (src, mask));
-        a = load8888(*dest);
-        a = expand_alpha(a);
-        x = pix_multiply(x, a);
-        *dest = store8888(x);
-        ++dest;
-        ++src;
+    while (dest < end)
+    {
+	__m64 x, a;
+	
+	x = load8888 (combine (src, mask));
+	a = load8888 (*dest);
+	a = expand_alpha (a);
+	x = pix_multiply (x, a);
+	
+	*dest = store8888 (x);
+
+	++dest;
+	++src;
 	if (mask)
 	    mask++;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_in_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-		      uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_in_reverse_u (pixman_implementation_t *imp,
+                          pixman_op_t              op,
+                          uint32_t *               dest,
+                          const uint32_t *         src,
+                          const uint32_t *         mask,
+                          int                      width)
 {
     const uint32_t *end = dest + width;
 
-    while (dest < end) {
-        __m64 x, a;
-        x = load8888(*dest);
-        a = load8888(combine (src, mask));
-        a = expand_alpha(a);
-        x = pix_multiply(x, a);
-        *dest = store8888(x);
-        ++dest;
-        ++src;
+    while (dest < end)
+    {
+	__m64 x, a;
+	
+	x = load8888 (*dest);
+	a = load8888 (combine (src, mask));
+	a = expand_alpha (a);
+	x = pix_multiply (x, a);
+	*dest = store8888 (x);
+
+	++dest;
+	++src;
 	if (mask)
 	    mask++;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_out_u (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_out_u (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
 {
     const uint32_t *end = dest + width;
 
-    while (dest < end) {
-        __m64 x, a;
-        x = load8888(combine (src, mask));
-        a = load8888(*dest);
-        a = expand_alpha(a);
-        a = negate(a);
-        x = pix_multiply(x, a);
-        *dest = store8888(x);
-        ++dest;
-        ++src;
+    while (dest < end)
+    {
+	__m64 x, a;
+	
+	x = load8888 (combine (src, mask));
+	a = load8888 (*dest);
+	a = expand_alpha (a);
+	a = negate (a);
+	x = pix_multiply (x, a);
+	*dest = store8888 (x);
+
+	++dest;
+	++src;
 	if (mask)
 	    mask++;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_out_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-		       uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_out_reverse_u (pixman_implementation_t *imp,
+                           pixman_op_t              op,
+                           uint32_t *               dest,
+                           const uint32_t *         src,
+                           const uint32_t *         mask,
+                           int                      width)
 {
     const uint32_t *end = dest + width;
 
-    while (dest < end) {
-        __m64 x, a;
-        x = load8888(*dest);
-        a = load8888(combine (src, mask));
-        a = expand_alpha(a);
-        a = negate(a);
-        x = pix_multiply(x, a);
-        *dest = store8888(x);
-        ++dest;
-        ++src;
+    while (dest < end)
+    {
+	__m64 x, a;
+	
+	x = load8888 (*dest);
+	a = load8888 (combine (src, mask));
+	a = expand_alpha (a);
+	a = negate (a);
+	x = pix_multiply (x, a);
+
+	*dest = store8888 (x);
+
+	++dest;
+	++src;
 	if (mask)
 	    mask++;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_atop_u (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_atop_u (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     const uint32_t *end = dest + width;
 
-    while (dest < end) {
-        __m64 s, da, d, sia;
-        s = load8888(combine (src, mask));
-        d = load8888(*dest);
-        sia = expand_alpha(s);
-        sia = negate(sia);
-        da = expand_alpha(d);
-        s = pix_add_mul (s, da, d, sia);
-        *dest = store8888(s);
-        ++dest;
-        ++src;
+    while (dest < end)
+    {
+	__m64 s, da, d, sia;
+	
+	s = load8888 (combine (src, mask));
+	d = load8888 (*dest);
+	sia = expand_alpha (s);
+	sia = negate (sia);
+	da = expand_alpha (d);
+	s = pix_add_mul (s, da, d, sia);
+	*dest = store8888 (s);
+
+	++dest;
+	++src;
 	if (mask)
 	    mask++;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_atop_reverse_u (pixman_implementation_t *imp, pixman_op_t op,
-			uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_atop_reverse_u (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dest,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
 {
     const uint32_t *end;
 
     end = dest + width;
 
-    while (dest < end) {
-        __m64 s, dia, d, sa;
-        s = load8888(combine(src, mask));
-        d = load8888(*dest);
-        sa = expand_alpha(s);
-        dia = expand_alpha(d);
-        dia = negate(dia);
+    while (dest < end)
+    {
+	__m64 s, dia, d, sa;
+	
+	s = load8888 (combine (src, mask));
+	d = load8888 (*dest);
+	sa = expand_alpha (s);
+	dia = expand_alpha (d);
+	dia = negate (dia);
 	s = pix_add_mul (s, dia, d, sa);
-        *dest = store8888(s);
-        ++dest;
-        ++src;
+	*dest = store8888 (s);
+
+	++dest;
+	++src;
 	if (mask)
 	    mask++;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_xor_u (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_xor_u (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
 {
     const uint32_t *end = dest + width;
 
-    while (dest < end) {
-        __m64 s, dia, d, sia;
-        s = load8888(combine(src, mask));
-        d = load8888(*dest);
-        sia = expand_alpha(s);
-        dia = expand_alpha(d);
-        sia = negate(sia);
-        dia = negate(dia);
+    while (dest < end)
+    {
+	__m64 s, dia, d, sia;
+	
+	s = load8888 (combine (src, mask));
+	d = load8888 (*dest);
+	sia = expand_alpha (s);
+	dia = expand_alpha (d);
+	sia = negate (sia);
+	dia = negate (dia);
 	s = pix_add_mul (s, dia, d, sia);
-        *dest = store8888(s);
-        ++dest;
-        ++src;
+	*dest = store8888 (s);
+
+	++dest;
+	++src;
 	if (mask)
 	    mask++;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_add_u (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_add_u (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
 {
     const uint32_t *end = dest + width;
-    while (dest < end) {
-        __m64 s, d;
-        s = load8888(combine(src,mask));
-        d = load8888(*dest);
-        s = pix_add(s, d);
-        *dest = store8888(s);
-        ++dest;
-        ++src;
+
+    while (dest < end)
+    {
+	__m64 s, d;
+	
+	s = load8888 (combine (src, mask));
+	d = load8888 (*dest);
+	s = pix_add (s, d);
+	*dest = store8888 (s);
+
+	++dest;
+	++src;
 	if (mask)
 	    mask++;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_saturate_u (pixman_implementation_t *imp, pixman_op_t op,
-		     uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_saturate_u (pixman_implementation_t *imp,
+                        pixman_op_t              op,
+                        uint32_t *               dest,
+                        const uint32_t *         src,
+                        const uint32_t *         mask,
+                        int                      width)
 {
     const uint32_t *end = dest + width;
-    while (dest < end) {
-        uint32_t s = combine(src,mask);
-        uint32_t d = *dest;
-        __m64 ms = load8888(s);
-        __m64 md = load8888(d);
-        uint32_t sa = s >> 24;
-        uint32_t da = ~d >> 24;
-
-        if (sa > da) {
-            __m64 msa = load8888(DIV_UN8(da, sa) << 24);
-            msa = expand_alpha(msa);
-            ms = pix_multiply(ms, msa);
-        }
-        md = pix_add(md, ms);
-        *dest = store8888(md);
-        ++src;
-        ++dest;
+
+    while (dest < end)
+    {
+	uint32_t s = combine (src, mask);
+	uint32_t d = *dest;
+	__m64 ms = load8888 (s);
+	__m64 md = load8888 (d);
+	uint32_t sa = s >> 24;
+	uint32_t da = ~d >> 24;
+
+	if (sa > da)
+	{
+	    __m64 msa = load8888 (DIV_UN8 (da, sa) << 24);
+	    msa = expand_alpha (msa);
+	    ms = pix_multiply (ms, msa);
+	}
+
+	md = pix_add (md, ms);
+	*dest = store8888 (md);
+
+	++src;
+	++dest;
 	if (mask)
 	    mask++;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
-
 static void
-mmx_combine_src_ca (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_src_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     const uint32_t *end = src + width;
-    while (src < end) {
-        __m64 a = load8888(*mask);
-        __m64 s = load8888(*src);
-        s = pix_multiply(s, a);
-        *dest = store8888(s);
-        ++src;
-        ++mask;
-        ++dest;
+
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+	
+	s = pix_multiply (s, a);
+	*dest = store8888 (s);
+
+	++src;
+	++mask;
+	++dest;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_over_ca (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_over_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dest,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
 {
     const uint32_t *end = src + width;
-    while (src < end) {
-        __m64 a = load8888(*mask);
-        __m64 s = load8888(*src);
-        __m64 d = load8888(*dest);
-        __m64 sa = expand_alpha(s);
 
-	*dest = store8888(in_over (s, sa, a, d));
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+	__m64 d = load8888 (*dest);
+	__m64 sa = expand_alpha (s);
+
+	*dest = store8888 (in_over (s, sa, a, d));
 
-        ++src;
-        ++dest;
-        ++mask;
+	++src;
+	++dest;
+	++mask;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_over_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_over_reverse_ca (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               dest,
+                             const uint32_t *         src,
+                             const uint32_t *         mask,
+                             int                      width)
 {
     const uint32_t *end = src + width;
-    while (src < end) {
-        __m64 a = load8888(*mask);
-        __m64 s = load8888(*src);
-        __m64 d = load8888(*dest);
-        __m64 da = expand_alpha(d);
 
-	*dest = store8888(over (d, da, in (s, a)));
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+	__m64 d = load8888 (*dest);
+	__m64 da = expand_alpha (d);
+
+	*dest = store8888 (over (d, da, in (s, a)));
 
-        ++src;
-        ++dest;
-        ++mask;
+	++src;
+	++dest;
+	++mask;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
-
 static void
-mmx_combine_in_ca (pixman_implementation_t *imp, pixman_op_t op,
-	       uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_in_ca (pixman_implementation_t *imp,
+                   pixman_op_t              op,
+                   uint32_t *               dest,
+                   const uint32_t *         src,
+                   const uint32_t *         mask,
+                   int                      width)
 {
     const uint32_t *end = src + width;
-    while (src < end) {
-        __m64 a = load8888(*mask);
-        __m64 s = load8888(*src);
-        __m64 d = load8888(*dest);
-        __m64 da = expand_alpha(d);
-        s = pix_multiply(s, a);
-        s = pix_multiply(s, da);
-        *dest = store8888(s);
-        ++src;
-        ++dest;
-        ++mask;
+
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+	__m64 d = load8888 (*dest);
+	__m64 da = expand_alpha (d);
+	
+	s = pix_multiply (s, a);
+	s = pix_multiply (s, da);
+	*dest = store8888 (s);
+
+	++src;
+	++dest;
+	++mask;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_in_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-		      uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_in_reverse_ca (pixman_implementation_t *imp,
+                           pixman_op_t              op,
+                           uint32_t *               dest,
+                           const uint32_t *         src,
+                           const uint32_t *         mask,
+                           int                      width)
 {
     const uint32_t *end = src + width;
-    while (src < end) {
-        __m64 a = load8888(*mask);
-        __m64 s = load8888(*src);
-        __m64 d = load8888(*dest);
-        __m64 sa = expand_alpha(s);
-        a = pix_multiply(a, sa);
-        d = pix_multiply(d, a);
-        *dest = store8888(d);
-        ++src;
-        ++dest;
-        ++mask;
+
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+	__m64 d = load8888 (*dest);
+	__m64 sa = expand_alpha (s);
+	
+	a = pix_multiply (a, sa);
+	d = pix_multiply (d, a);
+	*dest = store8888 (d);
+
+	++src;
+	++dest;
+	++mask;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_out_ca (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_out_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     const uint32_t *end = src + width;
-    while (src < end) {
-        __m64 a = load8888(*mask);
-        __m64 s = load8888(*src);
-        __m64 d = load8888(*dest);
-        __m64 da = expand_alpha(d);
-        da = negate(da);
-        s = pix_multiply(s, a);
-        s = pix_multiply(s, da);
-        *dest = store8888(s);
-        ++src;
-        ++dest;
-        ++mask;
+
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+	__m64 d = load8888 (*dest);
+	__m64 da = expand_alpha (d);
+	
+	da = negate (da);
+	s = pix_multiply (s, a);
+	s = pix_multiply (s, da);
+	*dest = store8888 (s);
+
+	++src;
+	++dest;
+	++mask;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_out_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-		       uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_out_reverse_ca (pixman_implementation_t *imp,
+                            pixman_op_t              op,
+                            uint32_t *               dest,
+                            const uint32_t *         src,
+                            const uint32_t *         mask,
+                            int                      width)
 {
     const uint32_t *end = src + width;
-    while (src < end) {
-        __m64 a = load8888(*mask);
-        __m64 s = load8888(*src);
-        __m64 d = load8888(*dest);
-        __m64 sa = expand_alpha(s);
-        a = pix_multiply(a, sa);
-        a = negate(a);
-        d = pix_multiply(d, a);
-        *dest = store8888(d);
-        ++src;
-        ++dest;
-        ++mask;
+
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+	__m64 d = load8888 (*dest);
+	__m64 sa = expand_alpha (s);
+
+	a = pix_multiply (a, sa);
+	a = negate (a);
+	d = pix_multiply (d, a);
+	*dest = store8888 (d);
+
+	++src;
+	++dest;
+	++mask;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_atop_ca (pixman_implementation_t *imp, pixman_op_t op,
-		 uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_atop_ca (pixman_implementation_t *imp,
+                     pixman_op_t              op,
+                     uint32_t *               dest,
+                     const uint32_t *         src,
+                     const uint32_t *         mask,
+                     int                      width)
 {
     const uint32_t *end = src + width;
-    while (src < end) {
-        __m64 a = load8888(*mask);
-        __m64 s = load8888(*src);
-        __m64 d = load8888(*dest);
-        __m64 da = expand_alpha(d);
-        __m64 sa = expand_alpha(s);
-        s = pix_multiply(s, a);
-        a = pix_multiply(a, sa);
-        a = negate(a);
+
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+	__m64 d = load8888 (*dest);
+	__m64 da = expand_alpha (d);
+	__m64 sa = expand_alpha (s);
+
+	s = pix_multiply (s, a);
+	a = pix_multiply (a, sa);
+	a = negate (a);
 	d = pix_add_mul (d, a, s, da);
-        *dest = store8888(d);
-        ++src;
-        ++dest;
-        ++mask;
+	*dest = store8888 (d);
+
+	++src;
+	++dest;
+	++mask;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_atop_reverse_ca (pixman_implementation_t *imp, pixman_op_t op,
-			uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
+                             pixman_op_t              op,
+                             uint32_t *               dest,
+                             const uint32_t *         src,
+                             const uint32_t *         mask,
+                             int                      width)
 {
     const uint32_t *end = src + width;
-    while (src < end) {
-        __m64 a = load8888(*mask);
-        __m64 s = load8888(*src);
-        __m64 d = load8888(*dest);
-        __m64 da = expand_alpha(d);
-        __m64 sa = expand_alpha(s);
-        s = pix_multiply(s, a);
-        a = pix_multiply(a, sa);
-        da = negate(da);
+
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+	__m64 d = load8888 (*dest);
+	__m64 da = expand_alpha (d);
+	__m64 sa = expand_alpha (s);
+
+	s = pix_multiply (s, a);
+	a = pix_multiply (a, sa);
+	da = negate (da);
 	d = pix_add_mul (d, a, s, da);
-        *dest = store8888(d);
-        ++src;
-        ++dest;
-        ++mask;
+	*dest = store8888 (d);
+
+	++src;
+	++dest;
+	++mask;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_xor_ca (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_xor_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     const uint32_t *end = src + width;
-    while (src < end) {
-        __m64 a = load8888(*mask);
-        __m64 s = load8888(*src);
-        __m64 d = load8888(*dest);
-        __m64 da = expand_alpha(d);
-        __m64 sa = expand_alpha(s);
-        s = pix_multiply(s, a);
-        a = pix_multiply(a, sa);
-        da = negate(da);
-        a = negate(a);
+
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+	__m64 d = load8888 (*dest);
+	__m64 da = expand_alpha (d);
+	__m64 sa = expand_alpha (s);
+
+	s = pix_multiply (s, a);
+	a = pix_multiply (a, sa);
+	da = negate (da);
+	a = negate (a);
 	d = pix_add_mul (d, a, s, da);
-        *dest = store8888(d);
-        ++src;
-        ++dest;
-        ++mask;
+	*dest = store8888 (d);
+
+	++src;
+	++dest;
+	++mask;
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
-mmx_combine_add_ca (pixman_implementation_t *imp, pixman_op_t op,
-		uint32_t *dest, const uint32_t *src, const uint32_t *mask, int width)
+mmx_combine_add_ca (pixman_implementation_t *imp,
+                    pixman_op_t              op,
+                    uint32_t *               dest,
+                    const uint32_t *         src,
+                    const uint32_t *         mask,
+                    int                      width)
 {
     const uint32_t *end = src + width;
-    while (src < end) {
-        __m64 a = load8888(*mask);
-        __m64 s = load8888(*src);
-        __m64 d = load8888(*dest);
-        s = pix_multiply(s, a);
-        d = pix_add(s, d);
-        *dest = store8888(d);
-        ++src;
-        ++dest;
-        ++mask;
+
+    while (src < end)
+    {
+	__m64 a = load8888 (*mask);
+	__m64 s = load8888 (*src);
+	__m64 d = load8888 (*dest);
+
+	s = pix_multiply (s, a);
+	d = pix_add (s, d);
+	*dest = store8888 (d);
+
+	++src;
+	++dest;
+	++mask;
     }
-    _mm_empty();
+    _mm_empty ();
 }
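
(All of the *_u and *_ca combiners above are per-channel compositions of a handful of primitives; the most common, over, is dest = src + dest * (1 - srca) on premultiplied pixels, with a saturating add. A scalar per-pixel sketch, assuming premultiplied ARGB packed in one uint32_t and reusing the mul_un8 helper sketched earlier:

    /* Premultiplied OVER for one pixel; mul_un8 as sketched above. */
    static uint32_t
    over_scalar (uint32_t src, uint32_t dst)
    {
        uint8_t ia = 255 - (src >> 24);    /* inverse source alpha */
        uint32_t res = 0;
        int shift;

        for (shift = 0; shift < 32; shift += 8)
        {
            uint8_t s = src >> shift;
            uint8_t d = dst >> shift;
            unsigned c = s + mul_un8 (d, ia);

            if (c > 255)
                c = 255;                   /* MMX uses _mm_adds_pu8 here */

            res |= (uint32_t)c << shift;
        }
        return res;
    }
)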
 
-/* ------------------ MMX code paths called from fbpict.c ----------------------- */
+/* ------------- MMX code paths called from fbpict.c -------------------- */
 
 static void
 mmx_composite_over_n_8888 (pixman_implementation_t *imp,
-			    pixman_op_t op,
-			    pixman_image_t * src_image,
-			    pixman_image_t * mask_image,
-			    pixman_image_t * dst_image,
-			    int32_t	src_x,
-			    int32_t	src_y,
-			    int32_t	mask_x,
-			    int32_t	mask_y,
-			    int32_t	dest_x,
-			    int32_t	dest_y,
-			    int32_t	width,
-			    int32_t	height)
-{
-    uint32_t	src;
-    uint32_t	*dst_line, *dst;
-    uint16_t	w;
-    int	dst_stride;
-    __m64	vsrc, vsrca;
-
-    CHECKPOINT();
-
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+                           pixman_op_t              op,
+                           pixman_image_t *         src_image,
+                           pixman_image_t *         mask_image,
+                           pixman_image_t *         dst_image,
+                           int32_t                  src_x,
+                           int32_t                  src_y,
+                           int32_t                  mask_x,
+                           int32_t                  mask_y,
+                           int32_t                  dest_x,
+                           int32_t                  dest_y,
+                           int32_t                  width,
+                           int32_t                  height)
+{
+    uint32_t src;
+    uint32_t    *dst_line, *dst;
+    uint16_t w;
+    int dst_stride;
+    __m64 vsrc, vsrca;
+
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     if (src >> 24 == 0)
 	return;
@@ -957,11 +1131,11 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w && (unsigned long)dst & 7)
 	{
-	    *dst = store8888(over(vsrc, vsrca, load8888(*dst)));
+	    *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
 
 	    w--;
 	    dst++;
@@ -974,53 +1148,53 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp,
 
 	    vdest = *(__m64 *)dst;
 
-	    dest0 = over(vsrc, vsrca, expand8888(vdest, 0));
-	    dest1 = over(vsrc, vsrca, expand8888(vdest, 1));
+	    dest0 = over (vsrc, vsrca, expand8888 (vdest, 0));
+	    dest1 = over (vsrc, vsrca, expand8888 (vdest, 1));
 
-	    *(__m64 *)dst = pack8888(dest0, dest1);
+	    *(__m64 *)dst = pack8888 (dest0, dest1);
 
 	    dst += 2;
 	    w -= 2;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w)
 	{
-	    *dst = store8888(over(vsrc, vsrca, load8888(*dst)));
+	    *dst = store8888 (over (vsrc, vsrca, load8888 (*dst)));
 
 	    w--;
 	    dst++;
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
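
(The composite fast paths above all share one shape: a scalar head loop runs until dst reaches 8-byte alignment, an __m64 body then handles two (or, for 16-bit, four) pixels per aligned store, and a scalar tail mops up the remainder. A shape-only sketch of that structure; process_one and process_two are hypothetical stand-ins for the real per-pixel work:

    #include <stdint.h>

    /* Head/body/tail structure of the MMX fast paths (sketch only). */
    static void
    composite_row (uint32_t *dst, int w,
                   void (*process_one) (uint32_t *),
                   void (*process_two) (uint32_t *))
    {
        while (w && (uintptr_t)dst & 7)    /* head: align dst to 8 bytes */
        {
            process_one (dst);
            dst++; w--;
        }

        while (w >= 2)                     /* body: aligned 64-bit chunks */
        {
            process_two (dst);             /* two 32-bit pixels at once */
            dst += 2; w -= 2;
        }

        while (w)                          /* tail: leftover pixel */
        {
            process_one (dst);
            dst++; w--;
        }
    }
)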
 
 static void
 mmx_composite_over_n_0565 (pixman_implementation_t *imp,
-			    pixman_op_t op,
-			    pixman_image_t * src_image,
-			    pixman_image_t * mask_image,
-			    pixman_image_t * dst_image,
-			    int32_t	src_x,
-			    int32_t	src_y,
-			    int32_t	mask_x,
-			    int32_t	mask_y,
-			    int32_t	dest_x,
-			    int32_t	dest_y,
-			    int32_t	width,
-			    int32_t	height)
-{
-    uint32_t	src;
-    uint16_t	*dst_line, *dst;
-    uint16_t	w;
-    int	dst_stride;
-    __m64	vsrc, vsrca;
-
-    CHECKPOINT();
-
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+                           pixman_op_t              op,
+                           pixman_image_t *         src_image,
+                           pixman_image_t *         mask_image,
+                           pixman_image_t *         dst_image,
+                           int32_t                  src_x,
+                           int32_t                  src_y,
+                           int32_t                  mask_x,
+                           int32_t                  mask_y,
+                           int32_t                  dest_x,
+                           int32_t                  dest_y,
+                           int32_t                  width,
+                           int32_t                  height)
+{
+    uint32_t src;
+    uint16_t    *dst_line, *dst;
+    uint16_t w;
+    int dst_stride;
+    __m64 vsrc, vsrca;
+
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     if (src >> 24 == 0)
 	return;
@@ -1036,14 +1210,15 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w && (unsigned long)dst & 7)
 	{
 	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (M64(d), 0);
-	    vdest = pack_565(over(vsrc, vsrca, vdest), vdest, 0);
-	    *dst = UINT64(vdest);
+	    __m64 vdest = expand565 (M64 (d), 0);
+	    
+	    vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
+	    *dst = UINT64 (vdest);
 
 	    w--;
 	    dst++;
@@ -1055,10 +1230,10 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp,
 
 	    vdest = *(__m64 *)dst;
 
-	    vdest = pack_565 (over(vsrc, vsrca, expand565(vdest, 0)), vdest, 0);
-	    vdest = pack_565 (over(vsrc, vsrca, expand565(vdest, 1)), vdest, 1);
-	    vdest = pack_565 (over(vsrc, vsrca, expand565(vdest, 2)), vdest, 2);
-	    vdest = pack_565 (over(vsrc, vsrca, expand565(vdest, 3)), vdest, 3);
+	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 0)), vdest, 0);
+	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 1)), vdest, 1);
+	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 2)), vdest, 2);
+	    vdest = pack_565 (over (vsrc, vsrca, expand565 (vdest, 3)), vdest, 3);
 
 	    *(__m64 *)dst = vdest;
 
@@ -1066,47 +1241,48 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp,
 	    w -= 4;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w)
 	{
 	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (M64(d), 0);
-	    vdest = pack_565(over(vsrc, vsrca, vdest), vdest, 0);
-	    *dst = UINT64(vdest);
+	    __m64 vdest = expand565 (M64 (d), 0);
+	    
+	    vdest = pack_565 (over (vsrc, vsrca, vdest), vdest, 0);
+	    *dst = UINT64 (vdest);
 
 	    w--;
 	    dst++;
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
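
expand565 and pack_565 in the hunks above widen r5g6b5 pixels to 8 bits per
channel and narrow them back after blending. A one-pixel scalar sketch of
that round trip (helper names are illustrative, not pixman's; the widening
replicates the top bits so the result covers the full 0..0xff range):

    #include <stdint.h>

    /* Widen one r5g6b5 pixel to 8-bit channels, replicating the top
     * bits into the low bits so 0x1f becomes 0xff, not 0xf8. */
    static void
    expand565_scalar (uint16_t p, uint8_t *r, uint8_t *g, uint8_t *b)
    {
        *r = (uint8_t)(((p >> 11) << 3) | ((p >> 13) & 0x07));
        *g = (uint8_t)((((p >> 5) & 0x3f) << 2) | ((p >> 9) & 0x03));
        *b = (uint8_t)(((p & 0x1f) << 3) | ((p >> 2) & 0x07));
    }

    /* Narrow back by dropping the low bits of each channel. */
    static uint16_t
    pack565_scalar (uint8_t r, uint8_t g, uint8_t b)
    {
        return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
    }
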
 
 static void
 mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
-				      pixman_op_t op,
-				      pixman_image_t * src_image,
-				      pixman_image_t * mask_image,
-				      pixman_image_t * dst_image,
-				      int32_t	src_x,
-				      int32_t	src_y,
-				      int32_t	mask_x,
-				      int32_t	mask_y,
-				      int32_t	dest_x,
-				      int32_t	dest_y,
-				      int32_t	width,
-				      int32_t	height)
-{
-    uint32_t	src, srca;
-    uint32_t	*dst_line;
-    uint32_t	*mask_line;
-    int	dst_stride, mask_stride;
-    __m64	vsrc, vsrca;
-
-    CHECKPOINT();
-
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+                                   pixman_op_t              op,
+                                   pixman_image_t *         src_image,
+                                   pixman_image_t *         mask_image,
+                                   pixman_image_t *         dst_image,
+                                   int32_t                  src_x,
+                                   int32_t                  src_y,
+                                   int32_t                  mask_x,
+                                   int32_t                  mask_y,
+                                   int32_t                  dest_x,
+                                   int32_t                  dest_y,
+                                   int32_t                  width,
+                                   int32_t                  height)
+{
+    uint32_t src, srca;
+    uint32_t    *dst_line;
+    uint32_t    *mask_line;
+    int dst_stride, mask_stride;
+    __m64 vsrc, vsrca;
+
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (srca == 0)
@@ -1115,8 +1291,8 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
 
-    vsrc = load8888(src);
-    vsrca = expand_alpha(vsrc);
+    vsrc = load8888 (src);
+    vsrca = expand_alpha (vsrc);
 
     while (height--)
     {
@@ -1130,9 +1306,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 
 	    if (m)
 	    {
-		__m64 vdest = load8888(*q);
-		vdest = in_over(vsrc, vsrca, load8888(m), vdest);
-		*q = store8888(vdest);
+		__m64 vdest = load8888 (*q);
+		vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
+		*q = store8888 (vdest);
 	    }
 
 	    twidth--;
@@ -1151,12 +1327,12 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 		__m64 dest0, dest1;
 		__m64 vdest = *(__m64 *)q;
 
-		dest0 = in_over(vsrc, vsrca, load8888(m0),
-				expand8888 (vdest, 0));
-		dest1 = in_over(vsrc, vsrca, load8888(m1),
-				expand8888 (vdest, 1));
+		dest0 = in_over (vsrc, vsrca, load8888 (m0),
+		                 expand8888 (vdest, 0));
+		dest1 = in_over (vsrc, vsrca, load8888 (m1),
+		                 expand8888 (vdest, 1));
 
-		*(__m64 *)q = pack8888(dest0, dest1);
+		*(__m64 *)q = pack8888 (dest0, dest1);
 	    }
 
 	    p += 2;
@@ -1170,9 +1346,9 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 
 	    if (m)
 	    {
-		__m64 vdest = load8888(*q);
-		vdest = in_over(vsrc, vsrca, load8888(m), vdest);
-		*q = store8888(vdest);
+		__m64 vdest = load8888 (*q);
+		vdest = in_over (vsrc, vsrca, load8888 (m), vdest);
+		*q = store8888 (vdest);
 	    }
 
 	    twidth--;
@@ -1184,33 +1360,33 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 	mask_line += mask_stride;
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
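
In the component-alpha path above the mask is a full 32-bit pixel, and
in_over applies it per channel. With values normalized to [0, 1]:

    in_over (src, srca, mask, dest) = src * mask + (1 - srca * mask) * dest

per channel, where each product in the code is a rounded 8-bit multiply and
srca is the source alpha replicated across channels by expand_alpha. The
unified-mask paths elsewhere build the same replicated operand from a
single a8 byte via expand_alpha_rev.
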
 
 static void
 mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
-			       pixman_op_t op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t	src_x,
-			       int32_t	src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t     width,
-			       int32_t     height)
-{
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    uint32_t	mask;
-    __m64	vmask;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-    __m64  srca;
-
-    CHECKPOINT();
+                                pixman_op_t              op,
+                                pixman_image_t *         src_image,
+                                pixman_image_t *         mask_image,
+                                pixman_image_t *         dst_image,
+                                int32_t                  src_x,
+                                int32_t                  src_y,
+                                int32_t                  mask_x,
+                                int32_t                  mask_y,
+                                int32_t                  dest_x,
+                                int32_t                  dest_y,
+                                int32_t                  width,
+                                int32_t                  height)
+{
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    uint32_t mask;
+    __m64 vmask;
+    int dst_stride, src_stride;
+    uint16_t w;
+    __m64 srca;
+
+    CHECKPOINT ();
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
@@ -1218,7 +1394,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
     mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
     mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
     vmask = load8888 (mask);
-    srca = MC(4x00ff);
+    srca = MC (4x00ff);
 
     while (height--)
     {
@@ -1248,8 +1424,8 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 	    __m64 vsrc1 = expand8888 (vs, 1);
 
 	    *(__m64 *)dst = pack8888 (
-		in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)),
-		in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1)));
+	        in_over (vsrc0, expand_alpha (vsrc0), vmask, expand8888 (vd, 0)),
+	        in_over (vsrc1, expand_alpha (vsrc1), vmask, expand8888 (vd, 1)));
 
 	    w -= 2;
 	    dst += 2;
@@ -1269,33 +1445,33 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
 mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
-			       pixman_op_t op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t	src_x,
-			       int32_t	src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t     width,
-			       int32_t     height)
-{
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    uint32_t	mask;
-    __m64	vmask;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-    __m64  srca;
-
-    CHECKPOINT();
+                                pixman_op_t              op,
+                                pixman_image_t *         src_image,
+                                pixman_image_t *         mask_image,
+                                pixman_image_t *         dst_image,
+                                int32_t                  src_x,
+                                int32_t                  src_y,
+                                int32_t                  mask_x,
+                                int32_t                  mask_y,
+                                int32_t                  dest_x,
+                                int32_t                  dest_y,
+                                int32_t                  width,
+                                int32_t                  height)
+{
+    uint32_t *dst_line, *dst;
+    uint32_t *src_line, *src;
+    uint32_t mask;
+    __m64 vmask;
+    int dst_stride, src_stride;
+    uint16_t w;
+    __m64 srca;
+
+    CHECKPOINT ();
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
@@ -1303,7 +1479,7 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
 
     mask = mask | mask >> 8 | mask >> 16 | mask >> 24;
     vmask = load8888 (mask);
-    srca = MC(4x00ff);
+    srca = MC (4x00ff);
 
     while (height--)
     {
@@ -1346,36 +1522,36 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
 	    __m64 vs7 = *(__m64 *)(src + 14);
 
 	    vd0 = pack8888 (
-		in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
-		in_over (expandx888 (vs0, 1), srca, vmask, expand8888 (vd0, 1)));
+	        in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
+	        in_over (expandx888 (vs0, 1), srca, vmask, expand8888 (vd0, 1)));
 
 	    vd1 = pack8888 (
-		in_over (expandx888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)),
-		in_over (expandx888 (vs1, 1), srca, vmask, expand8888 (vd1, 1)));
+	        in_over (expandx888 (vs1, 0), srca, vmask, expand8888 (vd1, 0)),
+	        in_over (expandx888 (vs1, 1), srca, vmask, expand8888 (vd1, 1)));
 
 	    vd2 = pack8888 (
-		in_over (expandx888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)),
-		in_over (expandx888 (vs2, 1), srca, vmask, expand8888 (vd2, 1)));
+	        in_over (expandx888 (vs2, 0), srca, vmask, expand8888 (vd2, 0)),
+	        in_over (expandx888 (vs2, 1), srca, vmask, expand8888 (vd2, 1)));
 
 	    vd3 = pack8888 (
-		in_over (expandx888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)),
-		in_over (expandx888 (vs3, 1), srca, vmask, expand8888 (vd3, 1)));
+	        in_over (expandx888 (vs3, 0), srca, vmask, expand8888 (vd3, 0)),
+	        in_over (expandx888 (vs3, 1), srca, vmask, expand8888 (vd3, 1)));
 
 	    vd4 = pack8888 (
-		in_over (expandx888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)),
-		in_over (expandx888 (vs4, 1), srca, vmask, expand8888 (vd4, 1)));
+	        in_over (expandx888 (vs4, 0), srca, vmask, expand8888 (vd4, 0)),
+	        in_over (expandx888 (vs4, 1), srca, vmask, expand8888 (vd4, 1)));
 
 	    vd5 = pack8888 (
-		in_over (expandx888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)),
-		in_over (expandx888 (vs5, 1), srca, vmask, expand8888 (vd5, 1)));
+	        in_over (expandx888 (vs5, 0), srca, vmask, expand8888 (vd5, 0)),
+	        in_over (expandx888 (vs5, 1), srca, vmask, expand8888 (vd5, 1)));
 
-            vd6 = pack8888 (
-		in_over (expandx888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)),
-		in_over (expandx888 (vs6, 1), srca, vmask, expand8888 (vd6, 1)));
+	    vd6 = pack8888 (
+	        in_over (expandx888 (vs6, 0), srca, vmask, expand8888 (vd6, 0)),
+	        in_over (expandx888 (vs6, 1), srca, vmask, expand8888 (vd6, 1)));
 
 	    vd7 = pack8888 (
-		in_over (expandx888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)),
-		in_over (expandx888 (vs7, 1), srca, vmask, expand8888 (vd7, 1)));
+	        in_over (expandx888 (vs7, 0), srca, vmask, expand8888 (vd7, 0)),
+	        in_over (expandx888 (vs7, 1), srca, vmask, expand8888 (vd7, 1)));
 
 	    *(__m64 *)(dst + 0) = vd0;
 	    *(__m64 *)(dst + 2) = vd1;
@@ -1404,32 +1580,32 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
 mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
-			     pixman_op_t op,
-			     pixman_image_t * src_image,
-			     pixman_image_t * mask_image,
-			     pixman_image_t * dst_image,
-			     int32_t	src_x,
-			     int32_t	src_y,
-			     int32_t      mask_x,
-			     int32_t      mask_y,
-			     int32_t      dest_x,
-			     int32_t      dest_y,
-			     int32_t     width,
-			     int32_t     height)
-{
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    uint32_t    s;
-    int	dst_stride, src_stride;
-    uint8_t     a;
-    uint16_t	w;
-
-    CHECKPOINT();
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
+{
+    uint32_t *dst_line, *dst;
+    uint32_t *src_line, *src;
+    uint32_t s;
+    int dst_stride, src_stride;
+    uint8_t a;
+    uint16_t w;
+
+    CHECKPOINT ();
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
@@ -1446,41 +1622,46 @@ mmx_composite_over_8888_8888 (pixman_implementation_t *imp,
 	{
 	    s = *src++;
 	    a = s >> 24;
+	    
 	    if (a == 0xff)
+	    {
 		*dst = s;
-	    else if (s) {
+	    }
+	    else if (s)
+	    {
 		__m64 ms, sa;
-		ms = load8888(s);
-		sa = expand_alpha(ms);
-		*dst = store8888(over(ms, sa, load8888(*dst)));
+		ms = load8888 (s);
+		sa = expand_alpha (ms);
+		*dst = store8888 (over (ms, sa, load8888 (*dst)));
 	    }
+	    
 	    dst++;
 	}
     }
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
 mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
-			     pixman_op_t op,
-			     pixman_image_t * src_image,
-			     pixman_image_t * mask_image,
-			     pixman_image_t * dst_image,
-			     int32_t      src_x,
-			     int32_t      src_y,
-			     int32_t      mask_x,
-			     int32_t      mask_y,
-			     int32_t      dest_x,
-			     int32_t      dest_y,
-			     int32_t     width,
-			     int32_t     height)
-{
-    uint16_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-
-    CHECKPOINT();
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
+{
+    uint16_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+    uint16_t w;
+
+    CHECKPOINT ();
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
@@ -1498,41 +1679,42 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w && (unsigned long)dst & 7)
 	{
 	    __m64 vsrc = load8888 (*src);
 	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (M64(d), 0);
+	    __m64 vdest = expand565 (M64 (d), 0);
 
-	    vdest = pack_565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0);
+	    vdest = pack_565 (
+		over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
 
-	    *dst = UINT64(vdest);
+	    *dst = UINT64 (vdest);
 
 	    w--;
 	    dst++;
 	    src++;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w >= 4)
 	{
 	    __m64 vsrc0, vsrc1, vsrc2, vsrc3;
 	    __m64 vdest;
 
-	    vsrc0 = load8888(*(src + 0));
-	    vsrc1 = load8888(*(src + 1));
-	    vsrc2 = load8888(*(src + 2));
-	    vsrc3 = load8888(*(src + 3));
+	    vsrc0 = load8888 (*(src + 0));
+	    vsrc1 = load8888 (*(src + 1));
+	    vsrc2 = load8888 (*(src + 2));
+	    vsrc3 = load8888 (*(src + 3));
 
 	    vdest = *(__m64 *)dst;
 
-	    vdest = pack_565(over(vsrc0, expand_alpha(vsrc0), expand565(vdest, 0)), vdest, 0);
-	    vdest = pack_565(over(vsrc1, expand_alpha(vsrc1), expand565(vdest, 1)), vdest, 1);
-	    vdest = pack_565(over(vsrc2, expand_alpha(vsrc2), expand565(vdest, 2)), vdest, 2);
-	    vdest = pack_565(over(vsrc3, expand_alpha(vsrc3), expand565(vdest, 3)), vdest, 3);
+	    vdest = pack_565 (over (vsrc0, expand_alpha (vsrc0), expand565 (vdest, 0)), vdest, 0);
+	    vdest = pack_565 (over (vsrc1, expand_alpha (vsrc1), expand565 (vdest, 1)), vdest, 1);
+	    vdest = pack_565 (over (vsrc2, expand_alpha (vsrc2), expand565 (vdest, 2)), vdest, 2);
+	    vdest = pack_565 (over (vsrc3, expand_alpha (vsrc3), expand565 (vdest, 3)), vdest, 3);
 
 	    *(__m64 *)dst = vdest;
 
@@ -1541,17 +1723,17 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
 	    src += 4;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w)
 	{
 	    __m64 vsrc = load8888 (*src);
 	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (M64(d), 0);
+	    __m64 vdest = expand565 (M64 (d), 0);
 
-	    vdest = pack_565(over(vsrc, expand_alpha(vsrc), vdest), vdest, 0);
+	    vdest = pack_565 (over (vsrc, expand_alpha (vsrc), vdest), vdest, 0);
 
-	    *dst = UINT64(vdest);
+	    *dst = UINT64 (vdest);
 
 	    w--;
 	    dst++;
@@ -1559,35 +1741,35 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
 mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
-				  pixman_op_t op,
-				  pixman_image_t * src_image,
-				  pixman_image_t * mask_image,
-				  pixman_image_t * dst_image,
-				  int32_t      src_x,
-				  int32_t      src_y,
-				  int32_t      mask_x,
-				  int32_t      mask_y,
-				  int32_t      dest_x,
-				  int32_t      dest_y,
-				  int32_t     width,
-				  int32_t     height)
-{
-    uint32_t	src, srca;
-    uint32_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
-    __m64	vsrc, vsrca;
-    uint64_t	srcsrc;
-
-    CHECKPOINT();
-
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+                             pixman_op_t              op,
+                             pixman_image_t *         src_image,
+                             pixman_image_t *         mask_image,
+                             pixman_image_t *         dst_image,
+                             int32_t                  src_x,
+                             int32_t                  src_y,
+                             int32_t                  mask_x,
+                             int32_t                  mask_y,
+                             int32_t                  dest_x,
+                             int32_t                  dest_y,
+                             int32_t                  width,
+                             int32_t                  height)
+{
+    uint32_t src, srca;
+    uint32_t *dst_line, *dst;
+    uint8_t *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w;
+    __m64 vsrc, vsrca;
+    uint64_t srcsrc;
+
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (srca == 0)
@@ -1609,7 +1791,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w && (unsigned long)dst & 7)
 	{
@@ -1617,8 +1799,11 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
 
 	    if (m)
 	    {
-		__m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), load8888(*dst));
-		*dst = store8888(vdest);
+		__m64 vdest = in_over (vsrc, vsrca,
+				       expand_alpha_rev (M64 (m)),
+				       load8888 (*dst));
+		
+		*dst = store8888 (vdest);
 	    }
 
 	    w--;
@@ -1626,11 +1811,12 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
 	    dst++;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w >= 2)
 	{
 	    uint64_t m0, m1;
+	    
 	    m0 = *mask;
 	    m1 = *(mask + 1);
 
@@ -1645,10 +1831,12 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
 
 		vdest = *(__m64 *)dst;
 
-		dest0 = in_over(vsrc, vsrca, expand_alpha_rev (M64(m0)), expand8888(vdest, 0));
-		dest1 = in_over(vsrc, vsrca, expand_alpha_rev (M64(m1)), expand8888(vdest, 1));
+		dest0 = in_over (vsrc, vsrca, expand_alpha_rev (M64 (m0)),
+				 expand8888 (vdest, 0));
+		dest1 = in_over (vsrc, vsrca, expand_alpha_rev (M64 (m1)),
+				 expand8888 (vdest, 1));
 
-		*(__m64 *)dst = pack8888(dest0, dest1);
+		*(__m64 *)dst = pack8888 (dest0, dest1);
 	    }
 
 	    mask += 2;
@@ -1656,7 +1844,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
 	    w -= 2;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w)
 	{
@@ -1664,9 +1852,11 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
 
 	    if (m)
 	    {
-		__m64 vdest = load8888(*dst);
-		vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), vdest);
-		*dst = store8888(vdest);
+		__m64 vdest = load8888 (*dst);
+
+		vdest = in_over (
+		    vsrc, vsrca, expand_alpha_rev (M64 (m)), vdest);
+		*dst = store8888 (vdest);
 	    }
 
 	    w--;
@@ -1675,25 +1865,26 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
 
 pixman_bool_t
 pixman_fill_mmx (uint32_t *bits,
-		 int stride,
-		 int bpp,
-		 int x,
-		 int y,
-		 int width,
-		 int height,
-		 uint32_t xor)
-{
-    uint64_t	fill;
-    __m64	vfill;
-    uint32_t	byte_width;
-    uint8_t	*byte_line;
+                 int       stride,
+                 int       bpp,
+                 int       x,
+                 int       y,
+                 int       width,
+                 int       height,
+                 uint32_t  xor)
+{
+    uint64_t fill;
+    __m64 vfill;
+    uint32_t byte_width;
+    uint8_t     *byte_line;
+
 #ifdef __GNUC__
-    __m64	v1, v2, v3, v4, v5, v6, v7;
+    __m64 v1, v2, v3, v4, v5, v6, v7;
 #endif
 
     if (bpp != 16 && bpp != 32 && bpp != 8)
@@ -1703,12 +1894,12 @@ pixman_fill_mmx (uint32_t *bits,
 	return FALSE;
 
     if (bpp == 8 &&
-	((xor >> 16 != (xor & 0xffff)) ||
-	 (xor >> 24 != (xor & 0x00ff) >> 16)))
+        ((xor >> 16 != (xor & 0xffff)) ||
+         (xor >> 24 != (xor & 0x00ff) >> 16)))
     {
 	return FALSE;
     }
-    
+
     if (bpp == 8)
     {
 	stride = stride * (int) sizeof (uint32_t) / 1;
@@ -1732,19 +1923,19 @@ pixman_fill_mmx (uint32_t *bits,
     }
 
     fill = ((uint64_t)xor << 32) | xor;
-    vfill = M64(fill);
+    vfill = M64 (fill);
 
 #ifdef __GNUC__
     __asm__ (
-	"movq		%7,	%0\n"
-	"movq		%7,	%1\n"
-	"movq		%7,	%2\n"
-	"movq		%7,	%3\n"
-	"movq		%7,	%4\n"
-	"movq		%7,	%5\n"
-	"movq		%7,	%6\n"
+        "movq		%7,	%0\n"
+        "movq		%7,	%1\n"
+        "movq		%7,	%2\n"
+        "movq		%7,	%3\n"
+        "movq		%7,	%4\n"
+        "movq		%7,	%5\n"
+        "movq		%7,	%6\n"
 	: "=y" (v1), "=y" (v2), "=y" (v3),
-	  "=y" (v4), "=y" (v5), "=y" (v6), "=y" (v7)
+        "=y" (v4), "=y" (v5), "=y" (v6), "=y" (v7)
 	: "y" (vfill));
 #endif
 
@@ -1752,6 +1943,7 @@ pixman_fill_mmx (uint32_t *bits,
     {
 	int w;
 	uint8_t *d = byte_line;
+	
 	byte_line += stride;
 	w = byte_width;
 
@@ -1761,7 +1953,7 @@ pixman_fill_mmx (uint32_t *bits,
 	    w--;
 	    d++;
 	}
-	
+
 	while (w >= 2 && ((unsigned long)d & 3))
 	{
 	    *(uint16_t *)d = xor;
@@ -1781,18 +1973,18 @@ pixman_fill_mmx (uint32_t *bits,
 	{
 #ifdef __GNUC__
 	    __asm__ (
-		"movq	%1,	  (%0)\n"
-		"movq	%2,	 8(%0)\n"
-		"movq	%3,	16(%0)\n"
-		"movq	%4,	24(%0)\n"
-		"movq	%5,	32(%0)\n"
-		"movq	%6,	40(%0)\n"
-		"movq	%7,	48(%0)\n"
-		"movq	%8,	56(%0)\n"
+	        "movq	%1,	  (%0)\n"
+	        "movq	%2,	 8(%0)\n"
+	        "movq	%3,	16(%0)\n"
+	        "movq	%4,	24(%0)\n"
+	        "movq	%5,	32(%0)\n"
+	        "movq	%6,	40(%0)\n"
+	        "movq	%7,	48(%0)\n"
+	        "movq	%8,	56(%0)\n"
 		:
 		: "r" (d),
-		  "y" (vfill), "y" (v1), "y" (v2), "y" (v3),
-		  "y" (v4), "y" (v5), "y" (v6), "y" (v7)
+	        "y" (vfill), "y" (v1), "y" (v2), "y" (v3),
+	        "y" (v4), "y" (v5), "y" (v6), "y" (v7)
 		: "memory");
 #else
 	    *(__m64*) (d +  0) = vfill;
@@ -1827,45 +2019,46 @@ pixman_fill_mmx (uint32_t *bits,
 	    w--;
 	    d++;
 	}
-	
+
     }
 
-    _mm_empty();
+    _mm_empty ();
     return TRUE;
 }
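
pixman_fill_mmx above follows the common head/body/tail shape: store
narrow units until the pointer is 8-byte aligned, stream 64-byte blocks
from the preloaded registers, then mop up the remainder. A portable scalar
sketch of the same shape (fill8 is an illustrative stand-in, not the
pixman entry point):

    #include <stdint.h>
    #include <string.h>

    static void
    fill8 (uint8_t *d, int w, uint8_t v)
    {
        uint64_t v8;

        memset (&v8, v, sizeof v8);

        while (w && ((uintptr_t)d & 7))     /* head: reach alignment */
        {
            *d++ = v;
            w--;
        }
        while (w >= 8)                      /* body: aligned wide stores */
        {
            memcpy (d, &v8, 8);
            d += 8;
            w -= 8;
        }
        while (w--)                         /* tail */
            *d++ = v;
    }

In the MMX body the wide stores come from v1..v7, preloaded with vfill in
the inline assembly above.
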
 
 static void
 mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
-				     pixman_op_t op,
-				     pixman_image_t * src_image,
-				     pixman_image_t * mask_image,
-				     pixman_image_t * dst_image,
-				     int32_t      src_x,
-				     int32_t      src_y,
-				     int32_t      mask_x,
-				     int32_t      mask_y,
-				     int32_t      dest_x,
-				     int32_t      dest_y,
-				     int32_t     width,
-				     int32_t     height)
-{
-    uint32_t	src, srca;
-    uint32_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
-    __m64	vsrc, vsrca;
-    uint64_t	srcsrc;
-
-    CHECKPOINT();
-
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+                            pixman_op_t              op,
+                            pixman_image_t *         src_image,
+                            pixman_image_t *         mask_image,
+                            pixman_image_t *         dst_image,
+                            int32_t                  src_x,
+                            int32_t                  src_y,
+                            int32_t                  mask_x,
+                            int32_t                  mask_y,
+                            int32_t                  dest_x,
+                            int32_t                  dest_y,
+                            int32_t                  width,
+                            int32_t                  height)
+{
+    uint32_t src, srca;
+    uint32_t    *dst_line, *dst;
+    uint8_t     *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w;
+    __m64 vsrc, vsrca;
+    uint64_t srcsrc;
+
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (srca == 0)
     {
-	pixman_fill_mmx (dst_image->bits.bits, dst_image->bits.rowstride, PIXMAN_FORMAT_BPP (dst_image->bits.format),
-			 dest_x, dest_y, width, height, 0);
+	pixman_fill_mmx (dst_image->bits.bits, dst_image->bits.rowstride,
+			 PIXMAN_FORMAT_BPP (dst_image->bits.format),
+	                 dest_x, dest_y, width, height, 0);
 	return;
     }
 
@@ -1885,7 +2078,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w && (unsigned long)dst & 7)
 	{
@@ -1893,8 +2086,9 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
 
 	    if (m)
 	    {
-		__m64 vdest = in(vsrc, expand_alpha_rev (M64(m)));
-		*dst = store8888(vdest);
+		__m64 vdest = in (vsrc, expand_alpha_rev (M64 (m)));
+		
+		*dst = store8888 (vdest);
 	    }
 	    else
 	    {
@@ -1906,7 +2100,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
 	    dst++;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w >= 2)
 	{
@@ -1925,10 +2119,10 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
 
 		vdest = *(__m64 *)dst;
 
-		dest0 = in(vsrc, expand_alpha_rev (M64(m0)));
-		dest1 = in(vsrc, expand_alpha_rev (M64(m1)));
+		dest0 = in (vsrc, expand_alpha_rev (M64 (m0)));
+		dest1 = in (vsrc, expand_alpha_rev (M64 (m1)));
 
-		*(__m64 *)dst = pack8888(dest0, dest1);
+		*(__m64 *)dst = pack8888 (dest0, dest1);
 	    }
 	    else
 	    {
@@ -1940,7 +2134,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
 	    w -= 2;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w)
 	{
@@ -1948,9 +2142,10 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
 
 	    if (m)
 	    {
-		__m64 vdest = load8888(*dst);
-		vdest = in(vsrc, expand_alpha_rev (M64(m)));
-		*dst = store8888(vdest);
+		__m64 vdest = load8888 (*dst);
+		
+		vdest = in (vsrc, expand_alpha_rev (M64 (m)));
+		*dst = store8888 (vdest);
 	    }
 	    else
 	    {
@@ -1963,35 +2158,35 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
 mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
-				  pixman_op_t op,
-				  pixman_image_t * src_image,
-				  pixman_image_t * mask_image,
-				  pixman_image_t * dst_image,
-				  int32_t      src_x,
-				  int32_t      src_y,
-				  int32_t      mask_x,
-				  int32_t      mask_y,
-				  int32_t      dest_x,
-				  int32_t      dest_y,
-				  int32_t     width,
-				  int32_t     height)
-{
-    uint32_t	src, srca;
-    uint16_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
-    __m64	vsrc, vsrca, tmp;
+                             pixman_op_t              op,
+                             pixman_image_t *         src_image,
+                             pixman_image_t *         mask_image,
+                             pixman_image_t *         dst_image,
+                             int32_t                  src_x,
+                             int32_t                  src_y,
+                             int32_t                  mask_x,
+                             int32_t                  mask_y,
+                             int32_t                  dest_x,
+                             int32_t                  dest_y,
+                             int32_t                  width,
+                             int32_t                  height)
+{
+    uint32_t src, srca;
+    uint16_t *dst_line, *dst;
+    uint8_t *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w;
+    __m64 vsrc, vsrca, tmp;
     uint64_t srcsrcsrcsrc, src16;
 
-    CHECKPOINT();
+    CHECKPOINT ();
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (srca == 0)
@@ -2003,10 +2198,11 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
     vsrc = load8888 (src);
     vsrca = expand_alpha (vsrc);
 
-    tmp = pack_565(vsrc, _mm_setzero_si64(), 0);
-    src16 = UINT64(tmp);
+    tmp = pack_565 (vsrc, _mm_setzero_si64 (), 0);
+    src16 = UINT64 (tmp);
 
-    srcsrcsrcsrc = (uint64_t)src16 << 48 | (uint64_t)src16 << 32 |
+    srcsrcsrcsrc =
+	(uint64_t)src16 << 48 | (uint64_t)src16 << 32 |
 	(uint64_t)src16 << 16 | (uint64_t)src16;
 
     while (height--)
@@ -2017,7 +2213,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w && (unsigned long)dst & 7)
 	{
@@ -2026,10 +2222,12 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		uint64_t d = *dst;
-		__m64 vd = M64(d);
-		__m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64 (m)), expand565(vd, 0));
-		vd = pack_565(vdest, _mm_setzero_si64(), 0);
-		*dst = UINT64(vd);
+		__m64 vd = M64 (d);
+		__m64 vdest = in_over (
+		    vsrc, vsrca, expand_alpha_rev (M64 (m)), expand565 (vd, 0));
+		
+		vd = pack_565 (vdest, _mm_setzero_si64 (), 0);
+		*dst = UINT64 (vd);
 	    }
 
 	    w--;
@@ -2037,7 +2235,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	    dst++;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w >= 4)
 	{
@@ -2058,14 +2256,18 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
 
 		vdest = *(__m64 *)dst;
 
-		vm0 = M64(m0);
-		vdest = pack_565(in_over(vsrc, vsrca, expand_alpha_rev(vm0), expand565(vdest, 0)), vdest, 0);
-		vm1 = M64(m1);
-		vdest = pack_565(in_over(vsrc, vsrca, expand_alpha_rev(vm1), expand565(vdest, 1)), vdest, 1);
-		vm2 = M64(m2);
-		vdest = pack_565(in_over(vsrc, vsrca, expand_alpha_rev(vm2), expand565(vdest, 2)), vdest, 2);
-		vm3 = M64(m3);
-		vdest = pack_565(in_over(vsrc, vsrca, expand_alpha_rev(vm3), expand565(vdest, 3)), vdest, 3);
+		vm0 = M64 (m0);
+		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm0),
+					   expand565 (vdest, 0)), vdest, 0);
+		vm1 = M64 (m1);
+		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm1),
+					   expand565 (vdest, 1)), vdest, 1);
+		vm2 = M64 (m2);
+		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm2),
+					   expand565 (vdest, 2)), vdest, 2);
+		vm3 = M64 (m3);
+		vdest = pack_565 (in_over (vsrc, vsrca, expand_alpha_rev (vm3),
+					   expand565 (vdest, 3)), vdest, 3);
 
 		*(__m64 *)dst = vdest;
 	    }
@@ -2075,7 +2277,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	    dst += 4;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w)
 	{
@@ -2084,10 +2286,11 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		uint64_t d = *dst;
-		__m64 vd = M64(d);
-		__m64 vdest = in_over(vsrc, vsrca, expand_alpha_rev (M64(m)), expand565(vd, 0));
-		vd = pack_565(vdest, _mm_setzero_si64(), 0);
-		*dst = UINT64(vd);
+		__m64 vd = M64 (d);
+		__m64 vdest = in_over (vsrc, vsrca, expand_alpha_rev (M64 (m)),
+				       expand565 (vd, 0));
+		vd = pack_565 (vdest, _mm_setzero_si64 (), 0);
+		*dst = UINT64 (vd);
 	    }
 
 	    w--;
@@ -2096,30 +2299,30 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
 mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
-				  pixman_op_t op,
-				  pixman_image_t * src_image,
-				  pixman_image_t * mask_image,
-				  pixman_image_t * dst_image,
-				  int32_t      src_x,
-				  int32_t      src_y,
-				  int32_t      mask_x,
-				  int32_t      mask_y,
-				  int32_t      dest_x,
-				  int32_t      dest_y,
-				  int32_t     width,
-				  int32_t     height)
-{
-    uint16_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-
-    CHECKPOINT();
+                                pixman_op_t              op,
+                                pixman_image_t *         src_image,
+                                pixman_image_t *         mask_image,
+                                pixman_image_t *         dst_image,
+                                int32_t                  src_x,
+                                int32_t                  src_y,
+                                int32_t                  mask_x,
+                                int32_t                  mask_y,
+                                int32_t                  dest_x,
+                                int32_t                  dest_y,
+                                int32_t                  width,
+                                int32_t                  height)
+{
+    uint16_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+    uint16_t w;
+
+    CHECKPOINT ();
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
@@ -2137,24 +2340,24 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w && (unsigned long)dst & 7)
 	{
 	    __m64 vsrc = load8888 (*src);
 	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (M64(d), 0);
+	    __m64 vdest = expand565 (M64 (d), 0);
 
-	    vdest = pack_565(over_rev_non_pre(vsrc, vdest), vdest, 0);
+	    vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0);
 
-	    *dst = UINT64(vdest);
+	    *dst = UINT64 (vdest);
 
 	    w--;
 	    dst++;
 	    src++;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w >= 4)
 	{
@@ -2174,10 +2377,10 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 	    if ((a0 & a1 & a2 & a3) == 0xFF)
 	    {
 		__m64 vdest;
-		vdest = pack_565(invert_colors(load8888(s0)), _mm_setzero_si64(), 0);
-		vdest = pack_565(invert_colors(load8888(s1)), vdest, 1);
-		vdest = pack_565(invert_colors(load8888(s2)), vdest, 2);
-		vdest = pack_565(invert_colors(load8888(s3)), vdest, 3);
+		vdest = pack_565 (invert_colors (load8888 (s0)), _mm_setzero_si64 (), 0);
+		vdest = pack_565 (invert_colors (load8888 (s1)), vdest, 1);
+		vdest = pack_565 (invert_colors (load8888 (s2)), vdest, 2);
+		vdest = pack_565 (invert_colors (load8888 (s3)), vdest, 3);
 
 		*(__m64 *)dst = vdest;
 	    }
@@ -2185,10 +2388,10 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 	    {
 		__m64 vdest = *(__m64 *)dst;
 
-		vdest = pack_565(over_rev_non_pre(load8888(s0), expand565(vdest, 0)), vdest, 0);
-	        vdest = pack_565(over_rev_non_pre(load8888(s1), expand565(vdest, 1)), vdest, 1);
-		vdest = pack_565(over_rev_non_pre(load8888(s2), expand565(vdest, 2)), vdest, 2);
-		vdest = pack_565(over_rev_non_pre(load8888(s3), expand565(vdest, 3)), vdest, 3);
+		vdest = pack_565 (over_rev_non_pre (load8888 (s0), expand565 (vdest, 0)), vdest, 0);
+		vdest = pack_565 (over_rev_non_pre (load8888 (s1), expand565 (vdest, 1)), vdest, 1);
+		vdest = pack_565 (over_rev_non_pre (load8888 (s2), expand565 (vdest, 2)), vdest, 2);
+		vdest = pack_565 (over_rev_non_pre (load8888 (s3), expand565 (vdest, 3)), vdest, 3);
 
 		*(__m64 *)dst = vdest;
 	    }
@@ -2198,17 +2401,17 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 	    src += 4;
 	}
 
-	CHECKPOINT();
+	CHECKPOINT ();
 
 	while (w)
 	{
 	    __m64 vsrc = load8888 (*src);
 	    uint64_t d = *dst;
-	    __m64 vdest = expand565 (M64(d), 0);
+	    __m64 vdest = expand565 (M64 (d), 0);
 
-	    vdest = pack_565(over_rev_non_pre(vsrc, vdest), vdest, 0);
+	    vdest = pack_565 (over_rev_non_pre (vsrc, vdest), vdest, 0);
 
-	    *dst = UINT64(vdest);
+	    *dst = UINT64 (vdest);
 
 	    w--;
 	    dst++;
@@ -2216,30 +2419,30 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
 mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
-				  pixman_op_t op,
-				  pixman_image_t * src_image,
-				  pixman_image_t * mask_image,
-				  pixman_image_t * dst_image,
-				  int32_t      src_x,
-				  int32_t      src_y,
-				  int32_t      mask_x,
-				  int32_t      mask_y,
-				  int32_t      dest_x,
-				  int32_t      dest_y,
-				  int32_t     width,
-				  int32_t     height)
-{
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-
-    CHECKPOINT();
+                                pixman_op_t              op,
+                                pixman_image_t *         src_image,
+                                pixman_image_t *         mask_image,
+                                pixman_image_t *         dst_image,
+                                int32_t                  src_x,
+                                int32_t                  src_y,
+                                int32_t                  mask_x,
+                                int32_t                  mask_y,
+                                int32_t                  dest_x,
+                                int32_t                  dest_y,
+                                int32_t                  width,
+                                int32_t                  height)
+{
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+    uint16_t w;
+
+    CHECKPOINT ();
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
@@ -2283,8 +2486,8 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
 
 	    if ((a0 & a1) == 0xFF)
 	    {
-		d0 = invert_colors(load8888(s0));
-		d1 = invert_colors(load8888(s1));
+		d0 = invert_colors (load8888 (s0));
+		d1 = invert_colors (load8888 (s1));
 
 		*(__m64 *)dst = pack8888 (d0, d1);
 	    }
@@ -2292,8 +2495,8 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
 	    {
 		__m64 vdest = *(__m64 *)dst;
 
-		d0 = over_rev_non_pre (load8888(s0), expand8888 (vdest, 0));
-		d1 = over_rev_non_pre (load8888(s1), expand8888 (vdest, 1));
+		d0 = over_rev_non_pre (load8888 (s0), expand8888 (vdest, 0));
+		d1 = over_rev_non_pre (load8888 (s1), expand8888 (vdest, 1));
 
 		*(__m64 *)dst = pack8888 (d0, d1);
 	    }
@@ -2316,33 +2519,33 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
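
over_rev_non_pre in the two pixbuf routines above handles a
non-premultiplied source with R and B swapped. Conceptually, with
invert_colors swapping the color channels:

    over_rev_non_pre (s, d) = over (invert_colors (s) * alpha (s), alpha (s), d)

where the multiply premultiplies only the color channels. That is also why
the (a0 & a1) == 0xFF fast path can store invert_colors (load8888 (s))
directly: at full alpha both the premultiply and the blend are identities.
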
 
 static void
 mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
-				      pixman_op_t op,
-				      pixman_image_t * src_image,
-				      pixman_image_t * mask_image,
-				      pixman_image_t * dst_image,
-				      int32_t      src_x,
-				      int32_t      src_y,
-				      int32_t      mask_x,
-				      int32_t      mask_y,
-				      int32_t      dest_x,
-				      int32_t      dest_y,
-				      int32_t     width,
-				      int32_t     height)
-{
-    uint32_t	src, srca;
-    uint16_t	*dst_line;
-    uint32_t	*mask_line;
-    int	dst_stride, mask_stride;
-    __m64  vsrc, vsrca;
-
-    CHECKPOINT();
-
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+                                   pixman_op_t              op,
+                                   pixman_image_t *         src_image,
+                                   pixman_image_t *         mask_image,
+                                   pixman_image_t *         dst_image,
+                                   int32_t                  src_x,
+                                   int32_t                  src_y,
+                                   int32_t                  mask_x,
+                                   int32_t                  mask_y,
+                                   int32_t                  dest_x,
+                                   int32_t                  dest_y,
+                                   int32_t                  width,
+                                   int32_t                  height)
+{
+    uint32_t src, srca;
+    uint16_t    *dst_line;
+    uint32_t    *mask_line;
+    int dst_stride, mask_stride;
+    __m64 vsrc, vsrca;
+
+    CHECKPOINT ();
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (srca == 0)
@@ -2367,9 +2570,9 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		uint64_t d = *q;
-		__m64 vdest = expand565 (M64(d), 0);
+		__m64 vdest = expand565 (M64 (d), 0);
 		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
-		*q = UINT64(vdest);
+		*q = UINT64 (vdest);
 	    }
 
 	    twidth--;
@@ -2390,10 +2593,10 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 	    {
 		__m64 vdest = *(__m64 *)q;
 
-		vdest = pack_565(in_over(vsrc, vsrca, load8888(m0), expand565(vdest, 0)), vdest, 0);
-		vdest = pack_565(in_over(vsrc, vsrca, load8888(m1), expand565(vdest, 1)), vdest, 1);
-		vdest = pack_565(in_over(vsrc, vsrca, load8888(m2), expand565(vdest, 2)), vdest, 2);
-		vdest = pack_565(in_over(vsrc, vsrca, load8888(m3), expand565(vdest, 3)), vdest, 3);
+		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m0), expand565 (vdest, 0)), vdest, 0);
+		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m1), expand565 (vdest, 1)), vdest, 1);
+		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m2), expand565 (vdest, 2)), vdest, 2);
+		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m3), expand565 (vdest, 3)), vdest, 3);
 
 		*(__m64 *)q = vdest;
 	    }
@@ -2410,9 +2613,9 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 	    if (m)
 	    {
 		uint64_t d = *q;
-		__m64 vdest = expand565(M64(d), 0);
-		vdest = pack_565 (in_over(vsrc, vsrca, load8888(m), vdest), vdest, 0);
-		*q = UINT64(vdest);
+		__m64 vdest = expand565 (M64 (d), 0);
+		vdest = pack_565 (in_over (vsrc, vsrca, load8888 (m), vdest), vdest, 0);
+		*q = UINT64 (vdest);
 	    }
 
 	    twidth--;
@@ -2429,38 +2632,38 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 
 static void
 mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
-			pixman_op_t op,
-			pixman_image_t * src_image,
-			pixman_image_t * mask_image,
-			pixman_image_t * dst_image,
-			int32_t      src_x,
-			int32_t      src_y,
-			int32_t      mask_x,
-			int32_t      mask_y,
-			int32_t      dest_x,
-			int32_t      dest_y,
-			int32_t     width,
-			int32_t     height)
-{
-    uint8_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
-    uint32_t	src;
-    uint8_t	sa;
-    __m64	vsrc, vsrca;
+                        pixman_op_t              op,
+                        pixman_image_t *         src_image,
+                        pixman_image_t *         mask_image,
+                        pixman_image_t *         dst_image,
+                        int32_t                  src_x,
+                        int32_t                  src_y,
+                        int32_t                  mask_x,
+                        int32_t                  mask_y,
+                        int32_t                  dest_x,
+                        int32_t                  dest_y,
+                        int32_t                  width,
+                        int32_t                  height)
+{
+    uint8_t *dst_line, *dst;
+    uint8_t *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w;
+    uint32_t src;
+    uint8_t sa;
+    __m64 vsrc, vsrca;
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     sa = src >> 24;
     if (sa == 0)
 	return;
 
-    vsrc = load8888(src);
-    vsrca = expand_alpha(vsrc);
+    vsrc = load8888 (src);
+    vsrca = expand_alpha (vsrc);
 
     while (height--)
     {
@@ -2494,42 +2697,42 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
 
 	while (w--)
 	{
-	    uint16_t	tmp;
-	    uint8_t	a;
-	    uint32_t	m, d;
+	    uint16_t tmp;
+	    uint8_t a;
+	    uint32_t m, d;
 
 	    a = *mask++;
 	    d = *dst;
-	    
+
 	    m = MUL_UN8 (sa, a, tmp);
 	    d = MUL_UN8 (m, d, tmp);
-	    
+
 	    *dst++ = d;
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
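
The scalar tail above chains two rounded multiplies. Assuming the usual
MUL_UN8 definition, t = a * b + 0x80 followed by (t + (t >> 8)) >> 8, a
quick worked check: sa = 0x80 against a solid mask byte a = 0xff gives
m = MUL_UN8 (0x80, 0xff) = 0x80, and a destination byte d = 0x40 then
becomes MUL_UN8 (0x80, 0x40) = 0x20. IN at 50% alpha halves the
destination, and the 0x80 bias makes the division by 255 round to nearest.
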
 
 static void
 mmx_composite_in_8_8 (pixman_implementation_t *imp,
-		      pixman_op_t op,
-		      pixman_image_t * src_image,
-		      pixman_image_t * mask_image,
-		      pixman_image_t * dst_image,
-		      int32_t      src_x,
-		      int32_t      src_y,
-		      int32_t      mask_x,
-		      int32_t      mask_y,
-		      int32_t      dest_x,
-		      int32_t      dest_y,
-		      int32_t     width,
-		      int32_t     height)
-{
-    uint8_t	*dst_line, *dst;
-    uint8_t	*src_line, *src;
-    int	src_stride, dst_stride;
-    uint16_t	w;
+                      pixman_op_t              op,
+                      pixman_image_t *         src_image,
+                      pixman_image_t *         mask_image,
+                      pixman_image_t *         dst_image,
+                      int32_t                  src_x,
+                      int32_t                  src_y,
+                      int32_t                  mask_x,
+                      int32_t                  mask_y,
+                      int32_t                  dest_x,
+                      int32_t                  dest_y,
+                      int32_t                  width,
+                      int32_t                  height)
+{
+    uint8_t     *dst_line, *dst;
+    uint8_t     *src_line, *src;
+    int src_stride, dst_stride;
+    uint16_t w;
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
@@ -2578,38 +2781,38 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp,
 
 static void
 mmx_composite_add_8888_8_8 (pixman_implementation_t *imp,
-			       pixman_op_t op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t      src_x,
-			       int32_t      src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t     width,
-			       int32_t     height)
-{
-    uint8_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
-    uint32_t	src;
-    uint8_t	sa;
-    __m64	vsrc, vsrca;
+                            pixman_op_t              op,
+                            pixman_image_t *         src_image,
+                            pixman_image_t *         mask_image,
+                            pixman_image_t *         dst_image,
+                            int32_t                  src_x,
+                            int32_t                  src_y,
+                            int32_t                  mask_x,
+                            int32_t                  mask_y,
+                            int32_t                  dest_x,
+                            int32_t                  dest_y,
+                            int32_t                  width,
+                            int32_t                  height)
+{
+    uint8_t     *dst_line, *dst;
+    uint8_t     *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w;
+    uint32_t src;
+    uint8_t sa;
+    __m64 vsrc, vsrca;
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     sa = src >> 24;
     if (sa == 0)
 	return;
 
-    vsrc = load8888(src);
-    vsrca = expand_alpha(vsrc);
+    vsrc = load8888 (src);
+    vsrca = expand_alpha (vsrc);
 
     while (height--)
     {
@@ -2637,10 +2840,10 @@ mmx_composite_add_8888_8_8 (pixman_implementation_t *imp,
 
 	while (w--)
 	{
-	    uint16_t	tmp;
-	    uint16_t	a;
-	    uint32_t	m, d;
-	    uint32_t	r;
+	    uint16_t tmp;
+	    uint16_t a;
+	    uint32_t m, d;
+	    uint32_t r;
 
 	    a = *mask++;
 	    d = *dst;
@@ -2652,32 +2855,32 @@ mmx_composite_add_8888_8_8 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
 
 static void
 mmx_composite_add_8000_8000 (pixman_implementation_t *imp,
-				pixman_op_t op,
-				pixman_image_t * src_image,
-				pixman_image_t * mask_image,
-				pixman_image_t * dst_image,
-				int32_t      src_x,
-				int32_t      src_y,
-				int32_t      mask_x,
-				int32_t      mask_y,
-				int32_t      dest_x,
-				int32_t      dest_y,
-				int32_t     width,
-				int32_t     height)
-{
-    uint8_t	*dst_line, *dst;
-    uint8_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-    uint8_t	s, d;
-    uint16_t	t;
-
-    CHECKPOINT();
+                             pixman_op_t              op,
+                             pixman_image_t *         src_image,
+                             pixman_image_t *         mask_image,
+                             pixman_image_t *         dst_image,
+                             int32_t                  src_x,
+                             int32_t                  src_y,
+                             int32_t                  mask_x,
+                             int32_t                  mask_y,
+                             int32_t                  dest_x,
+                             int32_t                  dest_y,
+                             int32_t                  width,
+                             int32_t                  height)
+{
+    uint8_t *dst_line, *dst;
+    uint8_t *src_line, *src;
+    int dst_stride, src_stride;
+    uint16_t w;
+    uint8_t s, d;
+    uint16_t t;
+
+    CHECKPOINT ();
 
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
@@ -2705,7 +2908,7 @@ mmx_composite_add_8000_8000 (pixman_implementation_t *imp,
 
 	while (w >= 8)
 	{
-	    *(__m64*)dst = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
+	    *(__m64*)dst = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
 	    dst += 8;
 	    src += 8;
 	    w -= 8;
@@ -2725,31 +2928,31 @@ mmx_composite_add_8000_8000 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
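
_mm_adds_pu8 above adds bytes with unsigned saturation; the per-byte
remainders of this routine (outside the hunks shown) can use a branch-free
scalar equivalent. A sketch of that trick (adds_u8 is an illustrative
name):

    #include <stdint.h>

    /* Unsigned saturating byte add, branch-free: (t >> 8) is 1 only
     * on overflow, and 0 - 1 wraps to an all-ones mask. */
    static uint8_t
    adds_u8 (uint8_t s, uint8_t d)
    {
        unsigned t = (unsigned)s + d;

        return (uint8_t)(t | (0u - (t >> 8)));
    }
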
 
 static void
 mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
-				pixman_op_t 	op,
-				pixman_image_t *	src_image,
-				pixman_image_t *	mask_image,
-				pixman_image_t *	 dst_image,
-				int32_t		 src_x,
-				int32_t      src_y,
-				int32_t      mask_x,
-				int32_t      mask_y,
-				int32_t      dest_x,
-				int32_t      dest_y,
-				int32_t     width,
-				int32_t     height)
+                             pixman_op_t              op,
+                             pixman_image_t *         src_image,
+                             pixman_image_t *         mask_image,
+                             pixman_image_t *         dst_image,
+                             int32_t                  src_x,
+                             int32_t                  src_y,
+                             int32_t                  mask_x,
+                             int32_t                  mask_y,
+                             int32_t                  dest_x,
+                             int32_t                  dest_y,
+                             int32_t                  width,
+                             int32_t                  height)
 {
     __m64 dst64;
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint16_t	w;
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+    uint16_t w;
 
-    CHECKPOINT();
+    CHECKPOINT ();
 
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
@@ -2764,8 +2967,8 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
 
 	while (w && (unsigned long)dst & 7)
 	{
-	    *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src),
-						 _mm_cvtsi32_si64(*dst)));
+	    *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
+	                                           _mm_cvtsi32_si64 (*dst)));
 	    dst++;
 	    src++;
 	    w--;
@@ -2773,8 +2976,8 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
 
 	while (w >= 2)
 	{
-	    dst64 = _mm_adds_pu8(*(__m64*)src, *(__m64*)dst);
-	    *(uint64_t*)dst = UINT64(dst64);
+	    dst64 = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
+	    *(uint64_t*)dst = UINT64 (dst64);
 	    dst += 2;
 	    src += 2;
 	    w -= 2;
@@ -2782,29 +2985,32 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
 
 	if (w)
 	{
-	    *dst = _mm_cvtsi64_si32(_mm_adds_pu8(_mm_cvtsi32_si64(*src),
-						 _mm_cvtsi32_si64(*dst)));
+	    *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
+	                                           _mm_cvtsi32_si64 (*dst)));
 
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
 
 static pixman_bool_t
 pixman_blt_mmx (uint32_t *src_bits,
-		uint32_t *dst_bits,
-		int src_stride,
-		int dst_stride,
-		int src_bpp,
-		int dst_bpp,
-		int src_x, int src_y,
-		int dst_x, int dst_y,
-		int width, int height)
-{
-    uint8_t *	src_bytes;
-    uint8_t *	dst_bytes;
-    int		byte_width;
+                uint32_t *dst_bits,
+                int       src_stride,
+                int       dst_stride,
+                int       src_bpp,
+                int       dst_bpp,
+                int       src_x,
+                int       src_y,
+                int       dst_x,
+                int       dst_y,
+                int       width,
+                int       height)
+{
+    uint8_t *   src_bytes;
+    uint8_t *   dst_bytes;
+    int byte_width;
 
     if (src_bpp != dst_bpp)
 	return FALSE;
@@ -2818,7 +3024,9 @@ pixman_blt_mmx (uint32_t *src_bits,
 	byte_width = 2 * width;
 	src_stride *= 2;
 	dst_stride *= 2;
-    } else if (src_bpp == 32) {
+    }
+    else if (src_bpp == 32)
+    {
 	src_stride = src_stride * (int) sizeof (uint32_t) / 4;
 	dst_stride = dst_stride * (int) sizeof (uint32_t) / 4;
 	src_bytes = (uint8_t *)(((uint32_t *)src_bits) + src_stride * (src_y) + (src_x));
@@ -2826,7 +3034,9 @@ pixman_blt_mmx (uint32_t *src_bits,
 	byte_width = 4 * width;
 	src_stride *= 4;
 	dst_stride *= 4;
-    } else {
+    }
+    else
+    {
 	return FALSE;
     }
 
@@ -2860,28 +3070,28 @@ pixman_blt_mmx (uint32_t *src_bits,
 	{
 #if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
 	    __asm__ (
-		"movq	  (%1),	  %%mm0\n"
-		"movq	 8(%1),	  %%mm1\n"
-		"movq	16(%1),	  %%mm2\n"
-		"movq	24(%1),	  %%mm3\n"
-		"movq	32(%1),	  %%mm4\n"
-		"movq	40(%1),	  %%mm5\n"
-		"movq	48(%1),	  %%mm6\n"
-		"movq	56(%1),	  %%mm7\n"
-
-		"movq	%%mm0,	  (%0)\n"
-		"movq	%%mm1,	 8(%0)\n"
-		"movq	%%mm2,	16(%0)\n"
-		"movq	%%mm3,	24(%0)\n"
-		"movq	%%mm4,	32(%0)\n"
-		"movq	%%mm5,	40(%0)\n"
-		"movq	%%mm6,	48(%0)\n"
-		"movq	%%mm7,	56(%0)\n"
+	        "movq	  (%1),	  %%mm0\n"
+	        "movq	 8(%1),	  %%mm1\n"
+	        "movq	16(%1),	  %%mm2\n"
+	        "movq	24(%1),	  %%mm3\n"
+	        "movq	32(%1),	  %%mm4\n"
+	        "movq	40(%1),	  %%mm5\n"
+	        "movq	48(%1),	  %%mm6\n"
+	        "movq	56(%1),	  %%mm7\n"
+
+	        "movq	%%mm0,	  (%0)\n"
+	        "movq	%%mm1,	 8(%0)\n"
+	        "movq	%%mm2,	16(%0)\n"
+	        "movq	%%mm3,	24(%0)\n"
+	        "movq	%%mm4,	32(%0)\n"
+	        "movq	%%mm5,	40(%0)\n"
+	        "movq	%%mm6,	48(%0)\n"
+	        "movq	%%mm7,	56(%0)\n"
 		:
 		: "r" (d), "r" (s)
 		: "memory",
-		  "%mm0", "%mm1", "%mm2", "%mm3",
-		  "%mm4", "%mm5", "%mm6", "%mm7");
+	        "%mm0", "%mm1", "%mm2", "%mm3",
+	        "%mm4", "%mm5", "%mm6", "%mm7");
 #else
 	    __m64 v0 = *(__m64 *)(s + 0);
 	    __m64 v1 = *(__m64 *)(s + 8);
@@ -2922,54 +3132,54 @@ pixman_blt_mmx (uint32_t *src_bits,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 
     return TRUE;
 }
 
 static void
 mmx_composite_copy_area (pixman_implementation_t *imp,
-			pixman_op_t       op,
-			pixman_image_t *	src_image,
-			pixman_image_t *	mask_image,
-			pixman_image_t *	dst_image,
-			int32_t		src_x,
-			int32_t		src_y,
-			int32_t		mask_x,
-			int32_t		mask_y,
-			int32_t		dest_x,
-			int32_t		dest_y,
-			int32_t		width,
-			int32_t		height)
+                         pixman_op_t              op,
+                         pixman_image_t *         src_image,
+                         pixman_image_t *         mask_image,
+                         pixman_image_t *         dst_image,
+                         int32_t                  src_x,
+                         int32_t                  src_y,
+                         int32_t                  mask_x,
+                         int32_t                  mask_y,
+                         int32_t                  dest_x,
+                         int32_t                  dest_y,
+                         int32_t                  width,
+                         int32_t                  height)
 {
     pixman_blt_mmx (src_image->bits.bits,
-		    dst_image->bits.bits,
-		    src_image->bits.rowstride,
-		    dst_image->bits.rowstride,
-		    PIXMAN_FORMAT_BPP (src_image->bits.format),
-		    PIXMAN_FORMAT_BPP (dst_image->bits.format),
-		    src_x, src_y, dest_x, dest_y, width, height);
+                    dst_image->bits.bits,
+                    src_image->bits.rowstride,
+                    dst_image->bits.rowstride,
+                    PIXMAN_FORMAT_BPP (src_image->bits.format),
+                    PIXMAN_FORMAT_BPP (dst_image->bits.format),
+                    src_x, src_y, dest_x, dest_y, width, height);
 }
 
 static void
 mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
-				pixman_op_t      op,
-				pixman_image_t * src_image,
-				pixman_image_t * mask_image,
-				pixman_image_t * dst_image,
-				int32_t      src_x,
-				int32_t      src_y,
-				int32_t      mask_x,
-				int32_t      mask_y,
-				int32_t      dest_x,
-				int32_t      dest_y,
-				int32_t     width,
-				int32_t     height)
-{
-    uint32_t	*src, *src_line;
+                                pixman_op_t              op,
+                                pixman_image_t *         src_image,
+                                pixman_image_t *         mask_image,
+                                pixman_image_t *         dst_image,
+                                int32_t                  src_x,
+                                int32_t                  src_y,
+                                int32_t                  mask_x,
+                                int32_t                  mask_y,
+                                int32_t                  dest_x,
+                                int32_t                  dest_y,
+                                int32_t                  width,
+                                int32_t                  height)
+{
+    uint32_t    *src, *src_line;
     uint32_t    *dst, *dst_line;
-    uint8_t	*mask, *mask_line;
-    int		 src_stride, mask_stride, dst_stride;
+    uint8_t     *mask, *mask_line;
+    int src_stride, mask_stride, dst_stride;
     uint16_t w;
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
@@ -3000,8 +3210,8 @@ mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
 		else
 		{
 		    __m64 sa = expand_alpha (s);
-		    __m64 vm = expand_alpha_rev (M64(m));
-		    __m64 vdest = in_over(s, sa, vm, load8888 (*dst));
+		    __m64 vm = expand_alpha_rev (M64 (m));
+		    __m64 vdest = in_over (s, sa, vm, load8888 (*dst));
 
 		    *dst = store8888 (vdest);
 		}
@@ -3013,7 +3223,7 @@ mmx_composite_over_x888_8_8888 (pixman_implementation_t *imp,
 	}
     }
 
-    _mm_empty();
+    _mm_empty ();
 }
 
 static const pixman_fast_path_t mmx_fast_paths[] =
@@ -3043,35 +3253,35 @@ static const pixman_fast_path_t mmx_fast_paths[] =
     { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5,   mmx_composite_over_pixbuf_0565, NEED_PIXBUF },
     { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5,   mmx_composite_over_pixbuf_0565, NEED_PIXBUF },
     { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, mmx_composite_over_x888_n_8888,    NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, mmx_composite_over_x888_n_8888,	   NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,	PIXMAN_a8b8g8r8, mmx_composite_over_x888_n_8888,	   NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,	PIXMAN_x8b8g8r8, mmx_composite_over_x888_n_8888,	   NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, mmx_composite_over_x888_n_8888,           NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_a8b8g8r8, mmx_composite_over_x888_n_8888,           NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, mmx_composite_over_x888_n_8888,           NEED_SOLID_MASK },
     { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, mmx_composite_over_8888_n_8888,    NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, mmx_composite_over_8888_n_8888,	   NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,	PIXMAN_a8b8g8r8, mmx_composite_over_8888_n_8888,	   NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,	PIXMAN_x8b8g8r8, mmx_composite_over_8888_n_8888,	   NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, mmx_composite_over_8888_n_8888,           NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_a8b8g8r8, mmx_composite_over_8888_n_8888,           NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, mmx_composite_over_8888_n_8888,           NEED_SOLID_MASK },
 #if 0
     /* FIXME: This code is commented out since it's apparently not actually faster than the generic code. */
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,	PIXMAN_x8r8g8b8, mmx_composite_over_x888_8_8888,   0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,	PIXMAN_a8r8g8b8, mmx_composite_over_x888_8_8888,   0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8,	PIXMAN_x8b8g8r8, mmx_composite_over_x888_8_8888,   0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8,	PIXMAN_a8r8g8b8, mmx_composite_over_x888_8_8888,   0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, mmx_composite_over_x888_8_8888,   0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, mmx_composite_over_x888_8_8888,   0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8,       PIXMAN_x8b8g8r8, mmx_composite_over_x888_8_8888,   0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8r8g8, PIXMAN_a8,       PIXMAN_a8r8g8b8, mmx_composite_over_x888_8_8888,   0 },
 #endif
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,	PIXMAN_a8r8g8b8, mmx_composite_over_n_8888,       0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_x8r8g8b8, mmx_composite_over_n_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_r5g6b5,   mmx_composite_over_n_0565,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, mmx_composite_copy_area,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, mmx_composite_copy_area,	   0 },
-
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, mmx_composite_over_8888_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,	PIXMAN_x8r8g8b8, mmx_composite_over_8888_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,	PIXMAN_r5g6b5,	 mmx_composite_over_8888_0565,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,	PIXMAN_a8b8g8r8, mmx_composite_over_8888_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,	PIXMAN_x8b8g8r8, mmx_composite_over_8888_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   mmx_composite_over_8888_0565,	   0 },
-
-    { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8,  PIXMAN_null,	PIXMAN_a8r8g8b8, mmx_composite_add_8888_8888,   0 },
-    { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8,  PIXMAN_null,	PIXMAN_a8b8g8r8, mmx_composite_add_8888_8888,   0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_a8r8g8b8, mmx_composite_over_n_8888,       0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_x8r8g8b8, mmx_composite_over_n_8888,        0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_r5g6b5,   mmx_composite_over_n_0565,        0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, mmx_composite_copy_area,          0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, mmx_composite_copy_area,          0 },
+
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, mmx_composite_over_8888_8888,     0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, mmx_composite_over_8888_8888,     0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   mmx_composite_over_8888_0565,     0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, mmx_composite_over_8888_8888,     0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, mmx_composite_over_8888_8888,     0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   mmx_composite_over_8888_0565,     0 },
+
+    { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_a8r8g8b8, mmx_composite_add_8888_8888,   0 },
+    { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_a8b8g8r8, mmx_composite_add_8888_8888,   0 },
     { PIXMAN_OP_ADD, PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       mmx_composite_add_8000_8000,   0 },
     { PIXMAN_OP_ADD, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       mmx_composite_add_8888_8_8,    0 },
 
@@ -3079,71 +3289,76 @@ static const pixman_fast_path_t mmx_fast_paths[] =
     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_x8r8g8b8, mmx_composite_src_n_8_8888, 0 },
     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8b8g8r8, mmx_composite_src_n_8_8888, 0 },
     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_x8b8g8r8, mmx_composite_src_n_8_8888, 0 },
-    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,	PIXMAN_a8r8g8b8, mmx_composite_copy_area, 0 },
-    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,	PIXMAN_a8b8g8r8, mmx_composite_copy_area, 0 },
-    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,	PIXMAN_x8r8g8b8, mmx_composite_copy_area, 0 },
-    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,	PIXMAN_x8b8g8r8, mmx_composite_copy_area, 0 },
-    { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,	PIXMAN_x8r8g8b8, mmx_composite_copy_area, 0 },
-    { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,	PIXMAN_x8b8g8r8, mmx_composite_copy_area, 0 },
+    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_a8r8g8b8, mmx_composite_copy_area, 0 },
+    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_a8b8g8r8, mmx_composite_copy_area, 0 },
+    { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, mmx_composite_copy_area, 0 },
+    { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_x8b8g8r8, mmx_composite_copy_area, 0 },
+    { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, mmx_composite_copy_area, 0 },
+    { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,     PIXMAN_x8b8g8r8, mmx_composite_copy_area, 0 },
     { PIXMAN_OP_SRC, PIXMAN_r5g6b5,    PIXMAN_null,     PIXMAN_r5g6b5,   mmx_composite_copy_area, 0 },
-    { PIXMAN_OP_SRC, PIXMAN_b5g6r5,    PIXMAN_null,     PIXMAN_b5g6r5,   mmx_composite_copy_area, 0 },    
+    { PIXMAN_OP_SRC, PIXMAN_b5g6r5,    PIXMAN_null,     PIXMAN_b5g6r5,   mmx_composite_copy_area, 0 },
 
     { PIXMAN_OP_IN,  PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       mmx_composite_in_8_8,   0 },
-    { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,	PIXMAN_a8,	 mmx_composite_in_n_8_8, 0 },
+    { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       mmx_composite_in_n_8_8, 0 },
 
     { PIXMAN_OP_NONE },
 };
 
 static void
 mmx_composite (pixman_implementation_t *imp,
-	       pixman_op_t     op,
-	       pixman_image_t *src,
-	       pixman_image_t *mask,
-	       pixman_image_t *dest,
-	       int32_t         src_x,
-	       int32_t         src_y,
-	       int32_t         mask_x,
-	       int32_t         mask_y,
-	       int32_t         dest_x,
-	       int32_t         dest_y,
-	       int32_t         width,
-	       int32_t         height)
+               pixman_op_t              op,
+               pixman_image_t *         src,
+               pixman_image_t *         mask,
+               pixman_image_t *         dest,
+               int32_t                  src_x,
+               int32_t                  src_y,
+               int32_t                  mask_x,
+               int32_t                  mask_y,
+               int32_t                  dest_x,
+               int32_t                  dest_y,
+               int32_t                  width,
+               int32_t                  height)
 {
     if (_pixman_run_fast_path (mmx_fast_paths, imp,
-			       op, src, mask, dest,
-			       src_x, src_y,
-			       mask_x, mask_y,
-			       dest_x, dest_y,
-			       width, height))
+                               op, src, mask, dest,
+                               src_x, src_y,
+                               mask_x, mask_y,
+                               dest_x, dest_y,
+                               width, height))
+    {
 	return;
+    }
 
     _pixman_implementation_composite (imp->delegate,
-				      op, src, mask, dest, src_x, src_y,
-				      mask_x, mask_y, dest_x, dest_y,
-				      width, height);
+                                      op, src, mask, dest, src_x, src_y,
+                                      mask_x, mask_y, dest_x, dest_y,
+                                      width, height);
 }
 
 static pixman_bool_t
 mmx_blt (pixman_implementation_t *imp,
-	 uint32_t *src_bits,
-	 uint32_t *dst_bits,
-	 int src_stride,
-	 int dst_stride,
-	 int src_bpp,
-	 int dst_bpp,
-	 int src_x, int src_y,
-	 int dst_x, int dst_y,
-	 int width, int height)
+         uint32_t *               src_bits,
+         uint32_t *               dst_bits,
+         int                      src_stride,
+         int                      dst_stride,
+         int                      src_bpp,
+         int                      dst_bpp,
+         int                      src_x,
+         int                      src_y,
+         int                      dst_x,
+         int                      dst_y,
+         int                      width,
+         int                      height)
 {
     if (!pixman_blt_mmx (
-	    src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
-	    src_x, src_y, dst_x, dst_y, width, height))
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dst_x, dst_y, width, height))
 
     {
 	return _pixman_implementation_blt (
-	    imp->delegate,
-	    src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
-	    src_x, src_y, dst_x, dst_y, width, height);
+	           imp->delegate,
+	           src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+	           src_x, src_y, dst_x, dst_y, width, height);
     }
 
     return TRUE;
@@ -3151,19 +3366,19 @@ mmx_blt (pixman_implementation_t *imp,
 
 static pixman_bool_t
 mmx_fill (pixman_implementation_t *imp,
-	  uint32_t *bits,
-	  int stride,
-	  int bpp,
-	  int x,
-	  int y,
-	  int width,
-	  int height,
-	  uint32_t xor)
+          uint32_t *               bits,
+          int                      stride,
+          int                      bpp,
+          int                      x,
+          int                      y,
+          int                      width,
+          int                      height,
+          uint32_t xor)
 {
     if (!pixman_fill_mmx (bits, stride, bpp, x, y, width, height, xor))
     {
 	return _pixman_implementation_fill (
-	    imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+	           imp->delegate, bits, stride, bpp, x, y, width, height, xor);
     }
 
     return TRUE;
@@ -3183,10 +3398,10 @@ _pixman_implementation_create_mmx (void)
     imp->combine_32[PIXMAN_OP_OUT_REVERSE] = mmx_combine_out_reverse_u;
     imp->combine_32[PIXMAN_OP_ATOP] = mmx_combine_atop_u;
     imp->combine_32[PIXMAN_OP_ATOP_REVERSE] = mmx_combine_atop_reverse_u;
-    imp->combine_32[PIXMAN_OP_XOR] = mmx_combine_xor_u; 
+    imp->combine_32[PIXMAN_OP_XOR] = mmx_combine_xor_u;
     imp->combine_32[PIXMAN_OP_ADD] = mmx_combine_add_u;
     imp->combine_32[PIXMAN_OP_SATURATE] = mmx_combine_saturate_u;
-    
+
     imp->combine_32_ca[PIXMAN_OP_SRC] = mmx_combine_src_ca;
     imp->combine_32_ca[PIXMAN_OP_OVER] = mmx_combine_over_ca;
     imp->combine_32_ca[PIXMAN_OP_OVER_REVERSE] = mmx_combine_over_reverse_ca;
@@ -3202,7 +3417,7 @@ _pixman_implementation_create_mmx (void)
     imp->composite = mmx_composite;
     imp->blt = mmx_blt;
     imp->fill = mmx_fill;
-    
+
     return imp;
 }
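
[Editor's note: the composite-add paths above lean on three MMX idioms that the reindent makes easier to see: _mm_adds_pu8 () for per-byte saturating addition, the _mm_cvtsi32_si64 ()/_mm_cvtsi64_si32 () pair for handling a single leftover pixel, and a mandatory _mm_empty () before returning to code that may use x87 floating point. A minimal standalone sketch of the same pattern follows; it is not part of the commit, and the function name and the assumption that w counts whole pixels are illustrative only.]

#include <stdint.h>
#include <mmintrin.h>

/* Illustrative sketch only; not part of the commit.  The real
 * mmx_composite_add_8888_8888 above additionally aligns dst to an
 * 8-byte boundary before entering the two-pixel loop. */
static void
add_saturate_8888 (uint32_t *dst, const uint32_t *src, int w)
{
    /* Two a8r8g8b8 pixels per 64-bit MMX register; _mm_adds_pu8 adds
     * all eight channel bytes with unsigned saturation. */
    while (w >= 2)
    {
        *(__m64 *)dst = _mm_adds_pu8 (*(__m64 *)src, *(__m64 *)dst);
        dst += 2;
        src += 2;
        w -= 2;
    }

    /* A leftover single pixel is widened to __m64, added, and
     * narrowed back to 32 bits. */
    if (w)
    {
        *dst = _mm_cvtsi64_si32 (_mm_adds_pu8 (_mm_cvtsi32_si64 (*src),
                                               _mm_cvtsi32_si64 (*dst)));
    }

    /* Clear the MMX state so that later x87 code works correctly. */
    _mm_empty ();
}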
 
commit c68283360d2e5917f15bddc0a14aa7a1c1b3852e
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 19:24:31 2009 -0400

    Reindent and reformat pixman-matrix.c
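
[Editor's note: throughout the file below, a pixman_fixed_t is a 16.16 fixed-point number, and products are computed in a 64-bit intermediate before the extra 16 fraction bits are shifted away and the result is range-checked. A minimal sketch of that recurring pattern, using illustrative type names that merely stand in for the real pixman typedefs:]

#include <stdint.h>

typedef int32_t fixed_16_16_t;   /* stands in for pixman_fixed_t       */
typedef int64_t fixed_48_16_t;   /* stands in for pixman_fixed_48_16_t */

/* Multiply two 16.16 values; returns 0 on overflow, mirroring the
 * FALSE returns in pixman_transform_multiply() and friends below.
 * Sketch only; not part of the commit. */
static int
fixed_mul (fixed_16_16_t a, fixed_16_16_t b, fixed_16_16_t *out)
{
    /* 16.16 * 16.16 = 32.32 in 64 bits; >> 16 brings it back to .16. */
    fixed_48_16_t v = ((fixed_48_16_t) a * (fixed_48_16_t) b) >> 16;

    if (v > INT32_MAX || v < INT32_MIN)
        return 0;

    *out = (fixed_16_16_t) v;
    return 1;
}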

diff --git a/pixman/pixman-matrix.c b/pixman/pixman-matrix.c
index 29f6025..abdfa05 100644
--- a/pixman/pixman-matrix.c
+++ b/pixman/pixman-matrix.c
@@ -32,595 +32,737 @@
 #include <string.h>
 #include "pixman-private.h"
 
-#define F(x)	pixman_int_to_fixed(x)
+#define F(x)    pixman_int_to_fixed (x)
 
 PIXMAN_EXPORT void
-pixman_transform_init_identity(struct pixman_transform *matrix)
+pixman_transform_init_identity (struct pixman_transform *matrix)
 {
-	int	i;
+    int i;
 
-	memset(matrix, '\0', sizeof (struct pixman_transform));
-	for (i = 0; i < 3; i++)
-		matrix->matrix[i][i] = F(1);
+    memset (matrix, '\0', sizeof (struct pixman_transform));
+    for (i = 0; i < 3; i++)
+	matrix->matrix[i][i] = F (1);
 }
 
-typedef pixman_fixed_32_32_t	pixman_fixed_34_30_t;
+typedef pixman_fixed_32_32_t pixman_fixed_34_30_t;
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_point_3d(const struct pixman_transform *transform,
-			  struct pixman_vector *vector)
+pixman_transform_point_3d (const struct pixman_transform *transform,
+                           struct pixman_vector *         vector)
 {
-	struct pixman_vector result;
-	pixman_fixed_32_32_t partial;
-	pixman_fixed_48_16_t v;
-	int i, j;
+    struct pixman_vector result;
+    pixman_fixed_32_32_t partial;
+    pixman_fixed_48_16_t v;
+    int i, j;
 
-	for (j = 0; j < 3; j++)
+    for (j = 0; j < 3; j++)
+    {
+	v = 0;
+	for (i = 0; i < 3; i++)
 	{
-		v = 0;
-		for (i = 0; i < 3; i++)
-		{
-			partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] *
-				   (pixman_fixed_48_16_t) vector->vector[i]);
-			v += partial >> 16;
-		}
-		if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
-			return FALSE;
-		result.vector[j] = (pixman_fixed_t) v;
+	    partial = ((pixman_fixed_48_16_t) transform->matrix[j][i] *
+	               (pixman_fixed_48_16_t) vector->vector[i]);
+	    v += partial >> 16;
 	}
-	*vector = result;
-	if (!result.vector[2])
-		return FALSE;
-	return TRUE;
+	
+	if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
+	    return FALSE;
+	
+	result.vector[j] = (pixman_fixed_t) v;
+    }
+    
+    *vector = result;
+
+    if (!result.vector[2])
+	return FALSE;
+
+    return TRUE;
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_point(const struct pixman_transform *transform,
-		       struct pixman_vector *vector)
+pixman_transform_point (const struct pixman_transform *transform,
+                        struct pixman_vector *         vector)
 {
-	pixman_fixed_32_32_t partial;
-	pixman_fixed_34_30_t v[3];
-	pixman_fixed_48_16_t quo;
-	int i, j;
+    pixman_fixed_32_32_t partial;
+    pixman_fixed_34_30_t v[3];
+    pixman_fixed_48_16_t quo;
+    int i, j;
 
-	for (j = 0; j < 3; j++)
-	{
-		v[j] = 0;
-		for (i = 0; i < 3; i++)
-		{
-			partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] * 
-				   (pixman_fixed_32_32_t) vector->vector[i]);
-			v[j] += partial >> 2;
-		}
-	}
-	if (!(v[2] >> 16))
-		return FALSE;
-	for (j = 0; j < 2; j++)
+    for (j = 0; j < 3; j++)
+    {
+	v[j] = 0;
+	
+	for (i = 0; i < 3; i++)
 	{
-		quo = v[j] / (v[2] >> 16);
-		if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16)
-			return FALSE;
-		vector->vector[j] = (pixman_fixed_t) quo;
+	    partial = ((pixman_fixed_32_32_t) transform->matrix[j][i] *
+	               (pixman_fixed_32_32_t) vector->vector[i]);
+	    v[j] += partial >> 2;
 	}
-	vector->vector[2] = pixman_fixed_1;
-	return TRUE;
+    }
+    
+    if (!(v[2] >> 16))
+	return FALSE;
+
+    for (j = 0; j < 2; j++)
+    {
+	quo = v[j] / (v[2] >> 16);
+	if (quo > pixman_max_fixed_48_16 || quo < pixman_min_fixed_48_16)
+	    return FALSE;
+	vector->vector[j] = (pixman_fixed_t) quo;
+    }
+    
+    vector->vector[2] = pixman_fixed_1;
+    return TRUE;
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_multiply (struct pixman_transform *dst,
-			   const struct pixman_transform *l,
-			   const struct pixman_transform *r)
-{
-	struct pixman_transform d;
-	int dx, dy;
-	int o;
-
-	for (dy = 0; dy < 3; dy++)
-		for (dx = 0; dx < 3; dx++) {
-			pixman_fixed_48_16_t    v;
-			pixman_fixed_32_32_t    partial;
-			v = 0;
-			for (o = 0; o < 3; o++) {
-				partial = (pixman_fixed_32_32_t) l->matrix[dy][o] * (pixman_fixed_32_32_t) r->matrix[o][dx];
-				v += partial >> 16;
-			}
-			if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
-				return FALSE;
-			d.matrix[dy][dx] = (pixman_fixed_t) v;
-		}
-	*dst = d;
-	return TRUE;
+pixman_transform_multiply (struct pixman_transform *      dst,
+                           const struct pixman_transform *l,
+                           const struct pixman_transform *r)
+{
+    struct pixman_transform d;
+    int dx, dy;
+    int o;
+
+    for (dy = 0; dy < 3; dy++)
+    {
+	for (dx = 0; dx < 3; dx++)
+	{
+	    pixman_fixed_48_16_t v;
+	    pixman_fixed_32_32_t partial;
+	    
+	    v = 0;
+	    for (o = 0; o < 3; o++)
+	    {
+		partial =
+		    (pixman_fixed_32_32_t) l->matrix[dy][o] *
+		    (pixman_fixed_32_32_t) r->matrix[o][dx];
+
+		v += partial >> 16;
+	    }
+
+	    if (v > pixman_max_fixed_48_16 || v < pixman_min_fixed_48_16)
+		return FALSE;
+	    
+	    d.matrix[dy][dx] = (pixman_fixed_t) v;
+	}
+    }
+
+    *dst = d;
+    return TRUE;
 }
 
 PIXMAN_EXPORT void
 pixman_transform_init_scale (struct pixman_transform *t,
-			     pixman_fixed_t sx,
-			     pixman_fixed_t sy)
+                             pixman_fixed_t           sx,
+                             pixman_fixed_t           sy)
 {
-	memset (t, '\0', sizeof (struct pixman_transform));
-	t->matrix[0][0] = sx;
-	t->matrix[1][1] = sy;
-	t->matrix[2][2] = F (1);
+    memset (t, '\0', sizeof (struct pixman_transform));
+
+    t->matrix[0][0] = sx;
+    t->matrix[1][1] = sy;
+    t->matrix[2][2] = F (1);
 }
 
 static pixman_fixed_t
-fixed_inverse(pixman_fixed_t x)
+fixed_inverse (pixman_fixed_t x)
 {
-	return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F(1)) * F(1)) / x);
+    return (pixman_fixed_t) ((((pixman_fixed_48_16_t) F (1)) * F (1)) / x);
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_scale(struct pixman_transform *forward,
-		       struct pixman_transform *reverse,
-		       pixman_fixed_t sx, pixman_fixed_t sy)
-{
-	struct pixman_transform   t;
-
-	if (sx == 0 || sy == 0)
-		return FALSE;
-
-	if (forward) {
-		pixman_transform_init_scale (&t, sx, sy);
-		if (!pixman_transform_multiply (forward, &t, forward))
-			return FALSE;
-	}
-	if (reverse) {
-		pixman_transform_init_scale (&t, fixed_inverse (sx),
-					     fixed_inverse (sy));
-		if (!pixman_transform_multiply (reverse, reverse, &t))
-			return FALSE;
-	}
-	return TRUE;
+pixman_transform_scale (struct pixman_transform *forward,
+                        struct pixman_transform *reverse,
+                        pixman_fixed_t           sx,
+                        pixman_fixed_t           sy)
+{
+    struct pixman_transform t;
+
+    if (sx == 0 || sy == 0)
+	return FALSE;
+
+    if (forward)
+    {
+	pixman_transform_init_scale (&t, sx, sy);
+	if (!pixman_transform_multiply (forward, &t, forward))
+	    return FALSE;
+    }
+    
+    if (reverse)
+    {
+	pixman_transform_init_scale (&t, fixed_inverse (sx),
+	                             fixed_inverse (sy));
+	if (!pixman_transform_multiply (reverse, reverse, &t))
+	    return FALSE;
+    }
+    
+    return TRUE;
 }
 
 PIXMAN_EXPORT void
-pixman_transform_init_rotate(struct pixman_transform *t,
-			     pixman_fixed_t c,
-			     pixman_fixed_t s)
+pixman_transform_init_rotate (struct pixman_transform *t,
+                              pixman_fixed_t           c,
+                              pixman_fixed_t           s)
 {
-	memset(t, '\0', sizeof (struct pixman_transform));
-	t->matrix[0][0] = c;
-	t->matrix[0][1] = -s;
-	t->matrix[1][0] = s;
-	t->matrix[1][1] = c;
-	t->matrix[2][2] = F (1);
+    memset (t, '\0', sizeof (struct pixman_transform));
+
+    t->matrix[0][0] = c;
+    t->matrix[0][1] = -s;
+    t->matrix[1][0] = s;
+    t->matrix[1][1] = c;
+    t->matrix[2][2] = F (1);
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_rotate(struct pixman_transform *forward,
-			struct pixman_transform *reverse,
-			pixman_fixed_t c, pixman_fixed_t s)
-{
-	struct pixman_transform   t;
-	
-	if (forward) {
-		pixman_transform_init_rotate(&t, c, s);
-		if (!pixman_transform_multiply(forward, &t, forward))
-			return FALSE;
-	}
-
-	if (reverse) {
-		pixman_transform_init_rotate(&t, c, -s);
-		if (!pixman_transform_multiply (reverse, reverse, &t))
-			return FALSE;
-	}
-	return TRUE;
+pixman_transform_rotate (struct pixman_transform *forward,
+                         struct pixman_transform *reverse,
+                         pixman_fixed_t           c,
+                         pixman_fixed_t           s)
+{
+    struct pixman_transform t;
+
+    if (forward)
+    {
+	pixman_transform_init_rotate (&t, c, s);
+	if (!pixman_transform_multiply (forward, &t, forward))
+	    return FALSE;
+    }
+
+    if (reverse)
+    {
+	pixman_transform_init_rotate (&t, c, -s);
+	if (!pixman_transform_multiply (reverse, reverse, &t))
+	    return FALSE;
+    }
+    
+    return TRUE;
 }
 
 PIXMAN_EXPORT void
-pixman_transform_init_translate(struct pixman_transform *t,
-				pixman_fixed_t tx, pixman_fixed_t ty)
+pixman_transform_init_translate (struct pixman_transform *t,
+                                 pixman_fixed_t           tx,
+                                 pixman_fixed_t           ty)
 {
-	memset(t, '\0', sizeof (struct pixman_transform));
-	t->matrix[0][0] = F (1);
-	t->matrix[0][2] = tx;
-	t->matrix[1][1] = F (1);
-	t->matrix[1][2] = ty;
-	t->matrix[2][2] = F (1);
+    memset (t, '\0', sizeof (struct pixman_transform));
+
+    t->matrix[0][0] = F (1);
+    t->matrix[0][2] = tx;
+    t->matrix[1][1] = F (1);
+    t->matrix[1][2] = ty;
+    t->matrix[2][2] = F (1);
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_translate(struct pixman_transform *forward,
-			   struct pixman_transform *reverse,
-			   pixman_fixed_t tx, pixman_fixed_t ty)
+pixman_transform_translate (struct pixman_transform *forward,
+                            struct pixman_transform *reverse,
+                            pixman_fixed_t           tx,
+                            pixman_fixed_t           ty)
 {
-	struct pixman_transform   t;
+    struct pixman_transform t;
 
-	if (forward) {
-		pixman_transform_init_translate(&t, tx, ty);
-		if (!pixman_transform_multiply(forward, &t, forward))
-			return FALSE;
-	}
+    if (forward)
+    {
+	pixman_transform_init_translate (&t, tx, ty);
 
-	if (reverse) {
-		pixman_transform_init_translate(&t, -tx, -ty);
-		if (!pixman_transform_multiply(reverse, reverse, &t))
-			return FALSE;
-	}
-	return TRUE;
+	if (!pixman_transform_multiply (forward, &t, forward))
+	    return FALSE;
+    }
+
+    if (reverse)
+    {
+	pixman_transform_init_translate (&t, -tx, -ty);
+
+	if (!pixman_transform_multiply (reverse, reverse, &t))
+	    return FALSE;
+    }
+    return TRUE;
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_bounds(const struct pixman_transform *matrix,
-			struct pixman_box16 *b)
-			
-{
-	struct pixman_vector v[4];
-	int i;
-	int x1, y1, x2, y2;
-
-	v[0].vector[0] = F (b->x1);    v[0].vector[1] = F (b->y1);	v[0].vector[2] = F(1);
-	v[1].vector[0] = F (b->x2);    v[1].vector[1] = F (b->y1);	v[1].vector[2] = F(1);
-	v[2].vector[0] = F (b->x2);    v[2].vector[1] = F (b->y2);	v[2].vector[2] = F(1);
-	v[3].vector[0] = F (b->x1);    v[3].vector[1] = F (b->y2);	v[3].vector[2] = F(1);
-	for (i = 0; i < 4; i++)
+pixman_transform_bounds (const struct pixman_transform *matrix,
+                         struct pixman_box16 *          b)
+
+{
+    struct pixman_vector v[4];
+    int i;
+    int x1, y1, x2, y2;
+
+    v[0].vector[0] = F (b->x1);
+    v[0].vector[1] = F (b->y1);
+    v[0].vector[2] = F (1);
+
+    v[1].vector[0] = F (b->x2);
+    v[1].vector[1] = F (b->y1);
+    v[1].vector[2] = F (1);
+
+    v[2].vector[0] = F (b->x2);
+    v[2].vector[1] = F (b->y2);
+    v[2].vector[2] = F (1);
+
+    v[3].vector[0] = F (b->x1);
+    v[3].vector[1] = F (b->y2);
+    v[3].vector[2] = F (1);
+
+    for (i = 0; i < 4; i++)
+    {
+	if (!pixman_transform_point (matrix, &v[i]))
+	    return FALSE;
+
+	x1 = pixman_fixed_to_int (v[i].vector[0]);
+	y1 = pixman_fixed_to_int (v[i].vector[1]);
+	x2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[0]));
+	y2 = pixman_fixed_to_int (pixman_fixed_ceil (v[i].vector[1]));
+
+	if (i == 0)
+	{
+	    b->x1 = x1;
+	    b->y1 = y1;
+	    b->x2 = x2;
+	    b->y2 = y2;
+	}
+	else
 	{
-		if (!pixman_transform_point(matrix, &v[i]))
-			return FALSE;
-		x1 = pixman_fixed_to_int(v[i].vector[0]);
-		y1 = pixman_fixed_to_int(v[i].vector[1]);
-		x2 = pixman_fixed_to_int(pixman_fixed_ceil (v[i].vector[0]));
-		y2 = pixman_fixed_to_int(pixman_fixed_ceil (v[i].vector[1]));
-		if (i == 0)
-		{
-			b->x1 = x1; b->y1 = y1;
-			b->x2 = x2; b->y2 = y2;
-		}
-		else
-		{
-			if (x1 < b->x1) b->x1 = x1;
-			if (y1 < b->y1) b->y1 = y1;
-			if (x2 > b->x2) b->x2 = x2;
-			if (y2 > b->y2) b->y2 = y2;
-		}
+	    if (x1 < b->x1) b->x1 = x1;
+	    if (y1 < b->y1) b->y1 = y1;
+	    if (x2 > b->x2) b->x2 = x2;
+	    if (y2 > b->y2) b->y2 = y2;
 	}
-	return TRUE;
+    }
+
+    return TRUE;
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_invert (struct pixman_transform *dst,
-			 const struct pixman_transform *src)
+pixman_transform_invert (struct pixman_transform *      dst,
+                         const struct pixman_transform *src)
 {
-	struct pixman_f_transform m, r;
+    struct pixman_f_transform m, r;
 
-	pixman_f_transform_from_pixman_transform (&m, src);
-	if (!pixman_f_transform_invert (&r, &m))
-		return FALSE;
-	if (!pixman_transform_from_pixman_f_transform (dst, &r))
-		return FALSE;
-	return TRUE;
+    pixman_f_transform_from_pixman_transform (&m, src);
+
+    if (!pixman_f_transform_invert (&r, &m))
+	return FALSE;
+
+    if (!pixman_transform_from_pixman_f_transform (dst, &r))
+	return FALSE;
+
+    return TRUE;
 }
 
 static pixman_bool_t
-within_epsilon(pixman_fixed_t a, pixman_fixed_t b, pixman_fixed_t epsilon)
+within_epsilon (pixman_fixed_t a,
+                pixman_fixed_t b,
+                pixman_fixed_t epsilon)
 {
-	pixman_fixed_t  t = a - b;
-	if (t < 0) t = -t;
-	return t <= epsilon;
+    pixman_fixed_t t = a - b;
+
+    if (t < 0)
+	t = -t;
+
+    return t <= epsilon;
 }
 
-#define EPSILON	(pixman_fixed_t) (2)
+#define EPSILON (pixman_fixed_t) (2)
 
-#define IS_SAME(a,b) (within_epsilon(a, b, EPSILON))
-#define IS_ZERO(a)   (within_epsilon(a, 0, EPSILON))
-#define IS_ONE(a)    (within_epsilon(a, F(1), EPSILON))
-#define IS_UNIT(a)   (within_epsilon(a, F( 1), EPSILON) || \
-		      within_epsilon(a, F(-1), EPSILON) || \
-		      IS_ZERO(a))
-#define IS_INT(a)    (IS_ZERO(pixman_fixed_frac(a)))
+#define IS_SAME(a, b) (within_epsilon (a, b, EPSILON))
+#define IS_ZERO(a)    (within_epsilon (a, 0, EPSILON))
+#define IS_ONE(a)     (within_epsilon (a, F (1), EPSILON))
+#define IS_UNIT(a)			    \
+    (within_epsilon (a, F (1), EPSILON) ||  \
+     within_epsilon (a, F (-1), EPSILON) || \
+     IS_ZERO (a))
+#define IS_INT(a)    (IS_ZERO (pixman_fixed_frac (a)))
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_identity(const struct pixman_transform *t)
+pixman_transform_is_identity (const struct pixman_transform *t)
 {
-	return ( IS_SAME(t->matrix[0][0], t->matrix[1][1]) &&
-		 IS_SAME(t->matrix[0][0], t->matrix[2][2]) &&
-		!IS_ZERO(t->matrix[0][0]) &&
-		 IS_ZERO(t->matrix[0][1]) &&
-		 IS_ZERO(t->matrix[0][2]) &&
-		 IS_ZERO(t->matrix[1][0]) &&
-		 IS_ZERO(t->matrix[1][2]) &&
-		 IS_ZERO(t->matrix[2][0]) &&
-		 IS_ZERO(t->matrix[2][1]));
+    return (IS_SAME (t->matrix[0][0], t->matrix[1][1]) &&
+	    IS_SAME (t->matrix[0][0], t->matrix[2][2]) &&
+	    !IS_ZERO (t->matrix[0][0]) &&
+	    IS_ZERO (t->matrix[0][1]) &&
+	    IS_ZERO (t->matrix[0][2]) &&
+	    IS_ZERO (t->matrix[1][0]) &&
+	    IS_ZERO (t->matrix[1][2]) &&
+	    IS_ZERO (t->matrix[2][0]) &&
+	    IS_ZERO (t->matrix[2][1]));
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_scale(const struct pixman_transform *t)
+pixman_transform_is_scale (const struct pixman_transform *t)
 {
-	return (!IS_ZERO(t->matrix[0][0]) &&
-		 IS_ZERO(t->matrix[0][1]) &&
-		 IS_ZERO(t->matrix[0][2]) &&
+    return (!IS_ZERO (t->matrix[0][0]) &&
+            IS_ZERO (t->matrix[0][1]) &&
+            IS_ZERO (t->matrix[0][2]) &&
 
-		 IS_ZERO(t->matrix[1][0]) &&
-		!IS_ZERO(t->matrix[1][1]) &&
-		 IS_ZERO(t->matrix[1][2]) &&
+            IS_ZERO (t->matrix[1][0]) &&
+            !IS_ZERO (t->matrix[1][1]) &&
+            IS_ZERO (t->matrix[1][2]) &&
 
-		 IS_ZERO(t->matrix[2][0]) &&
-		 IS_ZERO(t->matrix[2][1]) &&
-		!IS_ZERO(t->matrix[2][2]));
+            IS_ZERO (t->matrix[2][0]) &&
+            IS_ZERO (t->matrix[2][1]) &&
+            !IS_ZERO (t->matrix[2][2]));
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_int_translate(const struct pixman_transform *t)
+pixman_transform_is_int_translate (const struct pixman_transform *t)
 {
-	return (IS_ONE (t->matrix[0][0]) &&
-		IS_ZERO(t->matrix[0][1]) &&
-		IS_INT (t->matrix[0][2]) &&
+    return (IS_ONE (t->matrix[0][0]) &&
+            IS_ZERO (t->matrix[0][1]) &&
+            IS_INT (t->matrix[0][2]) &&
 
-		IS_ZERO(t->matrix[1][0]) &&
-		IS_ONE (t->matrix[1][1]) &&
-		IS_INT (t->matrix[1][2]) &&
+            IS_ZERO (t->matrix[1][0]) &&
+            IS_ONE (t->matrix[1][1]) &&
+            IS_INT (t->matrix[1][2]) &&
 
-		IS_ZERO(t->matrix[2][0]) &&
-		IS_ZERO(t->matrix[2][1]) &&
-		IS_ONE (t->matrix[2][2]));
+            IS_ZERO (t->matrix[2][0]) &&
+            IS_ZERO (t->matrix[2][1]) &&
+            IS_ONE (t->matrix[2][2]));
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_is_inverse(const struct pixman_transform *a,
-			    const struct pixman_transform *b)
+pixman_transform_is_inverse (const struct pixman_transform *a,
+                             const struct pixman_transform *b)
 {
-	struct pixman_transform   t;
+    struct pixman_transform t;
 
-	pixman_transform_multiply(&t, a, b);
-	return pixman_transform_is_identity(&t);
+    pixman_transform_multiply (&t, a, b);
+
+    return pixman_transform_is_identity (&t);
 }
 
 PIXMAN_EXPORT void
-pixman_f_transform_from_pixman_transform (struct pixman_f_transform *ft,
-					  const struct pixman_transform *t)
+pixman_f_transform_from_pixman_transform (struct pixman_f_transform *    ft,
+                                          const struct pixman_transform *t)
 {
-	int	i, j;
+    int i, j;
 
-	for (j = 0; j < 3; j++)
-		for (i = 0; i < 3; i++)
-			ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]);
+    for (j = 0; j < 3; j++)
+    {
+	for (i = 0; i < 3; i++)
+	    ft->m[j][i] = pixman_fixed_to_double (t->matrix[j][i]);
+    }
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_transform_from_pixman_f_transform (struct pixman_transform *t,
-					  const struct pixman_f_transform *ft)
+pixman_transform_from_pixman_f_transform (struct pixman_transform *        t,
+                                          const struct pixman_f_transform *ft)
 {
-	int	i, j;
+    int i, j;
 
-	for (j = 0; j < 3; j++)
-		for (i = 0; i < 3; i++)
-		{
-			double  d = ft->m[j][i];
-			if (d < -32767.0 || d > 32767.0)
-				return FALSE;
-			d = d * 65536.0 + 0.5;
-			t->matrix[j][i] = (pixman_fixed_t) floor (d);
-		}
-	return TRUE;
+    for (j = 0; j < 3; j++)
+    {
+	for (i = 0; i < 3; i++)
+	{
+	    double d = ft->m[j][i];
+	    if (d < -32767.0 || d > 32767.0)
+		return FALSE;
+	    d = d * 65536.0 + 0.5;
+	    t->matrix[j][i] = (pixman_fixed_t) floor (d);
+	}
+    }
+    
+    return TRUE;
 }
 
-static const int	a[3] = { 3, 3, 2 };
-static const int	b[3] = { 2, 1, 1 };
+static const int a[3] = { 3, 3, 2 };
+static const int b[3] = { 2, 1, 1 };
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_invert(struct pixman_f_transform *dst,
-			  const struct pixman_f_transform *src)
-{
-	double  det;
-	int	    i, j;
-	static int	a[3] = { 2, 2, 1 };
-	static int	b[3] = { 1, 0, 0 };
-
-	det = 0;
-	for (i = 0; i < 3; i++) {
-		double	p;
-		int	ai = a[i];
-		int	bi = b[i];
-		p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] -
-				    src->m[ai][1] * src->m[bi][2]);
-		if (i == 1)
-			p = -p;
-		det += p;
-	}
-	if (det == 0)
-		return FALSE;
-	det = 1/det;
-	for (j = 0; j < 3; j++) {
-		for (i = 0; i < 3; i++) {
-			double  p;
-			int	    ai = a[i];
-			int	    aj = a[j];
-			int	    bi = b[i];
-			int	    bj = b[j];
-
-			p = (src->m[ai][aj] * src->m[bi][bj] -
-			     src->m[ai][bj] * src->m[bi][aj]);
-			if (((i + j) & 1) != 0)
-				p = -p;
-			dst->m[j][i] = det * p;
-		}
+pixman_f_transform_invert (struct pixman_f_transform *      dst,
+                           const struct pixman_f_transform *src)
+{
+    double det;
+    int i, j;
+    static int a[3] = { 2, 2, 1 };
+    static int b[3] = { 1, 0, 0 };
+
+    det = 0;
+    for (i = 0; i < 3; i++)
+    {
+	double p;
+	int ai = a[i];
+	int bi = b[i];
+	p = src->m[i][0] * (src->m[ai][2] * src->m[bi][1] -
+	                    src->m[ai][1] * src->m[bi][2]);
+	if (i == 1)
+	    p = -p;
+	det += p;
+    }
+    
+    if (det == 0)
+	return FALSE;
+    
+    det = 1 / det;
+    for (j = 0; j < 3; j++)
+    {
+	for (i = 0; i < 3; i++)
+	{
+	    double p;
+	    int ai = a[i];
+	    int aj = a[j];
+	    int bi = b[i];
+	    int bj = b[j];
+
+	    p = (src->m[ai][aj] * src->m[bi][bj] -
+	         src->m[ai][bj] * src->m[bi][aj]);
+	    
+	    if (((i + j) & 1) != 0)
+		p = -p;
+	    
+	    dst->m[j][i] = det * p;
 	}
-	return TRUE;
+    }
+
+    return TRUE;
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_point(const struct pixman_f_transform *t,
-			 struct pixman_f_vector *v)
+pixman_f_transform_point (const struct pixman_f_transform *t,
+                          struct pixman_f_vector *         v)
 {
-	struct pixman_f_vector    result;
-	int			    i, j;
-	double		    a;
+    struct pixman_f_vector result;
+    int i, j;
+    double a;
 
-	for (j = 0; j < 3; j++)
-	{
-		a = 0;
-		for (i = 0; i < 3; i++)
-			a += t->m[j][i] * v->v[i];
-		result.v[j] = a;
-	}
-	if (!result.v[2])
-		return FALSE;
-	for (j = 0; j < 2; j++)
-		v->v[j] = result.v[j] / result.v[2];
-	v->v[2] = 1;
-	return TRUE;
+    for (j = 0; j < 3; j++)
+    {
+	a = 0;
+	for (i = 0; i < 3; i++)
+	    a += t->m[j][i] * v->v[i];
+	result.v[j] = a;
+    }
+    
+    if (!result.v[2])
+	return FALSE;
+
+    for (j = 0; j < 2; j++)
+	v->v[j] = result.v[j] / result.v[2];
+
+    v->v[2] = 1;
+
+    return TRUE;
 }
 
 PIXMAN_EXPORT void
-pixman_f_transform_point_3d(const struct pixman_f_transform *t,
-			    struct pixman_f_vector *v)
+pixman_f_transform_point_3d (const struct pixman_f_transform *t,
+                             struct pixman_f_vector *         v)
 {
-	struct pixman_f_vector    result;
-	int			    i, j;
-	double		    a;
+    struct pixman_f_vector result;
+    int i, j;
+    double a;
 
-	for (j = 0; j < 3; j++)
-	{
-		a = 0;
-		for (i = 0; i < 3; i++)
-			a += t->m[j][i] * v->v[i];
-		result.v[j] = a;
-	}
-	*v = result;
+    for (j = 0; j < 3; j++)
+    {
+	a = 0;
+	for (i = 0; i < 3; i++)
+	    a += t->m[j][i] * v->v[i];
+	result.v[j] = a;
+    }
+    
+    *v = result;
 }
 
 PIXMAN_EXPORT void
-pixman_f_transform_multiply(struct pixman_f_transform *dst,
-			    const struct pixman_f_transform *l,
-			    const struct pixman_f_transform *r)
+pixman_f_transform_multiply (struct pixman_f_transform *      dst,
+                             const struct pixman_f_transform *l,
+                             const struct pixman_f_transform *r)
 {
-	struct pixman_f_transform d;
-	int			    dx, dy;
-	int			    o;
+    struct pixman_f_transform d;
+    int dx, dy;
+    int o;
 
-	for (dy = 0; dy < 3; dy++)
-		for (dx = 0; dx < 3; dx++)
-		{
-			double v = 0;
-			for (o = 0; o < 3; o++)
-				v += l->m[dy][o] * r->m[o][dx];
-			d.m[dy][dx] = v;
-		}
-	*dst = d;
+    for (dy = 0; dy < 3; dy++)
+    {
+	for (dx = 0; dx < 3; dx++)
+	{
+	    double v = 0;
+	    for (o = 0; o < 3; o++)
+		v += l->m[dy][o] * r->m[o][dx];
+	    d.m[dy][dx] = v;
+	}
+    }
+    
+    *dst = d;
 }
 
 PIXMAN_EXPORT void
-pixman_f_transform_init_scale (struct pixman_f_transform *t, double sx, double sy)
+pixman_f_transform_init_scale (struct pixman_f_transform *t,
+                               double                     sx,
+                               double                     sy)
 {
-	t->m[0][0] = sx;	t->m[0][1] = 0;	    t->m[0][2] = 0;
-	t->m[1][0] = 0;	t->m[1][1] = sy;    t->m[1][2] = 0;
-	t->m[2][0] = 0;	t->m[2][1] = 0;	    t->m[2][2] = 1;
+    t->m[0][0] = sx;
+    t->m[0][1] = 0;
+    t->m[0][2] = 0;
+    t->m[1][0] = 0;
+    t->m[1][1] = sy;
+    t->m[1][2] = 0;
+    t->m[2][0] = 0;
+    t->m[2][1] = 0;
+    t->m[2][2] = 1;
 }
 
 PIXMAN_EXPORT pixman_bool_t
 pixman_f_transform_scale (struct pixman_f_transform *forward,
-			  struct pixman_f_transform *reverse,
-			  double sx, double sy)
-{
-	struct pixman_f_transform t;
-
-	if (sx == 0 || sy == 0)
-		return FALSE;
-
-	if (forward) {
-		pixman_f_transform_init_scale (&t, sx, sy);
-		pixman_f_transform_multiply (forward, &t, forward);
-	}
-	if (reverse) {
-		pixman_f_transform_init_scale (&t, 1/sx, 1/sy);
-		pixman_f_transform_multiply (reverse, reverse, &t);
-	}
-	return TRUE;
+                          struct pixman_f_transform *reverse,
+                          double                     sx,
+                          double                     sy)
+{
+    struct pixman_f_transform t;
+
+    if (sx == 0 || sy == 0)
+	return FALSE;
+
+    if (forward)
+    {
+	pixman_f_transform_init_scale (&t, sx, sy);
+	pixman_f_transform_multiply (forward, &t, forward);
+    }
+    
+    if (reverse)
+    {
+	pixman_f_transform_init_scale (&t, 1 / sx, 1 / sy);
+	pixman_f_transform_multiply (reverse, reverse, &t);
+    }
+    
+    return TRUE;
 }
 
 PIXMAN_EXPORT void
-pixman_f_transform_init_rotate (struct pixman_f_transform *t, double c, double s)
+pixman_f_transform_init_rotate (struct pixman_f_transform *t,
+                                double                     c,
+                                double                     s)
 {
-	t->m[0][0] = c;	t->m[0][1] = -s;    t->m[0][2] = 0;
-	t->m[1][0] = s;	t->m[1][1] = c;	    t->m[1][2] = 0;
-	t->m[2][0] = 0;	t->m[2][1] = 0;	    t->m[2][2] = 1;
+    t->m[0][0] = c;
+    t->m[0][1] = -s;
+    t->m[0][2] = 0;
+    t->m[1][0] = s;
+    t->m[1][1] = c;
+    t->m[1][2] = 0;
+    t->m[2][0] = 0;
+    t->m[2][1] = 0;
+    t->m[2][2] = 1;
 }
 
 PIXMAN_EXPORT pixman_bool_t
 pixman_f_transform_rotate (struct pixman_f_transform *forward,
-			   struct pixman_f_transform *reverse,
-			   double c, double s)
+                           struct pixman_f_transform *reverse,
+                           double                     c,
+                           double                     s)
 {
-	struct pixman_f_transform t;
+    struct pixman_f_transform t;
 
-	if (forward) {
-		pixman_f_transform_init_rotate (&t, c, s);
-		pixman_f_transform_multiply (forward, &t, forward);
-	}
-	if (reverse) {
-		pixman_f_transform_init_rotate (&t, c, -s);
-		pixman_f_transform_multiply (reverse, reverse, &t);
-	}
-	return TRUE;
+    if (forward)
+    {
+	pixman_f_transform_init_rotate (&t, c, s);
+	pixman_f_transform_multiply (forward, &t, forward);
+    }
+    
+    if (reverse)
+    {
+	pixman_f_transform_init_rotate (&t, c, -s);
+	pixman_f_transform_multiply (reverse, reverse, &t);
+    }
+
+    return TRUE;
 }
 
 PIXMAN_EXPORT void
-pixman_f_transform_init_translate (struct pixman_f_transform *t, double tx, double ty)
+pixman_f_transform_init_translate (struct pixman_f_transform *t,
+                                   double                     tx,
+                                   double                     ty)
 {
-	t->m[0][0] = 1;	t->m[0][1] = 0;	    t->m[0][2] = tx;
-	t->m[1][0] = 0;	t->m[1][1] = 1;	    t->m[1][2] = ty;
-	t->m[2][0] = 0;	t->m[2][1] = 0;	    t->m[2][2] = 1;
+    t->m[0][0] = 1;
+    t->m[0][1] = 0;
+    t->m[0][2] = tx;
+    t->m[1][0] = 0;
+    t->m[1][1] = 1;
+    t->m[1][2] = ty;
+    t->m[2][0] = 0;
+    t->m[2][1] = 0;
+    t->m[2][2] = 1;
 }
 
 PIXMAN_EXPORT pixman_bool_t
 pixman_f_transform_translate (struct pixman_f_transform *forward,
-			      struct pixman_f_transform *reverse,
-			      double tx, double ty)
+                              struct pixman_f_transform *reverse,
+                              double                     tx,
+                              double                     ty)
 {
-	struct pixman_f_transform t;
+    struct pixman_f_transform t;
 
-	if (forward) {
-		pixman_f_transform_init_translate (&t, tx, ty);
-		pixman_f_transform_multiply (forward, &t, forward);
-	}
-	if (reverse) {
-		pixman_f_transform_init_translate (&t, -tx, -ty);
-		pixman_f_transform_multiply (reverse, reverse, &t);
-	}
-	return TRUE;
+    if (forward)
+    {
+	pixman_f_transform_init_translate (&t, tx, ty);
+	pixman_f_transform_multiply (forward, &t, forward);
+    }
+
+    if (reverse)
+    {
+	pixman_f_transform_init_translate (&t, -tx, -ty);
+	pixman_f_transform_multiply (reverse, reverse, &t);
+    }
+
+    return TRUE;
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_f_transform_bounds(const struct pixman_f_transform *t, struct pixman_box16 *b)
-{
-	struct pixman_f_vector    v[4];
-	int			    i;
-	int			    x1, y1, x2, y2;
-
-	v[0].v[0] = b->x1;    v[0].v[1] = b->y1;	v[0].v[2] = 1;
-	v[1].v[0] = b->x2;    v[1].v[1] = b->y1;	v[1].v[2] = 1;
-	v[2].v[0] = b->x2;    v[2].v[1] = b->y2;	v[2].v[2] = 1;
-	v[3].v[0] = b->x1;    v[3].v[1] = b->y2;	v[3].v[2] = 1;
-	for (i = 0; i < 4; i++)
+pixman_f_transform_bounds (const struct pixman_f_transform *t,
+                           struct pixman_box16 *            b)
+{
+    struct pixman_f_vector v[4];
+    int i;
+    int x1, y1, x2, y2;
+
+    v[0].v[0] = b->x1;
+    v[0].v[1] = b->y1;
+    v[0].v[2] = 1;
+    v[1].v[0] = b->x2;
+    v[1].v[1] = b->y1;
+    v[1].v[2] = 1;
+    v[2].v[0] = b->x2;
+    v[2].v[1] = b->y2;
+    v[2].v[2] = 1;
+    v[3].v[0] = b->x1;
+    v[3].v[1] = b->y2;
+    v[3].v[2] = 1;
+
+    for (i = 0; i < 4; i++)
+    {
+	if (!pixman_f_transform_point (t, &v[i]))
+	    return FALSE;
+
+	x1 = floor (v[i].v[0]);
+	y1 = floor (v[i].v[1]);
+	x2 = ceil (v[i].v[0]);
+	y2 = ceil (v[i].v[1]);
+
+	if (i == 0)
+	{
+	    b->x1 = x1;
+	    b->y1 = y1;
+	    b->x2 = x2;
+	    b->y2 = y2;
+	}
+	else
 	{
-		if (!pixman_f_transform_point (t, &v[i]))
-			return FALSE;
-		x1 = floor (v[i].v[0]);
-		y1 = floor (v[i].v[1]);
-		x2 = ceil (v[i].v[0]);
-		y2 = ceil (v[i].v[1]);
-		if (i == 0)
-		{
-			b->x1 = x1; b->y1 = y1;
-			b->x2 = x2; b->y2 = y2;
-		}
-		else
-		{
-			if (x1 < b->x1) b->x1 = x1;
-			if (y1 < b->y1) b->y1 = y1;
-			if (x2 > b->x2) b->x2 = x2;
-			if (y2 > b->y2) b->y2 = y2;
-		}
+	    if (x1 < b->x1) b->x1 = x1;
+	    if (y1 < b->y1) b->y1 = y1;
+	    if (x2 > b->x2) b->x2 = x2;
+	    if (y2 > b->y2) b->y2 = y2;
 	}
-	return TRUE;
+    }
+
+    return TRUE;
 }
 
 PIXMAN_EXPORT void
 pixman_f_transform_init_identity (struct pixman_f_transform *t)
 {
-	int	i, j;
+    int i, j;
 
-	for (j = 0; j < 3; j++)
-		for (i = 0; i < 3; i++)
-			t->m[j][i] = i == j ? 1 : 0;
+    for (j = 0; j < 3; j++)
+    {
+	for (i = 0; i < 3; i++)
+	    t->m[j][i] = i == j ? 1 : 0;
+    }
 }
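
[Editor's note: the a[]/b[] index tables in pixman_f_transform_invert above are a compact way of naming, for each row or column k, the other two indices, so the function computes the classical adjugate-over-determinant inverse of a 3x3 matrix. An equivalent plain-double sketch, not part of the commit, with the cofactor indices spelled out cyclically instead:]

/* Invert a 3x3 matrix: dst = adj(m) / det(m).  Returns 0 when the
 * matrix is singular.  Illustrative sketch only; not from the commit. */
static int
invert_3x3 (double dst[3][3], const double m[3][3])
{
    double det =
        m[0][0] * (m[1][1] * m[2][2] - m[1][2] * m[2][1]) -
        m[0][1] * (m[1][0] * m[2][2] - m[1][2] * m[2][0]) +
        m[0][2] * (m[1][0] * m[2][1] - m[1][1] * m[2][0]);
    int i, j;

    if (det == 0.0)
        return 0;

    for (j = 0; j < 3; j++)
    {
        for (i = 0; i < 3; i++)
        {
            /* Signed cofactor of m[i][j], transposed into dst[j][i];
             * the cyclic index choice supplies the (-1)^(i+j) sign. */
            int i1 = (i + 1) % 3, i2 = (i + 2) % 3;
            int j1 = (j + 1) % 3, j2 = (j + 2) % 3;

            dst[j][i] = (m[i1][j1] * m[i2][j2] -
                         m[i1][j2] * m[i2][j1]) / det;
        }
    }

    return 1;
}
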
commit 19397bc732b30a861416220974edca6404d2890b
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 19:19:13 2009 -0400

    Reindent and reformat pixman-linear-gradient.c
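
[Editor's note: the scanline code reformatted below derives, for every pixel, a gradient parameter t by projecting the pixel position onto the line from p1 to p2; a = dx/l and b = dy/l are precomputed so that t = a*x + b*y + off, with t = 0 at p1 and t = 1 at p2. A floating-point sketch of that projection, not part of the commit and ignoring the 16.16 scaling details of the real code:]

/* Gradient parameter for a pixel at (px, py); sketch only. */
static double
linear_gradient_t (double p1x, double p1y,
                   double p2x, double p2y,
                   double px,  double py)
{
    double dx = p2x - p1x;
    double dy = p2y - p1y;
    double l  = dx * dx + dy * dy;   /* squared gradient length */

    if (l == 0.0)
        return 0.0;                  /* degenerate gradient */

    /* Same quantity as ((a * x + b * y) >> 16) + off below. */
    return (dx * (px - p1x) + dy * (py - p1y)) / l;
}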

diff --git a/pixman/pixman-linear-gradient.c b/pixman/pixman-linear-gradient.c
index 54330e7..8c4a8ff 100644
--- a/pixman/pixman-linear-gradient.c
+++ b/pixman/pixman-linear-gradient.c
@@ -30,23 +30,25 @@
 
 static source_pict_class_t
 linear_gradient_classify (pixman_image_t *image,
-			  int	          x,
-			  int	          y,
-			  int	          width,
-			  int	          height)
+                          int             x,
+                          int             y,
+                          int             width,
+                          int             height)
 {
     linear_gradient_t *linear = (linear_gradient_t *)image;
-    pixman_vector_t   v;
+    pixman_vector_t v;
     pixman_fixed_32_32_t l;
     pixman_fixed_48_16_t dx, dy, a, b, off;
     pixman_fixed_48_16_t factors[4];
-    int	     i;
-    
+    int i;
+
     image->source.class = SOURCE_IMAGE_CLASS_UNKNOWN;
-    
+
     dx = linear->p2.x - linear->p1.x;
     dy = linear->p2.y - linear->p1.y;
+
     l = dx * dx + dy * dy;
+
     if (l)
     {
 	a = (dx << 32) / l;
@@ -56,40 +58,45 @@ linear_gradient_classify (pixman_image_t *image,
     {
 	a = b = 0;
     }
-    
+
     off = (-a * linear->p1.x
-	   -b * linear->p1.y) >> 16;
-    
+           -b * linear->p1.y) >> 16;
+
     for (i = 0; i < 3; i++)
     {
 	v.vector[0] = pixman_int_to_fixed ((i % 2) * (width  - 1) + x);
 	v.vector[1] = pixman_int_to_fixed ((i / 2) * (height - 1) + y);
 	v.vector[2] = pixman_fixed_1;
-	
+
 	if (image->common.transform)
 	{
 	    if (!pixman_transform_point_3d (image->common.transform, &v))
 	    {
 		image->source.class = SOURCE_IMAGE_CLASS_UNKNOWN;
-		
+
 		return image->source.class;
 	    }
 	}
-	
+
 	factors[i] = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
     }
-    
+
     if (factors[2] == factors[0])
 	image->source.class = SOURCE_IMAGE_CLASS_HORIZONTAL;
     else if (factors[1] == factors[0])
 	image->source.class = SOURCE_IMAGE_CLASS_VERTICAL;
-    
+
     return image->source.class;
 }
 
 static void
-linear_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-				 const uint32_t *mask, uint32_t mask_bits)
+linear_gradient_get_scanline_32 (pixman_image_t *image,
+                                 int             x,
+                                 int             y,
+                                 int             width,
+                                 uint32_t *      buffer,
+                                 const uint32_t *mask,
+                                 uint32_t        mask_bits)
 {
     pixman_vector_t v, unit;
     pixman_fixed_32_32_t l;
@@ -97,83 +104,102 @@ linear_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width,
     gradient_t *gradient = (gradient_t *)image;
     source_image_t *source = (source_image_t *)image;
     linear_gradient_t *linear = (linear_gradient_t *)image;
-    uint32_t       *end = buffer + width;
-    pixman_gradient_walker_t  walker;
-    
+    uint32_t *end = buffer + width;
+    pixman_gradient_walker_t walker;
+
     _pixman_gradient_walker_init (&walker, gradient, source->common.repeat);
-    
+
     /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
-    v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
+    v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
     v.vector[2] = pixman_fixed_1;
-    if (source->common.transform) {
+
+    if (source->common.transform)
+    {
 	if (!pixman_transform_point_3d (source->common.transform, &v))
 	    return;
+	
 	unit.vector[0] = source->common.transform->matrix[0][0];
 	unit.vector[1] = source->common.transform->matrix[1][0];
 	unit.vector[2] = source->common.transform->matrix[2][0];
-    } else {
+    }
+    else
+    {
 	unit.vector[0] = pixman_fixed_1;
 	unit.vector[1] = 0;
 	unit.vector[2] = 0;
     }
-    
+
     dx = linear->p2.x - linear->p1.x;
     dy = linear->p2.y - linear->p1.y;
-    l = dx*dx + dy*dy;
-    if (l != 0) {
+
+    l = dx * dx + dy * dy;
+
+    if (l != 0)
+    {
 	a = (dx << 32) / l;
 	b = (dy << 32) / l;
-	off = (-a*linear->p1.x - b*linear->p1.y)>>16;
+	off = (-a * linear->p1.x
+	       -b * linear->p1.y) >> 16;
     }
-    if (l == 0  || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1)) {
+
+    if (l == 0 || (unit.vector[2] == 0 && v.vector[2] == pixman_fixed_1))
+    {
 	pixman_fixed_48_16_t inc, t;
+
 	/* affine transformation only */
-	if (l == 0) {
+	if (l == 0)
+	{
 	    t = 0;
 	    inc = 0;
-	} else {
-	    t = ((a*v.vector[0] + b*v.vector[1]) >> 16) + off;
+	}
+	else
+	{
+	    t = ((a * v.vector[0] + b * v.vector[1]) >> 16) + off;
 	    inc = (a * unit.vector[0] + b * unit.vector[1]) >> 16;
 	}
-	
+
 	if (source->class == SOURCE_IMAGE_CLASS_VERTICAL)
 	{
 	    register uint32_t color;
-	    
-	    color = _pixman_gradient_walker_pixel( &walker, t );
+
+	    color = _pixman_gradient_walker_pixel (&walker, t);
 	    while (buffer < end)
-		*(buffer++) = color;
+		*buffer++ = color;
 	}
 	else
 	{
-	    if (!mask) {
+	    if (!mask)
+	    {
 		while (buffer < end)
 		{
-		    *(buffer) = _pixman_gradient_walker_pixel (&walker, t);
-		    buffer += 1;
-		    t      += inc;
+		    *buffer++ = _pixman_gradient_walker_pixel (&walker, t);
+		    
+		    t += inc;
 		}
-	    } else {
-		while (buffer < end) {
+	    }
+	    else
+	    {
+		while (buffer < end)
+		{
 		    if (*mask++ & mask_bits)
-		    {
-			*(buffer) = _pixman_gradient_walker_pixel (&walker, t);
-		    }
-		    buffer += 1;
-		    t      += inc;
+			*buffer = _pixman_gradient_walker_pixel (&walker, t);
+
+		    buffer++;
+		    t += inc;
 		}
 	    }
 	}
     }
-    else /* projective transformation */
+    else
     {
+	/* projective transformation */
 	pixman_fixed_48_16_t t;
-	
+
 	if (source->class == SOURCE_IMAGE_CLASS_VERTICAL)
 	{
 	    register uint32_t color;
-	    
+
 	    if (v.vector[2] == 0)
 	    {
 		t = 0;
@@ -181,15 +207,15 @@ linear_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width,
 	    else
 	    {
 		pixman_fixed_48_16_t x, y;
-		
+
 		x = ((pixman_fixed_48_16_t) v.vector[0] << 16) / v.vector[2];
 		y = ((pixman_fixed_48_16_t) v.vector[1] << 16) / v.vector[2];
 		t = ((a * x + b * y) >> 16) + off;
 	    }
-	    
-	    color = _pixman_gradient_walker_pixel( &walker, t );
+
+	    color = _pixman_gradient_walker_pixel (&walker, t);
 	    while (buffer < end)
-		*(buffer++) = color;
+		*buffer++ = color;
 	}
 	else
 	{
@@ -197,17 +223,23 @@ linear_gradient_get_scanline_32 (pixman_image_t *image, int x, int y, int width,
 	    {
 		if (!mask || *mask++ & mask_bits)
 		{
-		    if (v.vector[2] == 0) {
+		    if (v.vector[2] == 0)
+		    {
 			t = 0;
-		    } else {
+		    }
+		    else
+		    {
 			pixman_fixed_48_16_t x, y;
 			x = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2];
 			y = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2];
-			t = ((a*x + b*y) >> 16) + off;
+			t = ((a * x + b * y) >> 16) + off;
 		    }
-		    *(buffer) = _pixman_gradient_walker_pixel (&walker, t);
+		    
+		    *buffer = _pixman_gradient_walker_pixel (&walker, t);
 		}
+		
 		++buffer;
+		
 		v.vector[0] += unit.vector[0];
 		v.vector[1] += unit.vector[1];
 		v.vector[2] += unit.vector[2];
@@ -224,38 +256,39 @@ linear_gradient_property_changed (pixman_image_t *image)
 }
 
 PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_linear_gradient (pixman_point_fixed_t         *p1,
-				     pixman_point_fixed_t         *p2,
-				     const pixman_gradient_stop_t *stops,
-				     int                           n_stops)
+pixman_image_create_linear_gradient (pixman_point_fixed_t *        p1,
+                                     pixman_point_fixed_t *        p2,
+                                     const pixman_gradient_stop_t *stops,
+                                     int                           n_stops)
 {
     pixman_image_t *image;
     linear_gradient_t *linear;
-    
+
     return_val_if_fail (n_stops >= 2, NULL);
-    
-    image = _pixman_image_allocate();
-    
+
+    image = _pixman_image_allocate ();
+
     if (!image)
 	return NULL;
-    
+
     linear = &image->linear;
-    
+
     if (!_pixman_init_gradient (&linear->common, stops, n_stops))
     {
 	free (image);
 	return NULL;
     }
-    
+
     linear->p1 = *p1;
     linear->p2 = *p2;
-    
+
     image->type = LINEAR;
     image->source.class = SOURCE_IMAGE_CLASS_UNKNOWN;
     image->common.classify = linear_gradient_classify;
     image->common.property_changed = linear_gradient_property_changed;
-    
+
     linear_gradient_property_changed (image);
-    
+
     return image;
 }
+
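
A note on the math this reindentation preserves: linear_gradient_classify
and the scanline fetcher both reduce a pixel position to a scalar parameter
t by projecting it onto the p1->p2 axis, entirely in fixed point.  A minimal
standalone sketch of that projection (hypothetical helper name, not the
pixman API; it mirrors the a/b/off computation in the diff above):

    #include <stdint.h>
    #include <stdio.h>

    #define FIXED_1 (1 << 16)   /* 16.16 fixed point, like pixman_fixed_1 */

    /* Project (px, py) onto the axis p1 -> p2 and return t in 16.16
     * fixed point: t = 0 at p1, t = FIXED_1 at p2. */
    static int64_t
    gradient_parameter (int32_t p1x, int32_t p1y,
                        int32_t p2x, int32_t p2y,
                        int32_t px,  int32_t py)
    {
        int64_t dx = p2x - p1x;
        int64_t dy = p2y - p1y;
        int64_t l = dx * dx + dy * dy;
        int64_t a, b, off;

        if (l == 0)
            return 0;

        a = (dx << 32) / l;
        b = (dy << 32) / l;
        off = (-a * p1x - b * p1y) >> 16;

        return ((a * px + b * py) >> 16) + off;
    }

    int
    main (void)
    {
        /* Axis from (0,0) to (100,0): the midpoint lands near t = 0.5. */
        int64_t t = gradient_parameter (0, 0, 100 * FIXED_1, 0,
                                        50 * FIXED_1, 0);
        printf ("t = %f\n", (double)t / FIXED_1);
        return 0;
    }

The classify pass evaluates this parameter at three corners of the composite
rectangle; when it is constant along a row (factors[1] == factors[0] above)
the image is classed VERTICAL and the scanline fetcher fills each row with a
single color.
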
commit e8e08b35e7a8d221378e3a411efdfad74b1614e3
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 19:15:17 2009 -0400

    Reindent and reformat pixman-implementation.c

diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
index e0a4afd..c743948 100644
--- a/pixman/pixman-implementation.c
+++ b/pixman/pixman-implementation.c
@@ -26,109 +26,111 @@
 #include "pixman-private.h"
 
 static void
-delegate_composite (pixman_implementation_t *	imp,
-		    pixman_op_t			op,
-		    pixman_image_t *		src,
-		    pixman_image_t *		mask,
-		    pixman_image_t *		dest,
-		    int32_t			src_x,
-		    int32_t			src_y,
-		    int32_t			mask_x,
-		    int32_t			mask_y,
-		    int32_t			dest_x,
-		    int32_t			dest_y,
-		    int32_t			width,
-		    int32_t			height)
+delegate_composite (pixman_implementation_t * imp,
+                    pixman_op_t               op,
+                    pixman_image_t *          src,
+                    pixman_image_t *          mask,
+                    pixman_image_t *          dest,
+                    int32_t                   src_x,
+                    int32_t                   src_y,
+                    int32_t                   mask_x,
+                    int32_t                   mask_y,
+                    int32_t                   dest_x,
+                    int32_t                   dest_y,
+                    int32_t                   width,
+                    int32_t                   height)
 {
     _pixman_implementation_composite (imp->delegate,
-				      op,
-				      src, mask, dest,
-				      src_x, src_y,
-				      mask_x, mask_y,
-				      dest_x, dest_y,
-				      width, height);
+                                      op,
+                                      src, mask, dest,
+                                      src_x, src_y,
+                                      mask_x, mask_y,
+                                      dest_x, dest_y,
+                                      width, height);
 }
 
 static void
-delegate_combine_32 (pixman_implementation_t *	imp,
-		     pixman_op_t		op,
-		     uint32_t *			dest,
-		     const uint32_t *		src,
-		     const uint32_t *		mask,
-		     int			width)
+delegate_combine_32 (pixman_implementation_t * imp,
+                     pixman_op_t               op,
+                     uint32_t *                dest,
+                     const uint32_t *          src,
+                     const uint32_t *          mask,
+                     int                       width)
 {
     _pixman_implementation_combine_32 (imp->delegate,
-				       op, dest, src, mask, width);
+                                       op, dest, src, mask, width);
 }
 
 static void
-delegate_combine_64 (pixman_implementation_t *	imp,
-		     pixman_op_t		op,
-		     uint64_t *			dest,
-		     const uint64_t *		src,
-		     const uint64_t *		mask,
-		     int			width)
+delegate_combine_64 (pixman_implementation_t * imp,
+                     pixman_op_t               op,
+                     uint64_t *                dest,
+                     const uint64_t *          src,
+                     const uint64_t *          mask,
+                     int                       width)
 {
     _pixman_implementation_combine_64 (imp->delegate,
-				       op, dest, src, mask, width);
+                                       op, dest, src, mask, width);
 }
 
 static void
-delegate_combine_32_ca (pixman_implementation_t *	imp,
-			pixman_op_t			op,
-			uint32_t *			dest,
-			const uint32_t *		src,
-			const uint32_t *		mask,
-			int				width)
+delegate_combine_32_ca (pixman_implementation_t * imp,
+                        pixman_op_t               op,
+                        uint32_t *                dest,
+                        const uint32_t *          src,
+                        const uint32_t *          mask,
+                        int                       width)
 {
     _pixman_implementation_combine_32_ca (imp->delegate,
-					  op, dest, src, mask, width);
+                                          op, dest, src, mask, width);
 }
 
 static void
-delegate_combine_64_ca (pixman_implementation_t *	imp,
-			pixman_op_t			op,
-			uint64_t *			dest,
-			const uint64_t *		src,
-			const uint64_t *		mask,
-			int				width)
+delegate_combine_64_ca (pixman_implementation_t * imp,
+                        pixman_op_t               op,
+                        uint64_t *                dest,
+                        const uint64_t *          src,
+                        const uint64_t *          mask,
+                        int                       width)
 {
     _pixman_implementation_combine_64_ca (imp->delegate,
-					  op, dest, src, mask, width);
+                                          op, dest, src, mask, width);
 }
 
 static pixman_bool_t
-delegate_blt (pixman_implementation_t *	imp,
-	      uint32_t *		src_bits,
-	      uint32_t *		dst_bits,
-	      int			src_stride,
-	      int			dst_stride,
-	      int			src_bpp,
-	      int			dst_bpp,
-	      int			src_x,
-	      int			src_y,
-	      int			dst_x,
-	      int			dst_y,
-	      int			width,
-	      int			height)
+delegate_blt (pixman_implementation_t * imp,
+              uint32_t *                src_bits,
+              uint32_t *                dst_bits,
+              int                       src_stride,
+              int                       dst_stride,
+              int                       src_bpp,
+              int                       dst_bpp,
+              int                       src_x,
+              int                       src_y,
+              int                       dst_x,
+              int                       dst_y,
+              int                       width,
+              int                       height)
 {
-    return _pixman_implementation_blt (imp->delegate, src_bits, dst_bits, src_stride, dst_stride,
-				       src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y,
-				       width, height);
+    return _pixman_implementation_blt (
+	imp->delegate, src_bits, dst_bits, src_stride, dst_stride,
+	src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y,
+	width, height);
 }
 
 static pixman_bool_t
 delegate_fill (pixman_implementation_t *imp,
-	       uint32_t *bits,
-	       int stride,
-	       int bpp,
-	       int x,
-	       int y,
-	       int width,
-	       int height,
-	       uint32_t xor)
+               uint32_t *               bits,
+               int                      stride,
+               int                      bpp,
+               int                      x,
+               int                      y,
+               int                      width,
+               int                      height,
+               uint32_t                 xor)
 {
-    return _pixman_implementation_fill (imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+    return _pixman_implementation_fill (
+	imp->delegate, bits, stride, bpp, x, y, width, height, xor);
 }
 
 pixman_implementation_t *
@@ -137,7 +139,7 @@ _pixman_implementation_create (pixman_implementation_t *delegate)
     pixman_implementation_t *imp = malloc (sizeof (pixman_implementation_t));
     pixman_implementation_t *d;
     int i;
-    
+
     if (!imp)
 	return NULL;
 
@@ -151,7 +153,7 @@ _pixman_implementation_create (pixman_implementation_t *delegate)
     imp->composite = delegate_composite;
     imp->blt = delegate_blt;
     imp->fill = delegate_fill;
-    
+
     for (i = 0; i < PIXMAN_OP_LAST; ++i)
     {
 	imp->combine_32[i] = delegate_combine_32;
@@ -159,105 +161,106 @@ _pixman_implementation_create (pixman_implementation_t *delegate)
 	imp->combine_32_ca[i] = delegate_combine_32_ca;
 	imp->combine_64_ca[i] = delegate_combine_64_ca;
     }
-    
+
     return imp;
 }
 
 void
-_pixman_implementation_combine_32 (pixman_implementation_t *	imp,
-				   pixman_op_t			op,
-				   uint32_t *			dest,
-				   const uint32_t *		src,
-				   const uint32_t *		mask,
-				   int				width)
+_pixman_implementation_combine_32 (pixman_implementation_t * imp,
+                                   pixman_op_t               op,
+                                   uint32_t *                dest,
+                                   const uint32_t *          src,
+                                   const uint32_t *          mask,
+                                   int                       width)
 {
-    (* imp->combine_32[op]) (imp, op, dest, src, mask, width);
+    (*imp->combine_32[op]) (imp, op, dest, src, mask, width);
 }
 
 void
-_pixman_implementation_combine_64 (pixman_implementation_t *	imp,
-				   pixman_op_t			op,
-				   uint64_t *			dest,
-				   const uint64_t *		src,
-				   const uint64_t *		mask,
-				   int				width)
+_pixman_implementation_combine_64 (pixman_implementation_t * imp,
+                                   pixman_op_t               op,
+                                   uint64_t *                dest,
+                                   const uint64_t *          src,
+                                   const uint64_t *          mask,
+                                   int                       width)
 {
-    (* imp->combine_64[op]) (imp, op, dest, src, mask, width);
+    (*imp->combine_64[op]) (imp, op, dest, src, mask, width);
 }
 
 void
-_pixman_implementation_combine_32_ca (pixman_implementation_t *	imp,
-				      pixman_op_t		op,
-				      uint32_t *		dest,
-				      const uint32_t *		src,
-				      const uint32_t *		mask,
-				      int			width)
+_pixman_implementation_combine_32_ca (pixman_implementation_t * imp,
+                                      pixman_op_t               op,
+                                      uint32_t *                dest,
+                                      const uint32_t *          src,
+                                      const uint32_t *          mask,
+                                      int                       width)
 {
-    (* imp->combine_32_ca[op]) (imp, op, dest, src, mask, width);
+    (*imp->combine_32_ca[op]) (imp, op, dest, src, mask, width);
 }
 
 void
-_pixman_implementation_combine_64_ca (pixman_implementation_t *	imp,
-				      pixman_op_t		op,
-				      uint64_t *		dest,
-				      const uint64_t *		src,
-				      const uint64_t *		mask,
-				      int			width)
+_pixman_implementation_combine_64_ca (pixman_implementation_t * imp,
+                                      pixman_op_t               op,
+                                      uint64_t *                dest,
+                                      const uint64_t *          src,
+                                      const uint64_t *          mask,
+                                      int                       width)
 {
-    (* imp->combine_64_ca[op]) (imp, op, dest, src, mask, width);
+    (*imp->combine_64_ca[op]) (imp, op, dest, src, mask, width);
 }
 
 void
-_pixman_implementation_composite (pixman_implementation_t *	imp,
-				  pixman_op_t			op,
-				  pixman_image_t *		src,
-				  pixman_image_t *		mask,
-				  pixman_image_t *		dest,
-				  int32_t			src_x,
-				  int32_t			src_y,
-				  int32_t			mask_x,
-				  int32_t			mask_y,
-				  int32_t			dest_x,
-				  int32_t			dest_y,
-				  int32_t			width,
-				  int32_t			height)
+_pixman_implementation_composite (pixman_implementation_t * imp,
+                                  pixman_op_t               op,
+                                  pixman_image_t *          src,
+                                  pixman_image_t *          mask,
+                                  pixman_image_t *          dest,
+                                  int32_t                   src_x,
+                                  int32_t                   src_y,
+                                  int32_t                   mask_x,
+                                  int32_t                   mask_y,
+                                  int32_t                   dest_x,
+                                  int32_t                   dest_y,
+                                  int32_t                   width,
+                                  int32_t                   height)
 {
-    (* imp->composite) (imp, op,
-			src, mask, dest,
-			src_x, src_y, mask_x, mask_y, dest_x, dest_y,
-			width, height);
+    (*imp->composite) (imp, op,
+		       src, mask, dest,
+		       src_x, src_y, mask_x, mask_y, dest_x, dest_y,
+		       width, height);
 }
 
 pixman_bool_t
-_pixman_implementation_blt (pixman_implementation_t *	imp,
-			    uint32_t *			src_bits,
-			    uint32_t *			dst_bits,
-			    int				src_stride,
-			    int				dst_stride,
-			    int				src_bpp,
-			    int				dst_bpp,
-			    int				src_x,
-			    int				src_y,
-			    int				dst_x,
-			    int				dst_y,
-			    int				width,
-			    int				height)
+_pixman_implementation_blt (pixman_implementation_t * imp,
+                            uint32_t *                src_bits,
+                            uint32_t *                dst_bits,
+                            int                       src_stride,
+                            int                       dst_stride,
+                            int                       src_bpp,
+                            int                       dst_bpp,
+                            int                       src_x,
+                            int                       src_y,
+                            int                       dst_x,
+                            int                       dst_y,
+                            int                       width,
+                            int                       height)
 {
-    return (* imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride,
-			 src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y,
-			 width, height);
+    return (*imp->blt) (imp, src_bits, dst_bits, src_stride, dst_stride,
+			src_bpp, dst_bpp, src_x, src_y, dst_x, dst_y,
+			width, height);
 }
 
 pixman_bool_t
 _pixman_implementation_fill (pixman_implementation_t *imp,
-			     uint32_t *bits,
-			     int stride,
-			     int bpp,
-			     int x,
-			     int y,
-			     int width,
-			     int height,
-			     uint32_t xor)
+                             uint32_t *               bits,
+                             int                      stride,
+                             int                      bpp,
+                             int                      x,
+                             int                      y,
+                             int                      width,
+                             int                      height,
+                             uint32_t                 xor)
 {
-    return (* imp->fill) (imp, bits, stride, bpp, x, y, width, height, xor);
+    return (*imp->fill) (imp, bits, stride, bpp, x, y, width, height, xor);
 }
+
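
The structure this file implements, made plain by the reformatting: every
slot in a pixman_implementation_t starts out as a delegate_* stub that
forwards to imp->delegate, and each backend then overwrites just the entries
it can accelerate, so unhandled operations fall through the chain.  A
stripped-down sketch of that pattern (toy types and names, not the real
pixman structs):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct impl impl_t;
    typedef void (*fill_func_t) (impl_t *imp, int width, uint32_t xor);

    struct impl
    {
        impl_t *    delegate;
        fill_func_t fill;
    };

    /* Default slot: forward the call down the chain. */
    static void
    delegate_fill (impl_t *imp, int width, uint32_t xor)
    {
        imp->delegate->fill (imp->delegate, width, xor);
    }

    /* Bottom of the chain: always handles the operation. */
    static void
    general_fill (impl_t *imp, int width, uint32_t xor)
    {
        printf ("general fill: %d pixels <- %08x\n", width, xor);
    }

    static impl_t *
    impl_create (impl_t *delegate)
    {
        impl_t *imp = malloc (sizeof *imp);

        imp->delegate = delegate;
        imp->fill = delegate ? delegate_fill : general_fill;

        return imp;
    }

    int
    main (void)
    {
        impl_t *general = impl_create (NULL);
        impl_t *simd = impl_create (general);

        /* The "simd" level never overrode fill, so the call
         * falls through to the general implementation. */
        simd->fill (simd, 64, 0xff0000ff);

        free (simd);
        free (general);
        return 0;
    }
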
commit 2c74165179b07f31b82402d74dc9fbaf8bf52191
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 19:13:17 2009 -0400

    Reindent and reformat pixman-image.c

diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index d7020f2..f6bda58 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -33,9 +33,9 @@
 #include "pixman-combine32.h"
 
 pixman_bool_t
-_pixman_init_gradient (gradient_t     *gradient,
-		       const pixman_gradient_stop_t *stops,
-		       int	       n_stops)
+_pixman_init_gradient (gradient_t *                  gradient,
+                       const pixman_gradient_stop_t *stops,
+                       int                           n_stops)
 {
     return_val_if_fail (n_stops > 0, FALSE);
 
@@ -62,27 +62,33 @@ _pixman_init_gradient (gradient_t     *gradient,
  * depth, but that's a project for the future.
  */
 void
-_pixman_image_get_scanline_generic_64 (pixman_image_t * pict, int x, int y,
-				       int width, uint32_t *buffer,
-				       const uint32_t *mask, uint32_t mask_bits)
+_pixman_image_get_scanline_generic_64 (pixman_image_t * pict,
+                                       int              x,
+                                       int              y,
+                                       int              width,
+                                       uint32_t *       buffer,
+                                       const uint32_t * mask,
+                                       uint32_t         mask_bits)
 {
     uint32_t *mask8 = NULL;
 
-    // Contract the mask image, if one exists, so that the 32-bit fetch
-    // function can use it.
-    if (mask) {
-        mask8 = pixman_malloc_ab(width, sizeof(uint32_t));
+    /* Contract the mask image, if one exists, so that the 32-bit fetch
+     * function can use it.
+     */
+    if (mask)
+    {
+	mask8 = pixman_malloc_ab (width, sizeof(uint32_t));
 	if (!mask8)
 	    return;
-	
-        pixman_contract (mask8, (uint64_t *)mask, width);
+
+	pixman_contract (mask8, (uint64_t *)mask, width);
     }
 
-    // Fetch the source image into the first half of buffer.
+    /* Fetch the source image into the first half of buffer. */
     _pixman_image_get_scanline_32 (pict, x, y, width, (uint32_t*)buffer, mask8,
-				   mask_bits);
+                                   mask_bits);
 
-    // Expand from 32bpp to 64bpp in place.
+    /* Expand from 32bpp to 64bpp in place. */
     pixman_expand ((uint64_t *)buffer, buffer, PIXMAN_a8r8g8b8, width);
 
     free (mask8);
@@ -123,10 +129,10 @@ _pixman_image_allocate (void)
 
 source_pict_class_t
 _pixman_image_classify (pixman_image_t *image,
-			int             x,
-			int             y,
-			int             width,
-			int             height)
+                        int             x,
+                        int             y,
+                        int             width,
+                        int             height)
 {
     if (image->common.classify)
 	return image->common.classify (image, x, y, width, height);
@@ -135,8 +141,13 @@ _pixman_image_classify (pixman_image_t *image,
 }
 
 void
-_pixman_image_get_scanline_32 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-			       const uint32_t *mask, uint32_t mask_bits)
+_pixman_image_get_scanline_32 (pixman_image_t *image,
+                               int             x,
+                               int             y,
+                               int             width,
+                               uint32_t *      buffer,
+                               const uint32_t *mask,
+                               uint32_t        mask_bits)
 {
     image->common.get_scanline_32 (image, x, y, width, buffer, mask, mask_bits);
 }
@@ -145,8 +156,13 @@ _pixman_image_get_scanline_32 (pixman_image_t *image, int x, int y, int width, u
  * a uint64_t *buffer.
  */
 void
-_pixman_image_get_scanline_64 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-			       const uint32_t *unused, uint32_t unused2)
+_pixman_image_get_scanline_64 (pixman_image_t *image,
+                               int             x,
+                               int             y,
+                               int             width,
+                               uint32_t *      buffer,
+                               const uint32_t *unused,
+                               uint32_t        unused2)
 {
     image->common.get_scanline_64 (image, x, y, width, buffer, unused, unused2);
 }
@@ -178,7 +194,7 @@ pixman_image_unref (pixman_image_t *image)
     {
 	if (image->common.destroy_func)
 	    image->common.destroy_func (image, image->common.destroy_data);
-	
+
 	pixman_region32_fini (&common->clip_region);
 
 	if (common->transform)
@@ -190,21 +206,14 @@ pixman_image_unref (pixman_image_t *image)
 	if (common->alpha_map)
 	    pixman_image_unref ((pixman_image_t *)common->alpha_map);
 
-#if 0
-	if (image->type == BITS && image->bits.indexed)
-	    free (image->bits.indexed);
-#endif
-
-#if 0
-	memset (image, 0xaa, sizeof (pixman_image_t));
-#endif
-	if (image->type == LINEAR || image->type == RADIAL || image->type == CONICAL)
+	if (image->type == LINEAR ||
+	    image->type == RADIAL ||
+	    image->type == CONICAL)
 	{
 	    if (image->gradient.stops)
 		free (image->gradient.stops);
 	}
 
-
 	if (image->type == BITS && image->bits.free_me)
 	    free (image->bits.free_me);
 
@@ -217,16 +226,13 @@ pixman_image_unref (pixman_image_t *image)
 }
 
 PIXMAN_EXPORT void
-pixman_image_set_destroy_function (pixman_image_t *image,
-				   pixman_image_destroy_func_t func,
-				   void *data)
+pixman_image_set_destroy_function (pixman_image_t *            image,
+                                   pixman_image_destroy_func_t func,
+                                   void *                      data)
 {
     image->common.destroy_func = func;
     image->common.destroy_data = data;
 }
-			       
-
-/* Constructors */
 
 void
 _pixman_image_reset_clip_region (pixman_image_t *image)
@@ -235,8 +241,8 @@ _pixman_image_reset_clip_region (pixman_image_t *image)
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_clip_region32 (pixman_image_t *image,
-				pixman_region32_t *region)
+pixman_image_set_clip_region32 (pixman_image_t *   image,
+                                pixman_region32_t *region)
 {
     image_common_t *common = (image_common_t *)image;
     pixman_bool_t result;
@@ -258,10 +264,9 @@ pixman_image_set_clip_region32 (pixman_image_t *image,
     return result;
 }
 
-
 PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_clip_region (pixman_image_t    *image,
-			      pixman_region16_t *region)
+pixman_image_set_clip_region (pixman_image_t *   image,
+                              pixman_region16_t *region)
 {
     image_common_t *common = (image_common_t *)image;
     pixman_bool_t result;
@@ -285,21 +290,20 @@ pixman_image_set_clip_region (pixman_image_t    *image,
 
 PIXMAN_EXPORT void
 pixman_image_set_has_client_clip (pixman_image_t *image,
-				  pixman_bool_t	  client_clip)
+                                  pixman_bool_t   client_clip)
 {
     image->common.client_clip = client_clip;
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_transform (pixman_image_t           *image,
-			    const pixman_transform_t *transform)
+pixman_image_set_transform (pixman_image_t *          image,
+                            const pixman_transform_t *transform)
 {
     static const pixman_transform_t id =
     {
 	{ { pixman_fixed_1, 0, 0 },
 	  { 0, pixman_fixed_1, 0 },
-	  { 0, 0, pixman_fixed_1 }
-	}
+	  { 0, 0, pixman_fixed_1 }}
     };
 
     image_common_t *common = (image_common_t *)image;
@@ -310,9 +314,10 @@ pixman_image_set_transform (pixman_image_t           *image,
 
     if (memcmp (&id, transform, sizeof (pixman_transform_t)) == 0)
     {
-	free(common->transform);
+	free (common->transform);
 	common->transform = NULL;
 	result = TRUE;
+
 	goto out;
     }
 
@@ -322,20 +327,21 @@ pixman_image_set_transform (pixman_image_t           *image,
     if (common->transform == NULL)
     {
 	result = FALSE;
+
 	goto out;
     }
 
-    memcpy(common->transform, transform, sizeof(pixman_transform_t));
+    memcpy (common->transform, transform, sizeof(pixman_transform_t));
 
 out:
     image_property_changed (image);
-    
+
     return TRUE;
 }
 
 PIXMAN_EXPORT void
-pixman_image_set_repeat (pixman_image_t  *image,
-			 pixman_repeat_t  repeat)
+pixman_image_set_repeat (pixman_image_t *image,
+                         pixman_repeat_t repeat)
 {
     image->common.repeat = repeat;
 
@@ -343,10 +349,10 @@ pixman_image_set_repeat (pixman_image_t  *image,
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_image_set_filter (pixman_image_t       *image,
-			 pixman_filter_t       filter,
-			 const pixman_fixed_t *params,
-			 int		       n_params)
+pixman_image_set_filter (pixman_image_t *      image,
+                         pixman_filter_t       filter,
+                         const pixman_fixed_t *params,
+                         int                   n_params)
 {
     image_common_t *common = (image_common_t *)image;
     pixman_fixed_t *new_params;
@@ -362,7 +368,7 @@ pixman_image_set_filter (pixman_image_t       *image,
 	    return FALSE;
 
 	memcpy (new_params,
-		params, n_params * sizeof (pixman_fixed_t));
+	        params, n_params * sizeof (pixman_fixed_t));
     }
 
     common->filter = filter;
@@ -378,8 +384,8 @@ pixman_image_set_filter (pixman_image_t       *image,
 }
 
 PIXMAN_EXPORT void
-pixman_image_set_source_clipping (pixman_image_t  *image,
-				  pixman_bool_t    clip_sources)
+pixman_image_set_source_clipping (pixman_image_t *image,
+                                  pixman_bool_t   clip_sources)
 {
     image->common.clip_sources = clip_sources;
 
@@ -391,8 +397,8 @@ pixman_image_set_source_clipping (pixman_image_t  *image,
  * way, way too expensive.
  */
 PIXMAN_EXPORT void
-pixman_image_set_indexed (pixman_image_t	 *image,
-			  const pixman_indexed_t *indexed)
+pixman_image_set_indexed (pixman_image_t *        image,
+                          const pixman_indexed_t *indexed)
 {
     bits_image_t *bits = (bits_image_t *)image;
 
@@ -403,9 +409,9 @@ pixman_image_set_indexed (pixman_image_t	 *image,
 
 PIXMAN_EXPORT void
 pixman_image_set_alpha_map (pixman_image_t *image,
-			    pixman_image_t *alpha_map,
-			    int16_t         x,
-			    int16_t         y)
+                            pixman_image_t *alpha_map,
+                            int16_t         x,
+                            int16_t         y)
 {
     image_common_t *common = (image_common_t *)image;
 
@@ -429,19 +435,18 @@ pixman_image_set_alpha_map (pixman_image_t *image,
 }
 
 PIXMAN_EXPORT void
-pixman_image_set_component_alpha   (pixman_image_t       *image,
-				    pixman_bool_t         component_alpha)
+pixman_image_set_component_alpha   (pixman_image_t *image,
+                                    pixman_bool_t   component_alpha)
 {
     image->common.component_alpha = component_alpha;
 
     image_property_changed (image);
 }
 
-
 PIXMAN_EXPORT void
-pixman_image_set_accessors (pixman_image_t             *image,
-			    pixman_read_memory_func_t	read_func,
-			    pixman_write_memory_func_t	write_func)
+pixman_image_set_accessors (pixman_image_t *           image,
+                            pixman_read_memory_func_t  read_func,
+                            pixman_write_memory_func_t write_func)
 {
     return_if_fail (image != NULL);
 
@@ -502,9 +507,9 @@ _pixman_image_is_solid (pixman_image_t *image)
     if (image->type == SOLID)
 	return TRUE;
 
-    if (image->type != BITS	||
-	image->bits.width != 1	||
-	image->bits.height != 1)
+    if (image->type != BITS     ||
+        image->bits.width != 1  ||
+        image->bits.height != 1)
     {
 	return FALSE;
     }
@@ -516,22 +521,23 @@ _pixman_image_is_solid (pixman_image_t *image)
 }
 
 uint32_t
-_pixman_image_get_solid (pixman_image_t *image, pixman_format_code_t format)
+_pixman_image_get_solid (pixman_image_t *     image,
+                         pixman_format_code_t format)
 {
     uint32_t result;
-    
+
     _pixman_image_get_scanline_32 (image, 0, 0, 1, &result, NULL, 0);
-    
+
     /* If necessary, convert RGB <--> BGR. */
     if (PIXMAN_FORMAT_TYPE (format) != PIXMAN_TYPE_ARGB)
     {
 	result = (((result & 0xff000000) >>  0) |
-		  ((result & 0x00ff0000) >> 16) |
-		  ((result & 0x0000ff00) >>  0) |
-		  ((result & 0x000000ff) << 16));
-    }									
-    
-    return result;							
+	          ((result & 0x00ff0000) >> 16) |
+	          ((result & 0x0000ff00) >>  0) |
+	          ((result & 0x000000ff) << 16));
+    }
+
+    return result;
 }
 
 pixman_bool_t
@@ -540,46 +546,47 @@ _pixman_image_is_opaque (pixman_image_t *image)
     int i;
 
     if (image->common.alpha_map)
-        return FALSE;
+	return FALSE;
 
     switch (image->type)
     {
     case BITS:
 	if (image->common.repeat == PIXMAN_REPEAT_NONE)
 	    return FALSE;
-	
-        if (PIXMAN_FORMAT_A (image->bits.format))
-            return FALSE;
-        break;
+
+	if (PIXMAN_FORMAT_A (image->bits.format))
+	    return FALSE;
+	break;
 
     case LINEAR:
     case RADIAL:
 	if (image->common.repeat == PIXMAN_REPEAT_NONE)
 	    return FALSE;
-	
+
 	for (i = 0; i < image->gradient.n_stops; ++i)
 	{
-            if (image->gradient.stops[i].color.alpha != 0xffff)
-                return FALSE;
-        }
-        break;
+	    if (image->gradient.stops[i].color.alpha != 0xffff)
+		return FALSE;
+	}
+	break;
 
     case CONICAL:
 	/* Conical gradients always have a transparent border */
 	return FALSE;
 	break;
-	
+
     case SOLID:
 	if (ALPHA_8 (image->solid.color) != 0xff)
-            return FALSE;
-        break;
+	    return FALSE;
+	break;
     }
 
     /* Convolution filters can introduce translucency if the sum of the
      * weights is lower than 1.
      */
     if (image->common.filter == PIXMAN_FILTER_CONVOLUTION)
-         return FALSE;
+	return FALSE;
 
-     return TRUE;
+    return TRUE;
 }
+
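
For context on _pixman_image_get_scanline_generic_64 above: the wide path
narrows any 64bpp mask with pixman_contract, runs the ordinary 32bpp fetch,
then widens the result in place with pixman_expand.  Assuming the usual
bit-replication convention for widening, an 8-bit channel c becomes
(c << 8) | c, so full intensity stays full intensity.  A self-contained
sketch of that round trip for one pixel (illustrative helpers, not the
pixman functions themselves):

    #include <stdint.h>
    #include <stdio.h>

    /* Widen one a8r8g8b8 pixel to 16 bits per channel by replicating
     * each byte into the low byte: 0x00 -> 0x0000, 0xff -> 0xffff. */
    static uint64_t
    expand_8888 (uint32_t p)
    {
        uint64_t a = (p >> 24) & 0xff;
        uint64_t r = (p >> 16) & 0xff;
        uint64_t g = (p >>  8) & 0xff;
        uint64_t b = (p >>  0) & 0xff;

        a |= a << 8;
        r |= r << 8;
        g |= g << 8;
        b |= b << 8;

        return (a << 48) | (r << 32) | (g << 16) | b;
    }

    /* Narrow back by keeping the high byte of each channel. */
    static uint32_t
    contract_16161616 (uint64_t p)
    {
        uint32_t a = (p >> 56) & 0xff;
        uint32_t r = (p >> 40) & 0xff;
        uint32_t g = (p >> 24) & 0xff;
        uint32_t b = (p >>  8) & 0xff;

        return (a << 24) | (r << 16) | (g << 8) | b;
    }

    int
    main (void)
    {
        uint32_t p = 0xff8040c0;
        uint64_t wide = expand_8888 (p);

        printf ("%08x -> %016llx -> %08x\n", p,
                (unsigned long long)wide, contract_16161616 (wide));
        return 0;
    }
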
commit 5aadc28e19328054b15c7ee88996c407a9a7d9b3
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 19:10:53 2009 -0400

    Reindent and reformat pixman-gradient-walker.c

diff --git a/pixman/pixman-gradient-walker.c b/pixman/pixman-gradient-walker.c
index 79010a2..1797caf 100644
--- a/pixman/pixman-gradient-walker.c
+++ b/pixman/pixman-gradient-walker.c
@@ -27,9 +27,9 @@
 #include "pixman-private.h"
 
 void
-_pixman_gradient_walker_init (pixman_gradient_walker_t  *walker,
-			      gradient_t      *gradient,
-			      unsigned int     spread)
+_pixman_gradient_walker_init (pixman_gradient_walker_t *walker,
+                              gradient_t *              gradient,
+                              unsigned int              spread)
 {
     walker->num_stops = gradient->n_stops;
     walker->stops     = gradient->stops;
@@ -41,21 +41,21 @@ _pixman_gradient_walker_init (pixman_gradient_walker_t  *walker,
     walker->right_ag  = 0;
     walker->right_rb  = 0;
     walker->spread    = spread;
-    
+
     walker->need_reset = TRUE;
 }
 
 void
-_pixman_gradient_walker_reset (pixman_gradient_walker_t       *walker,
-			       pixman_fixed_32_32_t  pos)
+_pixman_gradient_walker_reset (pixman_gradient_walker_t *walker,
+                               pixman_fixed_32_32_t      pos)
 {
-    int32_t                  x, left_x, right_x;
+    int32_t x, left_x, right_x;
     pixman_color_t          *left_c, *right_c;
-    int                      n, count = walker->num_stops;
+    int n, count = walker->num_stops;
     pixman_gradient_stop_t *      stops = walker->stops;
-    
-    static const pixman_color_t   transparent_black = { 0, 0, 0, 0 };
-    
+
+    static const pixman_color_t transparent_black = { 0, 0, 0, 0 };
+
     switch (walker->spread)
     {
     case PIXMAN_REPEAT_NORMAL:
@@ -63,47 +63,59 @@ _pixman_gradient_walker_reset (pixman_gradient_walker_t       *walker,
 	for (n = 0; n < count; n++)
 	    if (x < stops[n].x)
 		break;
-	if (n == 0) {
-	    left_x =  stops[count-1].x - 0x10000;
-	    left_c = &stops[count-1].color;
-	} else {
-	    left_x =  stops[n-1].x;
-	    left_c = &stops[n-1].color;
-	}
-	
-	if (n == count) {
+	if (n == 0)
+	{
+	    left_x =  stops[count - 1].x - 0x10000;
+	    left_c = &stops[count - 1].color;
+	}
+	else
+	{
+	    left_x =  stops[n - 1].x;
+	    left_c = &stops[n - 1].color;
+	}
+
+	if (n == count)
+	{
 	    right_x =  stops[0].x + 0x10000;
 	    right_c = &stops[0].color;
-	} else {
+	}
+	else
+	{
 	    right_x =  stops[n].x;
 	    right_c = &stops[n].color;
 	}
 	left_x  += (pos - x);
 	right_x += (pos - x);
 	break;
-	
+
     case PIXMAN_REPEAT_PAD:
 	for (n = 0; n < count; n++)
 	    if (pos < stops[n].x)
 		break;
-	
-	if (n == 0) {
+
+	if (n == 0)
+	{
 	    left_x =  INT32_MIN;
 	    left_c = &stops[0].color;
-	} else {
-	    left_x =  stops[n-1].x;
-	    left_c = &stops[n-1].color;
 	}
-	
-	if (n == count) {
+	else
+	{
+	    left_x =  stops[n - 1].x;
+	    left_c = &stops[n - 1].color;
+	}
+
+	if (n == count)
+	{
 	    right_x =  INT32_MAX;
-	    right_c = &stops[n-1].color;
-	} else {
+	    right_c = &stops[n - 1].color;
+	}
+	else
+	{
 	    right_x =  stops[n].x;
 	    right_c = &stops[n].color;
 	}
 	break;
-	
+
     case PIXMAN_REPEAT_REFLECT:
 	x = (int32_t)pos & 0xFFFF;
 	if ((int32_t)pos & 0x10000)
@@ -111,46 +123,53 @@ _pixman_gradient_walker_reset (pixman_gradient_walker_t       *walker,
 	for (n = 0; n < count; n++)
 	    if (x < stops[n].x)
 		break;
-	
-	if (n == 0) {
+
+	if (n == 0)
+	{
 	    left_x =  -stops[0].x;
 	    left_c = &stops[0].color;
-	} else {
-	    left_x =  stops[n-1].x;
-	    left_c = &stops[n-1].color;
-	}
-	
-	if (n == count) {
-	    right_x = 0x20000 - stops[n-1].x;
-	    right_c = &stops[n-1].color;
-	} else {
+	}
+	else
+	{
+	    left_x =  stops[n - 1].x;
+	    left_c = &stops[n - 1].color;
+	}
+
+	if (n == count)
+	{
+	    right_x = 0x20000 - stops[n - 1].x;
+	    right_c = &stops[n - 1].color;
+	}
+	else
+	{
 	    right_x =  stops[n].x;
 	    right_c = &stops[n].color;
 	}
-	
-	if ((int32_t)pos & 0x10000) {
+
+	if ((int32_t)pos & 0x10000)
+	{
 	    pixman_color_t  *tmp_c;
-	    int32_t          tmp_x;
-	    
+	    int32_t tmp_x;
+
 	    tmp_x   = 0x10000 - right_x;
 	    right_x = 0x10000 - left_x;
 	    left_x  = tmp_x;
-	    
+
 	    tmp_c   = right_c;
 	    right_c = left_c;
 	    left_c  = tmp_c;
-	    
+
 	    x = 0x10000 - x;
 	}
 	left_x  += (pos - x);
 	right_x += (pos - x);
 	break;
-	
+
     default:  /* REPEAT_NONE */
 	for (n = 0; n < count; n++)
 	    if (pos < stops[n].x)
 		break;
-	
+
 	if (n == 0)
 	{
 	    left_x  =  INT32_MIN;
@@ -159,74 +178,75 @@ _pixman_gradient_walker_reset (pixman_gradient_walker_t       *walker,
 	}
 	else if (n == count)
 	{
-	    left_x  = stops[n-1].x;
+	    left_x  = stops[n - 1].x;
 	    right_x = INT32_MAX;
 	    left_c  = right_c = (pixman_color_t*) &transparent_black;
 	}
 	else
 	{
-	    left_x  =  stops[n-1].x;
+	    left_x  =  stops[n - 1].x;
 	    right_x =  stops[n].x;
-	    left_c  = &stops[n-1].color;
+	    left_c  = &stops[n - 1].color;
 	    right_c = &stops[n].color;
 	}
     }
-    
+
     walker->left_x   = left_x;
     walker->right_x  = right_x;
     walker->left_ag  = ((left_c->alpha >> 8) << 16)   | (left_c->green >> 8);
     walker->left_rb  = ((left_c->red & 0xff00) << 8)  | (left_c->blue >> 8);
     walker->right_ag = ((right_c->alpha >> 8) << 16)  | (right_c->green >> 8);
     walker->right_rb = ((right_c->red & 0xff00) << 8) | (right_c->blue >> 8);
-    
-    if ( walker->left_x == walker->right_x                ||
-	 ( walker->left_ag == walker->right_ag &&
-	   walker->left_rb == walker->right_rb )   )
+
+    if (walker->left_x == walker->right_x                ||
+        ( walker->left_ag == walker->right_ag &&
+          walker->left_rb == walker->right_rb )   )
     {
 	walker->stepper = 0;
     }
     else
     {
 	int32_t width = right_x - left_x;
-	walker->stepper = ((1 << 24) + width/2)/width;
+	walker->stepper = ((1 << 24) + width / 2) / width;
     }
-    
+
     walker->need_reset = FALSE;
 }
 
-#define  PIXMAN_GRADIENT_WALKER_NEED_RESET(w,x)				\
+#define  PIXMAN_GRADIENT_WALKER_NEED_RESET(w, x)                         \
     ( (w)->need_reset || (x) < (w)->left_x || (x) >= (w)->right_x)
 
 
 /* the following assumes that PIXMAN_GRADIENT_WALKER_NEED_RESET(w,x) is FALSE */
 uint32_t
-_pixman_gradient_walker_pixel (pixman_gradient_walker_t  *walker,
-			       pixman_fixed_32_32_t     x)
+_pixman_gradient_walker_pixel (pixman_gradient_walker_t *walker,
+                               pixman_fixed_32_32_t      x)
 {
-    int  dist, idist;
-    uint32_t  t1, t2, a, color;
-    
+    int dist, idist;
+    uint32_t t1, t2, a, color;
+
     if (PIXMAN_GRADIENT_WALKER_NEED_RESET (walker, x))
-        _pixman_gradient_walker_reset (walker, x);
-    
-    dist  = ((int)(x - walker->left_x)*walker->stepper) >> 16;
+	_pixman_gradient_walker_reset (walker, x);
+
+    dist  = ((int)(x - walker->left_x) * walker->stepper) >> 16;
     idist = 256 - dist;
-    
+
     /* combined INTERPOLATE and premultiply */
-    t1 = walker->left_rb*idist + walker->right_rb*dist;
+    t1 = walker->left_rb * idist + walker->right_rb * dist;
     t1 = (t1 >> 8) & 0xff00ff;
-    
-    t2  = walker->left_ag*idist + walker->right_ag*dist;
+
+    t2  = walker->left_ag * idist + walker->right_ag * dist;
     t2 &= 0xff00ff00;
-    
+
     color = t2 & 0xff000000;
     a     = t2 >> 24;
-    
-    t1  = t1*a + 0x800080;
+
+    t1  = t1 * a + 0x800080;
     t1  = (t1 + ((t1 >> 8) & 0xff00ff)) >> 8;
-    
-    t2  = (t2 >> 8)*a + 0x800080;
+
+    t2  = (t2 >> 8) * a + 0x800080;
     t2  = (t2 + ((t2 >> 8) & 0xff00ff));
-    
+
     return (color | (t1 & 0xff00ff) | (t2 & 0xff00));
 }
+
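
The interpolation at the end of this file is worth a gloss: the walker keeps
each stop color as two packed words, alpha/green in 0x00AA00GG form and
red/blue in 0x00RR00BB form, so a single multiply-add blends both channels
of a pair at once before the premultiply step.  A minimal sketch of that
packed two-channel lerp (hypothetical helper; the premultiply is omitted):

    #include <stdint.h>
    #include <stdio.h>

    /* Blend two channel pairs packed as 0x00XX00YY.  dist runs 0..256,
     * 0 selecting 'left' and 256 selecting 'right'.  Since the weights
     * sum to 256, each 8-bit channel's weighted sum fits in 16 bits and
     * the two channels never carry into each other. */
    static uint32_t
    lerp_pair (uint32_t left, uint32_t right, int dist)
    {
        int idist = 256 - dist;

        return ((left * idist + right * dist) >> 8) & 0x00ff00ff;
    }

    int
    main (void)
    {
        uint32_t left_ag  = 0x00ff0000;  /* alpha 0xff, green 0x00 */
        uint32_t right_ag = 0x00ff0080;  /* alpha 0xff, green 0x80 */

        /* Halfway between the stops: alpha stays 0xff, green -> 0x40. */
        printf ("ag = %08x\n", lerp_pair (left_ag, right_ag, 128));
        return 0;
    }
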
commit ac043ac2da643d872f519971a316f8bc6bdca0f8
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 19:09:24 2009 -0400

    Reindent and reformat pixman-general.c

diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
index d9bdad7..df98b98 100644
--- a/pixman/pixman-general.c
+++ b/pixman/pixman-general.c
@@ -41,26 +41,29 @@
 
 static void
 general_composite_rect  (pixman_implementation_t *imp,
-			 pixman_op_t              op,
-			 pixman_image_t          *src,
-			 pixman_image_t          *mask,
-			 pixman_image_t          *dest,
-			 int32_t                  src_x,
-			 int32_t                  src_y,
-			 int32_t                  mask_x,
-			 int32_t                  mask_y,
-			 int32_t                  dest_x,
-			 int32_t                  dest_y,
-			 int32_t                  width,
-			 int32_t                  height)
+                         pixman_op_t              op,
+                         pixman_image_t *         src,
+                         pixman_image_t *         mask,
+                         pixman_image_t *         dest,
+                         int32_t                  src_x,
+                         int32_t                  src_y,
+                         int32_t                  mask_x,
+                         int32_t                  mask_y,
+                         int32_t                  dest_x,
+                         int32_t                  dest_y,
+                         int32_t                  width,
+                         int32_t                  height)
 {
     uint8_t stack_scanline_buffer[SCANLINE_BUFFER_LENGTH * 3];
-    const pixman_format_code_t src_format = src->type == BITS ? src->bits.format : 0;
-    const pixman_format_code_t mask_format = mask && mask->type == BITS ? mask->bits.format : 0;
-    const pixman_format_code_t dest_format = dest->type == BITS ? dest->bits.format : 0;
-    const int src_wide = PIXMAN_FORMAT_IS_WIDE(src_format);
-    const int mask_wide = mask && PIXMAN_FORMAT_IS_WIDE(mask_format);
-    const int dest_wide = PIXMAN_FORMAT_IS_WIDE(dest_format);
+    const pixman_format_code_t src_format =
+	src->type == BITS ? src->bits.format : 0;
+    const pixman_format_code_t mask_format =
+	mask && mask->type == BITS ? mask->bits.format : 0;
+    const pixman_format_code_t dest_format =
+	dest->type == BITS ? dest->bits.format : 0;
+    const int src_wide = PIXMAN_FORMAT_IS_WIDE (src_format);
+    const int mask_wide = mask && PIXMAN_FORMAT_IS_WIDE (mask_format);
+    const int dest_wide = PIXMAN_FORMAT_IS_WIDE (dest_format);
     const int wide = src_wide || mask_wide || dest_wide;
     const int Bpp = wide ? 8 : 4;
     uint8_t *scanline_buffer = stack_scanline_buffer;
@@ -73,45 +76,46 @@ general_composite_rect  (pixman_implementation_t *imp,
     uint32_t *bits;
     int32_t stride;
     int i;
-    
+
     if (width * Bpp > SCANLINE_BUFFER_LENGTH)
     {
 	scanline_buffer = pixman_malloc_abc (width, 3, Bpp);
-	
+
 	if (!scanline_buffer)
 	    return;
     }
-    
+
     src_buffer = scanline_buffer;
     mask_buffer = src_buffer + width * Bpp;
     dest_buffer = mask_buffer + width * Bpp;
-    
+
     src_class = _pixman_image_classify (src,
-				       src_x, src_y,
-				       width, height);
-    
+                                        src_x, src_y,
+                                        width, height);
+
     mask_class = SOURCE_IMAGE_CLASS_UNKNOWN;
+
     if (mask)
     {
 	mask_class = _pixman_image_classify (mask,
-					    src_x, src_y,
-					    width, height);
+	                                     src_x, src_y,
+	                                     width, height);
     }
-    
+
     if (op == PIXMAN_OP_CLEAR)
-        fetch_src = NULL;
+	fetch_src = NULL;
     else if (wide)
 	fetch_src = _pixman_image_get_scanline_64;
     else
 	fetch_src = _pixman_image_get_scanline_32;
-    
+
     if (!mask || op == PIXMAN_OP_CLEAR)
 	fetch_mask = NULL;
     else if (wide)
 	fetch_mask = _pixman_image_get_scanline_64;
     else
 	fetch_mask = _pixman_image_get_scanline_32;
-    
+
     if (op == PIXMAN_OP_CLEAR || op == PIXMAN_OP_SRC)
 	fetch_dest = NULL;
     else if (wide)
@@ -129,15 +133,15 @@ general_composite_rect  (pixman_implementation_t *imp,
      * the destination format.
      */
     if (!wide &&
-	!dest->common.alpha_map &&
-	!dest->common.write_func && 
-	(op == PIXMAN_OP_ADD || op == PIXMAN_OP_OVER) &&
-	(dest->bits.format == PIXMAN_a8r8g8b8 ||
-	 dest->bits.format == PIXMAN_x8r8g8b8))
+        !dest->common.alpha_map &&
+        !dest->common.write_func &&
+        (op == PIXMAN_OP_ADD || op == PIXMAN_OP_OVER) &&
+        (dest->bits.format == PIXMAN_a8r8g8b8 ||
+         dest->bits.format == PIXMAN_x8r8g8b8))
     {
 	store = NULL;
     }
-    
+
     if (!store)
     {
 	bits = dest->bits.bits;
@@ -148,15 +152,15 @@ general_composite_rect  (pixman_implementation_t *imp,
 	bits = NULL;
 	stride = 0;
     }
-    
+
     component_alpha =
-	fetch_src			&&
-	fetch_mask			&&
-	mask				&&
-	mask->common.type == BITS	&&
-	mask->common.component_alpha	&&
-	PIXMAN_FORMAT_RGB (mask->bits.format);
-    
+        fetch_src                       &&
+        fetch_mask                      &&
+        mask                            &&
+        mask->common.type == BITS       &&
+        mask->common.component_alpha    &&
+        PIXMAN_FORMAT_RGB (mask->bits.format);
+
     if (wide)
     {
 	if (component_alpha)
@@ -171,13 +175,13 @@ general_composite_rect  (pixman_implementation_t *imp,
 	else
 	    compose = _pixman_implementation_combine_32;
     }
-    
+
     if (!compose)
 	return;
-    
+
     if (!fetch_mask)
 	mask_buffer = NULL;
-    
+
     for (i = 0; i < height; ++i)
     {
 	/* fill first half of scanline with source */
@@ -188,105 +192,115 @@ general_composite_rect  (pixman_implementation_t *imp,
 		/* fetch mask before source so that fetching of
 		   source can be optimized */
 		fetch_mask (mask, mask_x, mask_y + i,
-			   width, (void *)mask_buffer, 0, 0);
-		
+		            width, (void *)mask_buffer, 0, 0);
+
 		if (mask_class == SOURCE_IMAGE_CLASS_HORIZONTAL)
 		    fetch_mask = NULL;
 	    }
-	    
+
 	    if (src_class == SOURCE_IMAGE_CLASS_HORIZONTAL)
 	    {
 		fetch_src (src, src_x, src_y + i,
-			  width, (void *)src_buffer, 0, 0);
+		           width, (void *)src_buffer, 0, 0);
 		fetch_src = NULL;
 	    }
 	    else
 	    {
 		fetch_src (src, src_x, src_y + i,
-			  width, (void *)src_buffer, (void *)mask_buffer,
-			  0xffffffff);
+		           width, (void *)src_buffer, (void *)mask_buffer,
+		           0xffffffff);
 	    }
 	}
 	else if (fetch_mask)
 	{
 	    fetch_mask (mask, mask_x, mask_y + i,
-		       width, (void *)mask_buffer, 0, 0);
+	                width, (void *)mask_buffer, 0, 0);
 	}
-	
+
 	if (store)
 	{
 	    /* fill dest into second half of scanline */
 	    if (fetch_dest)
+	    {
 		fetch_dest (dest, dest_x, dest_y + i,
-			   width, (void *)dest_buffer, 0, 0);
-	    
+		            width, (void *)dest_buffer, 0, 0);
+	    }
+
 	    /* blend */
-	    compose (imp->toplevel, op, (void *)dest_buffer, (void *)src_buffer, (void *)mask_buffer, width);
-	    
+	    compose (imp->toplevel, op,
+		     (void *)dest_buffer,
+		     (void *)src_buffer,
+		     (void *)mask_buffer,
+		     width);
+
 	    /* write back */
 	    store (&(dest->bits), dest_x, dest_y + i, width,
-		   (void *)dest_buffer);
+	           (void *)dest_buffer);
 	}
 	else
 	{
 	    /* blend */
-	    compose (imp->toplevel, op, bits + (dest_y + i) * stride +
-		     dest_x,
-		     (void *)src_buffer, (void *)mask_buffer, width);
+	    compose (imp->toplevel, op,
+		     bits + (dest_y + i) * stride + dest_x,
+	             (void *)src_buffer, (void *)mask_buffer, width);
 	}
     }
-    
+
     if (scanline_buffer != stack_scanline_buffer)
 	free (scanline_buffer);
 }
 
 static void
-general_composite (pixman_implementation_t *	imp,
-		   pixman_op_t			op,
-		   pixman_image_t *		src,
-		   pixman_image_t *		mask,
-		   pixman_image_t *		dest,
-		   int32_t			src_x,
-		   int32_t			src_y,
-		   int32_t			mask_x,
-		   int32_t			mask_y,
-		   int32_t			dest_x,
-		   int32_t			dest_y,
-		   int32_t			width,
-		   int32_t			height)
+general_composite (pixman_implementation_t * imp,
+                   pixman_op_t               op,
+                   pixman_image_t *          src,
+                   pixman_image_t *          mask,
+                   pixman_image_t *          dest,
+                   int32_t                   src_x,
+                   int32_t                   src_y,
+                   int32_t                   mask_x,
+                   int32_t                   mask_y,
+                   int32_t                   dest_x,
+                   int32_t                   dest_y,
+                   int32_t                   width,
+                   int32_t                   height)
 {
     _pixman_walk_composite_region (imp, op, src, mask, dest, src_x, src_y,
-				   mask_x, mask_y, dest_x, dest_y, width, height,
-				   general_composite_rect);
+                                   mask_x, mask_y, dest_x, dest_y,
+				   width, height,
+                                   general_composite_rect);
 }
 
 static pixman_bool_t
 general_blt (pixman_implementation_t *imp,
-	     uint32_t *src_bits,
-	     uint32_t *dst_bits,
-	     int src_stride,
-	     int dst_stride,
-	     int src_bpp,
-	     int dst_bpp,
-	     int src_x, int src_y,
-	     int dst_x, int dst_y,
-	     int width, int height)
+             uint32_t *               src_bits,
+             uint32_t *               dst_bits,
+             int                      src_stride,
+             int                      dst_stride,
+             int                      src_bpp,
+             int                      dst_bpp,
+             int                      src_x,
+             int                      src_y,
+             int                      dst_x,
+             int                      dst_y,
+             int                      width,
+             int                      height)
 {
     /* We can't blit unless we have sse2 or mmx */
-    
+
     return FALSE;
 }
 
 static pixman_bool_t
 general_fill (pixman_implementation_t *imp,
-	      uint32_t *bits,
-	      int stride,
-	      int bpp,
-	      int x,
-	      int y,
-	      int width,
-	      int height,
-	      uint32_t xor)
+              uint32_t *               bits,
+              int                      stride,
+              int                      bpp,
+              int                      x,
+              int                      y,
+              int                      width,
+              int                      height,
+              uint32_t xor)
 {
     return FALSE;
 }
@@ -298,10 +312,11 @@ _pixman_implementation_create_general (void)
 
     _pixman_setup_combiner_functions_32 (imp);
     _pixman_setup_combiner_functions_64 (imp);
-    
+
     imp->composite = general_composite;
     imp->blt = general_blt;
     imp->fill = general_fill;
-    
+
     return imp;
 }
+
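
general_composite_rect above is the fallback everything else delegates to:
for each scanline it fetches the source (and mask) into temporary buffers,
optionally fetches the destination, runs a combiner over the buffers, and
writes the result back.  A toy model of that fetch/combine/store loop with
an approximate OVER combiner (simplified throughout: per-channel rounding
omitted, and none of pixman's wide-format or classification logic):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define WIDTH  4
    #define HEIGHT 2

    static uint32_t src_bits[HEIGHT][WIDTH];   /* a8r8g8b8, premultiplied */
    static uint32_t dst_bits[HEIGHT][WIDTH];

    /* OVER for one scanline: dest = src + dest * (1 - alpha(src)). */
    static void
    combine_over (uint32_t *dest, const uint32_t *src, int width)
    {
        int i;

        for (i = 0; i < width; ++i)
        {
            uint32_t s = src[i];
            uint32_t ia = 255 - (s >> 24);
            uint32_t d = dest[i];
            uint32_t rb = (((d & 0x00ff00ff) * ia) >> 8) & 0x00ff00ff;
            uint32_t ag = (((d >> 8) & 0x00ff00ff) * ia) & 0xff00ff00;

            dest[i] = s + (rb | ag);
        }
    }

    int
    main (void)
    {
        uint32_t src_buffer[WIDTH], dest_buffer[WIDTH];
        int y;

        memset (src_bits, 0x80, sizeof src_bits);  /* translucent gray */
        memset (dst_bits, 0xff, sizeof dst_bits);  /* opaque white */

        for (y = 0; y < HEIGHT; ++y)
        {
            /* fetch scanlines into the temporary buffers */
            memcpy (src_buffer, src_bits[y], sizeof src_buffer);
            memcpy (dest_buffer, dst_bits[y], sizeof dest_buffer);

            /* blend */
            combine_over (dest_buffer, src_buffer, WIDTH);

            /* write back */
            memcpy (dst_bits[y], dest_buffer, sizeof dest_buffer);
        }

        printf ("dst[0][0] = %08x\n", dst_bits[0][0]);
        return 0;
    }
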
commit 7b3f5fdc571e8d6b4d64f950f2578d47b1056c86
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 19:06:30 2009 -0400

    Reindent and reformat pixman-fast-path.c

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index a255080..8b819ca 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -50,7 +50,8 @@ fetch_24 (uint8_t *a)
 }
 
 static force_inline void
-store_24 (uint8_t *a, uint32_t v)
+store_24 (uint8_t *a,
+          uint32_t v)
 {
     if (((unsigned long)a) & 1)
     {
@@ -60,7 +61,7 @@ store_24 (uint8_t *a, uint32_t v)
 #else
 	*a = (uint8_t) (v);
 	*(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
-#endif	
+#endif
     }
     else
     {
@@ -70,24 +71,26 @@ store_24 (uint8_t *a, uint32_t v)
 #else
 	*(uint16_t *)a = (uint16_t)v;
 	*(a + 2) = (uint8_t)(v >> 16);
-#endif	
+#endif
     }
 }
 
 static force_inline uint32_t
-over (uint32_t src, uint32_t dest)
+over (uint32_t src,
+      uint32_t dest)
 {
-    uint32_t a = ~src >> 24; 
+    uint32_t a = ~src >> 24;
 
-    UN8x4_MUL_UN8_ADD_UN8x4(dest, a, src);
+    UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
 
     return dest;
 }
 
 static uint32_t
-in (uint32_t x, uint8_t y)
+in (uint32_t x,
+    uint8_t  y)
 {
-    uint16_t  a = y;
+    uint16_t a = y;
 
     UN8x4_MUL_UN8 (x, a);
 
@@ -101,23 +104,23 @@ in (uint32_t x, uint8_t y)
  */
 static void
 fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
-			     pixman_op_t      op,
-			     pixman_image_t * src_image,
-			     pixman_image_t * mask_image,
-			     pixman_image_t * dst_image,
-			     int32_t      src_x,
-			     int32_t      src_y,
-			     int32_t      mask_x,
-			     int32_t      mask_y,
-			     int32_t      dest_x,
-			     int32_t      dest_y,
-			     int32_t     width,
-			     int32_t     height)
+                                 pixman_op_t              op,
+                                 pixman_image_t *         src_image,
+                                 pixman_image_t *         mask_image,
+                                 pixman_image_t *         dst_image,
+                                 int32_t                  src_x,
+                                 int32_t                  src_y,
+                                 int32_t                  mask_x,
+                                 int32_t                  mask_y,
+                                 int32_t                  dest_x,
+                                 int32_t                  dest_y,
+                                 int32_t                  width,
+                                 int32_t                  height)
 {
-    uint32_t	*src, *src_line;
+    uint32_t    *src, *src_line;
     uint32_t    *dst, *dst_line;
-    uint8_t	*mask, *mask_line;
-    int		 src_stride, mask_stride, dst_stride;
+    uint8_t     *mask, *mask_line;
+    int src_stride, mask_stride, dst_stride;
     uint8_t m;
     uint32_t s, d;
     uint16_t w;
@@ -159,34 +162,35 @@ fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
 
 static void
 fast_composite_in_n_8_8 (pixman_implementation_t *imp,
-			      pixman_op_t      op,
-			      pixman_image_t    *src_image,
-			      pixman_image_t    *mask_image,
-			      pixman_image_t    *dest_image,
-			      int32_t      src_x,
-			      int32_t      src_y,
-			      int32_t      mask_x,
-			      int32_t      mask_y,
-			      int32_t      dest_x,
-			      int32_t      dest_y,
-			      int32_t     width,
-			      int32_t     height)
+                         pixman_op_t              op,
+                         pixman_image_t *         src_image,
+                         pixman_image_t *         mask_image,
+                         pixman_image_t *         dest_image,
+                         int32_t                  src_x,
+                         int32_t                  src_y,
+                         int32_t                  mask_x,
+                         int32_t                  mask_y,
+                         int32_t                  dest_x,
+                         int32_t                  dest_y,
+                         int32_t                  width,
+                         int32_t                  height)
 {
-    uint32_t	src, srca;
-    uint8_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask, m;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
-    uint16_t    t;
+    uint32_t src, srca;
+    uint8_t     *dst_line, *dst;
+    uint8_t     *mask_line, *mask, m;
+    int dst_stride, mask_stride;
+    uint16_t w;
+    uint16_t t;
 
-    src = _pixman_image_get_solid(src_image, dest_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dest_image->bits.format);
 
     srca = src >> 24;
 
     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
-    if (srca == 0xff) {
+    if (srca == 0xff)
+    {
 	while (height--)
 	{
 	    dst = dst_line;
@@ -204,7 +208,7 @@ fast_composite_in_n_8_8 (pixman_implementation_t *imp,
 		}
 		else if (m != 0xff)
 		{
-		    *dst = MUL_UN8(m, *dst, t);
+		    *dst = MUL_UN8 (m, *dst, t);
 		}
 		dst++;
 	    }
@@ -223,14 +227,14 @@ fast_composite_in_n_8_8 (pixman_implementation_t *imp,
 	    while (w--)
 	    {
 		m = *mask++;
-		m = MUL_UN8(m, srca, t);
+		m = MUL_UN8 (m, srca, t);
 		if (m == 0)
 		{
 		    *dst = 0;
 		}
 		else if (m != 0xff)
 		{
-		    *dst = MUL_UN8(m, *dst, t);
+		    *dst = MUL_UN8 (m, *dst, t);
 		}
 		dst++;
 	    }
@@ -238,28 +242,27 @@ fast_composite_in_n_8_8 (pixman_implementation_t *imp,
     }
 }
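
MUL_UN8, applied to every pixel in these paths, computes a*b/255 rounded to nearest without a division: t = a*b + 0x80, then (t + (t >> 8)) >> 8. The identity is cheap to verify exhaustively; a self-contained check, with the macro written out in the shape the UN8 helpers in pixman-combine.h.template use (assumed here, not quoted):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MUL_UN8(a, b, t)						\
    ((t) = (a) * (uint16_t) (b) + 0x80, ((((t) >> 8) + (t)) >> 8))

int
main (void)
{
    unsigned a, b;
    uint16_t t;

    for (a = 0; a < 256; a++)
    {
        for (b = 0; b < 256; b++)
        {
            /* (a * b + 127) / 255 is a * b / 255 rounded to
             * nearest; exact ties cannot occur since 255 is odd */
            assert (MUL_UN8 (a, b, t) == (a * b + 127) / 255);
        }
    }
    printf ("MUL_UN8 matches rounded a*b/255 for all inputs\n");
    return 0;
}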
 
-
 static void
 fast_composite_in_8_8 (pixman_implementation_t *imp,
-		      pixman_op_t      op,
-		      pixman_image_t  *src_image,
-		      pixman_image_t  *mask_image,
-		      pixman_image_t  *dest_image,
-		      int32_t          src_x,
-		      int32_t          src_y,
-		      int32_t          mask_x,
-		      int32_t          mask_y,
-		      int32_t          dest_x,
-		      int32_t          dest_y,
-		      int32_t         width,
-		      int32_t         height)
+                       pixman_op_t              op,
+                       pixman_image_t *         src_image,
+                       pixman_image_t *         mask_image,
+                       pixman_image_t *         dest_image,
+                       int32_t                  src_x,
+                       int32_t                  src_y,
+                       int32_t                  mask_x,
+                       int32_t                  mask_y,
+                       int32_t                  dest_x,
+                       int32_t                  dest_y,
+                       int32_t                  width,
+                       int32_t                  height)
 {
-    uint8_t	*dst_line, *dst;
-    uint8_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-    uint8_t	s;
-    uint16_t	t;
+    uint8_t     *dst_line, *dst;
+    uint8_t     *src_line, *src;
+    int dst_stride, src_stride;
+    uint16_t w;
+    uint8_t s;
+    uint16_t t;
 
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
@@ -281,7 +284,7 @@ fast_composite_in_8_8 (pixman_implementation_t *imp,
 	    }
 	    else if (s != 0xff)
 	    {
-		*dst = MUL_UN8(s, *dst, t);
+		*dst = MUL_UN8 (s, *dst, t);
 	    }
 	    dst++;
 	}
@@ -290,26 +293,26 @@ fast_composite_in_8_8 (pixman_implementation_t *imp,
 
 static void
 fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
-			       pixman_op_t      op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t      src_x,
-			       int32_t      src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t     width,
-			       int32_t     height)
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
 {
-    uint32_t	 src, srca;
-    uint32_t	*dst_line, *dst, d;
-    uint8_t	*mask_line, *mask, m;
-    int		 dst_stride, mask_stride;
-    uint16_t	 w;
+    uint32_t src, srca;
+    uint32_t    *dst_line, *dst, d;
+    uint8_t     *mask_line, *mask, m;
+    int dst_stride, mask_stride;
+    uint16_t w;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -348,26 +351,26 @@ fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
 
 static void
 fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
-				   pixman_op_t op,
-				   pixman_image_t * src_image,
-				   pixman_image_t * mask_image,
-				   pixman_image_t * dst_image,
-				   int32_t      src_x,
-				   int32_t      src_y,
-				   int32_t      mask_x,
-				   int32_t      mask_y,
-				   int32_t      dest_x,
-				   int32_t      dest_y,
-				   int32_t     width,
-				   int32_t     height)
+                                    pixman_op_t              op,
+                                    pixman_image_t *         src_image,
+                                    pixman_image_t *         mask_image,
+                                    pixman_image_t *         dst_image,
+                                    int32_t                  src_x,
+                                    int32_t                  src_y,
+                                    int32_t                  mask_x,
+                                    int32_t                  mask_y,
+                                    int32_t                  dest_x,
+                                    int32_t                  dest_y,
+                                    int32_t                  width,
+                                    int32_t                  height)
 {
-    uint32_t	src, srca;
-    uint32_t	*dst_line, *dst, d;
-    uint32_t	*mask_line, *mask, ma;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
+    uint32_t src, srca;
+    uint32_t    *dst_line, *dst, d;
+    uint32_t    *mask_line, *mask, ma;
+    int dst_stride, mask_stride;
+    uint16_t w;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -413,27 +416,27 @@ fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 
 static void
 fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
-			       pixman_op_t op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t      src_x,
-			       int32_t      src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t     width,
-			       int32_t     height)
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
 {
-    uint32_t	src, srca;
-    uint8_t	*dst_line, *dst;
-    uint32_t	d;
-    uint8_t	*mask_line, *mask, m;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
+    uint32_t src, srca;
+    uint8_t     *dst_line, *dst;
+    uint32_t d;
+    uint8_t     *mask_line, *mask, m;
+    int dst_stride, mask_stride;
+    uint16_t w;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -459,15 +462,15 @@ fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
 		    d = src;
 		else
 		{
-		    d = fetch_24(dst);
+		    d = fetch_24 (dst);
 		    d = over (src, d);
 		}
-		store_24(dst, d);
+		store_24 (dst, d);
 	    }
 	    else if (m)
 	    {
-		d = over (in(src,m), fetch_24(dst));
-		store_24(dst, d);
+		d = over (in (src, m), fetch_24 (dst));
+		store_24 (dst, d);
 	    }
 	    dst += 3;
 	}
@@ -476,27 +479,27 @@ fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
 
 static void
 fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
-			       pixman_op_t op,
-				  pixman_image_t * src_image,
-				  pixman_image_t * mask_image,
-				  pixman_image_t * dst_image,
-				  int32_t      src_x,
-				  int32_t      src_y,
-				  int32_t      mask_x,
-				  int32_t      mask_y,
-				  int32_t      dest_x,
-				  int32_t      dest_y,
-				  int32_t     width,
-				  int32_t     height)
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
 {
-    uint32_t	src, srca;
-    uint16_t	*dst_line, *dst;
-    uint32_t	d;
-    uint8_t	*mask_line, *mask, m;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
+    uint32_t src, srca;
+    uint16_t    *dst_line, *dst;
+    uint32_t d;
+    uint8_t     *mask_line, *mask, m;
+    int dst_stride, mask_stride;
+    uint16_t w;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
@@ -523,15 +526,15 @@ fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
 		else
 		{
 		    d = *dst;
-		    d = over (src, CONVERT_0565_TO_0888(d));
+		    d = over (src, CONVERT_0565_TO_0888 (d));
 		}
-		*dst = CONVERT_8888_TO_0565(d);
+		*dst = CONVERT_8888_TO_0565 (d);
 	    }
 	    else if (m)
 	    {
 		d = *dst;
-		d = over (in(src,m), CONVERT_0565_TO_0888(d));
-		*dst = CONVERT_8888_TO_0565(d);
+		d = over (in (src, m), CONVERT_0565_TO_0888 (d));
+		*dst = CONVERT_8888_TO_0565 (d);
 	    }
 	    dst++;
 	}
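
The 0565 paths round-trip each destination pixel through CONVERT_0565_TO_0888 and back. A standalone sketch with the usual bit layouts; the widening step replicates the high bits into the low bits so full-scale channels stay full scale (the real macros in pixman-private.h use an equivalent formulation):

#include <stdint.h>
#include <stdio.h>

/* Widen r5g6b5 to x8r8g8b8; replicating the top bits into the
 * bottom bits maps 0x1f to 0xff and 0x00 to 0x00. */
static uint32_t
convert_0565_to_0888 (uint16_t s)
{
    uint32_t r = (s >> 11) & 0x1f;
    uint32_t g = (s >> 5) & 0x3f;
    uint32_t b = s & 0x1f;

    r = (r << 3) | (r >> 2);
    g = (g << 2) | (g >> 4);
    b = (b << 3) | (b >> 2);

    return (r << 16) | (g << 8) | b;
}

/* Narrow x8r8g8b8 to r5g6b5 by dropping the low bits. */
static uint16_t
convert_8888_to_0565 (uint32_t s)
{
    return (uint16_t) (((s >> 8) & 0xf800) |
                       ((s >> 5) & 0x07e0) |
                       ((s >> 3) & 0x001f));
}

int
main (void)
{
    printf ("%04x\n", convert_8888_to_0565 (0x00ff0000)); /* f800   */
    printf ("%06x\n", convert_0565_to_0888 (0xf800));     /* ff0000 */
    return 0;
}
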
@@ -540,34 +543,34 @@ fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
 
 static void
 fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
-				   pixman_op_t op,
-				   pixman_image_t * src_image,
-				   pixman_image_t * mask_image,
-				   pixman_image_t * dst_image,
-				   int32_t      src_x,
-				   int32_t      src_y,
-				   int32_t      mask_x,
-				   int32_t      mask_y,
-				   int32_t      dest_x,
-				   int32_t      dest_y,
-				   int32_t     width,
-				   int32_t     height)
+                                    pixman_op_t              op,
+                                    pixman_image_t *         src_image,
+                                    pixman_image_t *         mask_image,
+                                    pixman_image_t *         dst_image,
+                                    int32_t                  src_x,
+                                    int32_t                  src_y,
+                                    int32_t                  mask_x,
+                                    int32_t                  mask_y,
+                                    int32_t                  dest_x,
+                                    int32_t                  dest_y,
+                                    int32_t                  width,
+                                    int32_t                  height)
 {
-    uint32_t	src, srca;
-    uint16_t	src16;
-    uint16_t	*dst_line, *dst;
-    uint32_t	d;
-    uint32_t	*mask_line, *mask, ma;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
+    uint32_t src, srca;
+    uint16_t src16;
+    uint16_t    *dst_line, *dst;
+    uint32_t d;
+    uint32_t    *mask_line, *mask, ma;
+    int dst_stride, mask_stride;
+    uint16_t w;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     srca = src >> 24;
     if (src == 0)
 	return;
 
-    src16 = CONVERT_8888_TO_0565(src);
+    src16 = CONVERT_8888_TO_0565 (src);
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
@@ -592,21 +595,21 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 		else
 		{
 		    d = *dst;
-		    d = over (src, CONVERT_0565_TO_0888(d));
-		    *dst = CONVERT_8888_TO_0565(d);
+		    d = over (src, CONVERT_0565_TO_0888 (d));
+		    *dst = CONVERT_8888_TO_0565 (d);
 		}
 	    }
 	    else if (ma)
 	    {
 		d = *dst;
-		d = CONVERT_0565_TO_0888(d);
+		d = CONVERT_0565_TO_0888 (d);
 
 		UN8x4_MUL_UN8x4 (src, ma);
 		UN8x4_MUL_UN8 (ma, srca);
 		ma = ~ma;
 		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, src);
-		
-		*dst = CONVERT_8888_TO_0565(d);
+
+		*dst = CONVERT_8888_TO_0565 (d);
 	    }
 	    dst++;
 	}
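
In the component-alpha branch, each mask channel scales its own source channel and the per-channel effective alpha becomes mask * srca, so per channel the blend is d = s*m/255 + d*(255 - m*sa/255)/255. That is exactly the UN8x4_MUL_UN8x4 / UN8x4_MUL_UN8 / ~ma / UN8x4_MUL_UN8x4_ADD_UN8x4 sequence above. One channel of it in scalar form (a sketch; function names are illustrative):

#include <stdint.h>
#include <stdio.h>

/* a * b / 255, rounded (same helper as in the earlier sketch) */
static uint8_t
mul_un8 (uint8_t a, uint8_t b)
{
    uint16_t t = (uint16_t) a * b + 0x80;
    return (uint8_t) ((t + (t >> 8)) >> 8);
}

/* One channel of component-alpha OVER: m is this channel's mask
 * value, sa the source alpha, s and d the source and destination
 * channel values. */
static uint8_t
over_ca_channel (uint8_t s, uint8_t sa, uint8_t m, uint8_t d)
{
    uint8_t s_in_m = mul_un8 (s, m);       /* src scaled by mask */
    uint8_t a      = mul_un8 (m, sa);      /* effective alpha    */

    return s_in_m + mul_un8 (d, 255 - a);  /* s + d * (1 - a)    */
}

int
main (void)
{
    /* full mask and full alpha degenerate to plain OVER */
    printf ("%d\n", over_ca_channel (0x80, 0xff, 0xff, 0x40));
    return 0;
}
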
@@ -615,24 +618,24 @@ fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 
 static void
 fast_composite_over_8888_8888 (pixman_implementation_t *imp,
-			  pixman_op_t op,
-			 pixman_image_t * src_image,
-			 pixman_image_t * mask_image,
-			 pixman_image_t * dst_image,
-			 int32_t      src_x,
-			 int32_t      src_y,
-			 int32_t      mask_x,
-			 int32_t      mask_y,
-			 int32_t      dest_x,
-			 int32_t      dest_y,
-			 int32_t     width,
-			 int32_t     height)
+                               pixman_op_t              op,
+                               pixman_image_t *         src_image,
+                               pixman_image_t *         mask_image,
+                               pixman_image_t *         dst_image,
+                               int32_t                  src_x,
+                               int32_t                  src_y,
+                               int32_t                  mask_x,
+                               int32_t                  mask_y,
+                               int32_t                  dest_x,
+                               int32_t                  dest_y,
+                               int32_t                  width,
+                               int32_t                  height)
 {
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src, s;
-    int	dst_stride, src_stride;
-    uint8_t	a;
-    uint16_t	w;
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src, s;
+    int dst_stride, src_stride;
+    uint8_t a;
+    uint16_t w;
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
@@ -660,25 +663,25 @@ fast_composite_over_8888_8888 (pixman_implementation_t *imp,
 
 static void
 fast_composite_src_8888_0888 (pixman_implementation_t *imp,
-			  pixman_op_t op,
-			 pixman_image_t * src_image,
-			 pixman_image_t * mask_image,
-			 pixman_image_t * dst_image,
-			 int32_t      src_x,
-			 int32_t      src_y,
-			 int32_t      mask_x,
-			 int32_t      mask_y,
-			 int32_t      dest_x,
-			 int32_t      dest_y,
-			 int32_t     width,
-			 int32_t     height)
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
 {
-    uint8_t	*dst_line, *dst;
-    uint32_t	d;
-    uint32_t	*src_line, *src, s;
-    uint8_t	a;
-    int	dst_stride, src_stride;
-    uint16_t	w;
+    uint8_t     *dst_line, *dst;
+    uint32_t d;
+    uint32_t    *src_line, *src, s;
+    uint8_t a;
+    int dst_stride, src_stride;
+    uint16_t w;
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
@@ -700,9 +703,9 @@ fast_composite_src_8888_0888 (pixman_implementation_t *imp,
 		if (a == 0xff)
 		    d = s;
 		else
-		    d = over (s, fetch_24(dst));
+		    d = over (s, fetch_24 (dst));
 
-		store_24(dst, d);
+		store_24 (dst, d);
 	    }
 	    dst += 3;
 	}
@@ -711,25 +714,25 @@ fast_composite_src_8888_0888 (pixman_implementation_t *imp,
 
 static void
 fast_composite_over_8888_0565 (pixman_implementation_t *imp,
-			  pixman_op_t op,
-			 pixman_image_t * src_image,
-			 pixman_image_t * mask_image,
-			 pixman_image_t * dst_image,
-			 int32_t      src_x,
-			 int32_t      src_y,
-			 int32_t      mask_x,
-			 int32_t      mask_y,
-			 int32_t      dest_x,
-			 int32_t      dest_y,
-			 int32_t     width,
-			 int32_t     height)
+                               pixman_op_t              op,
+                               pixman_image_t *         src_image,
+                               pixman_image_t *         mask_image,
+                               pixman_image_t *         dst_image,
+                               int32_t                  src_x,
+                               int32_t                  src_y,
+                               int32_t                  mask_x,
+                               int32_t                  mask_y,
+                               int32_t                  dest_x,
+                               int32_t                  dest_y,
+                               int32_t                  width,
+                               int32_t                  height)
 {
-    uint16_t	*dst_line, *dst;
-    uint32_t	d;
-    uint32_t	*src_line, *src, s;
-    uint8_t	a;
-    int	dst_stride, src_stride;
-    uint16_t	w;
+    uint16_t    *dst_line, *dst;
+    uint32_t d;
+    uint32_t    *src_line, *src, s;
+    uint8_t a;
+    int dst_stride, src_stride;
+    uint16_t w;
 
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
@@ -753,9 +756,9 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp,
 		else
 		{
 		    d = *dst;
-		    d = over (s, CONVERT_0565_TO_0888(d));
+		    d = over (s, CONVERT_0565_TO_0888 (d));
 		}
-		*dst = CONVERT_8888_TO_0565(d);
+		*dst = CONVERT_8888_TO_0565 (d);
 	    }
 	    dst++;
 	}
@@ -764,23 +767,23 @@ fast_composite_over_8888_0565 (pixman_implementation_t *imp,
 
 static void
 fast_composite_src_x888_0565 (pixman_implementation_t *imp,
-			  pixman_op_t op,
-                          pixman_image_t * src_image,
-                          pixman_image_t * mask_image,
-                          pixman_image_t * dst_image,
-                          int32_t      src_x,
-                          int32_t      src_y,
-                          int32_t      mask_x,
-                          int32_t      mask_y,
-                          int32_t      dest_x,
-                          int32_t      dest_y,
-                          int32_t     width,
-                          int32_t     height)
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
 {
-    uint16_t	*dst_line, *dst;
-    uint32_t	*src_line, *src, s;
-    int	dst_stride, src_stride;
-    uint16_t	w;
+    uint16_t    *dst_line, *dst;
+    uint32_t    *src_line, *src, s;
+    int dst_stride, src_stride;
+    uint16_t w;
 
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
@@ -796,7 +799,7 @@ fast_composite_src_x888_0565 (pixman_implementation_t *imp,
 	while (w--)
 	{
 	    s = *src++;
-	    *dst = CONVERT_8888_TO_0565(s);
+	    *dst = CONVERT_8888_TO_0565 (s);
 	    dst++;
 	}
     }
@@ -804,25 +807,25 @@ fast_composite_src_x888_0565 (pixman_implementation_t *imp,
 
 static void
 fast_composite_add_8000_8000 (pixman_implementation_t *imp,
-			     pixman_op_t	op,
-			     pixman_image_t * src_image,
-			     pixman_image_t * mask_image,
-			     pixman_image_t * dst_image,
-			     int32_t      src_x,
-			     int32_t      src_y,
-			     int32_t      mask_x,
-			     int32_t      mask_y,
-			     int32_t      dest_x,
-			     int32_t      dest_y,
-			     int32_t     width,
-			     int32_t     height)
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
 {
-    uint8_t	*dst_line, *dst;
-    uint8_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-    uint8_t	s, d;
-    uint16_t	t;
+    uint8_t     *dst_line, *dst;
+    uint8_t     *src_line, *src;
+    int dst_stride, src_stride;
+    uint16_t w;
+    uint8_t s, d;
+    uint16_t t;
 
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
@@ -855,24 +858,24 @@ fast_composite_add_8000_8000 (pixman_implementation_t *imp,
 
 static void
 fast_composite_add_8888_8888 (pixman_implementation_t *imp,
-			     pixman_op_t	op,
-			     pixman_image_t * src_image,
-			     pixman_image_t * mask_image,
-			     pixman_image_t * dst_image,
-			     int32_t      src_x,
-			     int32_t      src_y,
-			     int32_t      mask_x,
-			     int32_t      mask_y,
-			     int32_t      dest_x,
-			     int32_t      dest_y,
-			     int32_t     width,
-			     int32_t     height)
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
 {
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-    uint32_t	s, d;
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+    uint16_t w;
+    uint32_t s, d;
 
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
@@ -894,7 +897,7 @@ fast_composite_add_8888_8888 (pixman_implementation_t *imp,
 		{
 		    d = *dst;
 		    if (d)
-			UN8x4_ADD_UN8x4(s,d);
+			UN8x4_ADD_UN8x4 (s, d);
 		}
 		*dst = s;
 	    }
@@ -905,25 +908,25 @@ fast_composite_add_8888_8888 (pixman_implementation_t *imp,
 
 static void
 fast_composite_add_8888_8_8 (pixman_implementation_t *imp,
-			    pixman_op_t op,
-			    pixman_image_t * src_image,
-			    pixman_image_t * mask_image,
-			    pixman_image_t * dst_image,
-			    int32_t      src_x,
-			    int32_t      src_y,
-			    int32_t      mask_x,
-			    int32_t      mask_y,
-			    int32_t      dest_x,
-			    int32_t      dest_y,
-			    int32_t     width,
-			    int32_t     height)
+                             pixman_op_t              op,
+                             pixman_image_t *         src_image,
+                             pixman_image_t *         mask_image,
+                             pixman_image_t *         dst_image,
+                             int32_t                  src_x,
+                             int32_t                  src_y,
+                             int32_t                  mask_x,
+                             int32_t                  mask_y,
+                             int32_t                  dest_x,
+                             int32_t                  dest_y,
+                             int32_t                  width,
+                             int32_t                  height)
 {
-    uint8_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int	dst_stride, mask_stride;
-    uint16_t	w;
-    uint32_t	src;
-    uint8_t	sa;
+    uint8_t     *dst_line, *dst;
+    uint8_t     *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w;
+    uint32_t src;
+    uint8_t sa;
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
@@ -940,10 +943,10 @@ fast_composite_add_8888_8_8 (pixman_implementation_t *imp,
 
 	while (w--)
 	{
-	    uint16_t	tmp;
-	    uint16_t	a;
-	    uint32_t	m, d;
-	    uint32_t	r;
+	    uint16_t tmp;
+	    uint16_t a;
+	    uint32_t m, d;
+	    uint32_t r;
 
 	    a = *mask++;
 	    d = *dst;
@@ -962,55 +965,55 @@ fast_composite_add_8888_8_8 (pixman_implementation_t *imp,
 
 static void
 fast_composite_solid_fill (pixman_implementation_t *imp,
-		      pixman_op_t op,
-		      pixman_image_t * src_image,
-		      pixman_image_t * mask_image,
-		      pixman_image_t * dst_image,
-		      int32_t      src_x,
-		      int32_t      src_y,
-		      int32_t      mask_x,
-		      int32_t      mask_y,
-		      int32_t      dest_x,
-		      int32_t      dest_y,
-		      int32_t     width,
-		      int32_t     height)
+                           pixman_op_t              op,
+                           pixman_image_t *         src_image,
+                           pixman_image_t *         mask_image,
+                           pixman_image_t *         dst_image,
+                           int32_t                  src_x,
+                           int32_t                  src_y,
+                           int32_t                  mask_x,
+                           int32_t                  mask_y,
+                           int32_t                  dest_x,
+                           int32_t                  dest_y,
+                           int32_t                  width,
+                           int32_t                  height)
 {
-    uint32_t	src;
+    uint32_t src;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
     if (dst_image->bits.format == PIXMAN_a8)
 	src = src >> 24;
     else if (dst_image->bits.format == PIXMAN_r5g6b5 ||
-	     dst_image->bits.format == PIXMAN_b5g6r5)
+             dst_image->bits.format == PIXMAN_b5g6r5)
 	src = CONVERT_8888_TO_0565 (src);
 
     pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
-		 PIXMAN_FORMAT_BPP (dst_image->bits.format),
-		 dest_x, dest_y,
-		 width, height,
-		 src);
+                 PIXMAN_FORMAT_BPP (dst_image->bits.format),
+                 dest_x, dest_y,
+                 width, height,
+                 src);
 }
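
fast_composite_solid_fill reduces a solid-source composite to the public pixman_fill entry point, after converting the color into the destination's own format. For reference, a direct call looks like this; note that the stride argument is counted in uint32_t units, as in the fill helpers later in this file:

#include <stdint.h>
#include <pixman.h>

int
main (void)
{
    uint32_t bits[16 * 16];    /* a 16x16 a8r8g8b8 buffer */

    /* fill a 4x4 rectangle at (2, 2) with opaque red;
     * the stride of 16 is in uint32_t units, not bytes */
    pixman_fill (bits, 16, 32, 2, 2, 4, 4, 0xffff0000);

    return 0;
}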
 
 static void
 fast_composite_src_8888_x888 (pixman_implementation_t *imp,
-			  pixman_op_t op,
-			  pixman_image_t * src_image,
-			  pixman_image_t * mask_image,
-			  pixman_image_t * dst_image,
-			  int32_t      src_x,
-			  int32_t      src_y,
-			  int32_t      mask_x,
-			  int32_t      mask_y,
-			  int32_t      dest_x,
-			  int32_t      dest_y,
-			  int32_t     width,
-			  int32_t     height)
+                              pixman_op_t              op,
+                              pixman_image_t *         src_image,
+                              pixman_image_t *         mask_image,
+                              pixman_image_t *         dst_image,
+                              int32_t                  src_x,
+                              int32_t                  src_y,
+                              int32_t                  mask_x,
+                              int32_t                  mask_y,
+                              int32_t                  dest_x,
+                              int32_t                  dest_y,
+                              int32_t                  width,
+                              int32_t                  height)
 {
-    uint32_t	*dst;
+    uint32_t    *dst;
     uint32_t    *src;
-    int		 dst_stride, src_stride;
-    uint32_t	 n_bytes = width * sizeof (uint32_t);
+    int dst_stride, src_stride;
+    uint32_t n_bytes = width * sizeof (uint32_t);
 
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src, 1);
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst, 1);
@@ -1040,18 +1043,18 @@ static const pixman_fast_path_t c_fast_paths[] =
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_a8b8g8r8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_x8b8g8r8, fast_composite_over_n_8888_8888_ca, NEED_COMPONENT_ALPHA },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8b8g8r8, PIXMAN_b5g6r5,   fast_composite_over_n_8888_0565_ca, NEED_COMPONENT_ALPHA },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,	PIXMAN_x8r8g8b8, fast_composite_over_x888_8_8888,       0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,	PIXMAN_a8r8g8b8, fast_composite_over_x888_8_8888,       0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,	PIXMAN_x8b8g8r8, fast_composite_over_x888_8_8888,       0 },
-    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,	PIXMAN_a8b8g8r8, fast_composite_over_x888_8_8888,       0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, fast_composite_over_8888_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,	PIXMAN_x8r8g8b8, fast_composite_over_8888_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,	PIXMAN_r5g6b5,	 fast_composite_over_8888_0565,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,	PIXMAN_a8b8g8r8, fast_composite_over_8888_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,	PIXMAN_x8b8g8r8, fast_composite_over_8888_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   fast_composite_over_8888_0565,	   0 },
-    { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8,  PIXMAN_null,	PIXMAN_a8r8g8b8, fast_composite_add_8888_8888,   0 },
-    { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8,  PIXMAN_null,	PIXMAN_a8b8g8r8, fast_composite_add_8888_8888,   0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, fast_composite_over_x888_8_8888,       0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, fast_composite_over_x888_8_8888,       0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_x8b8g8r8, fast_composite_over_x888_8_8888,       0 },
+    { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8,       PIXMAN_a8b8g8r8, fast_composite_over_x888_8_8888,       0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, fast_composite_over_8888_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, fast_composite_over_8888_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   fast_composite_over_8888_0565,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, fast_composite_over_8888_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, fast_composite_over_8888_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   fast_composite_over_8888_0565,    0 },
+    { PIXMAN_OP_ADD, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_a8r8g8b8, fast_composite_add_8888_8888,   0 },
+    { PIXMAN_OP_ADD, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_a8b8g8r8, fast_composite_add_8888_8888,   0 },
     { PIXMAN_OP_ADD, PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fast_composite_add_8000_8000,   0 },
     { PIXMAN_OP_ADD, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       fast_composite_add_8888_8_8,    0 },
     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_null,     PIXMAN_a8r8g8b8, fast_composite_solid_fill, 0 },
@@ -1069,72 +1072,76 @@ static const pixman_fast_path_t c_fast_paths[] =
     { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_b5g6r5,   fast_composite_src_x888_0565, 0 },
     { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,     PIXMAN_b5g6r5,   fast_composite_src_x888_0565, 0 },
     { PIXMAN_OP_IN,  PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fast_composite_in_8_8,   0 },
-    { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,	PIXMAN_a8,	 fast_composite_in_n_8_8, 0 },
+    { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       fast_composite_in_n_8_8, 0 },
     { PIXMAN_OP_NONE },
 };
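
Each row of c_fast_paths keys a handler on (operator, source format, mask format, destination format) plus a flags word, with PIXMAN_OP_NONE as the sentinel; _pixman_run_fast_path scans the table and runs the first matching row. A reduced, self-contained model of that dispatch (types and names here are stand-ins, not the real pixman_fast_path_t):

#include <stdio.h>

typedef struct
{
    int         op, src, mask, dest;  /* the lookup key          */
    const char *handler;              /* stands in for the func  */
} path_t;

#define OP_NONE -1

static const path_t paths[] =
{
    { 0 /* OVER */, 1 /* a8r8g8b8 */, 0 /* null */, 1, "over_8888_8888" },
    { 0 /* OVER */, 1,                0,            2, "over_8888_0565" },
    { OP_NONE }
};

static const char *
lookup (int op, int src, int mask, int dest)
{
    const path_t *p;

    for (p = paths; p->op != OP_NONE; ++p)
    {
        if (p->op == op && p->src == src &&
            p->mask == mask && p->dest == dest)
            return p->handler;
    }
    return NULL;   /* no match: the caller falls back to the delegate */
}

int
main (void)
{
    printf ("%s\n", lookup (0, 1, 0, 2));    /* over_8888_0565 */
    return 0;
}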
 
 static void
 fast_composite_src_scale_nearest (pixman_implementation_t *imp,
-			    pixman_op_t     op,
-			    pixman_image_t *src_image,
-			    pixman_image_t *mask_image,
-			    pixman_image_t *dst_image,
-			    int32_t         src_x,
-			    int32_t         src_y,
-			    int32_t         mask_x,
-			    int32_t         mask_y,
-			    int32_t         dest_x,
-			    int32_t         dest_y,
-			    int32_t        width,
-			    int32_t        height)
+                                  pixman_op_t              op,
+                                  pixman_image_t *         src_image,
+                                  pixman_image_t *         mask_image,
+                                  pixman_image_t *         dst_image,
+                                  int32_t                  src_x,
+                                  int32_t                  src_y,
+                                  int32_t                  mask_x,
+                                  int32_t                  mask_y,
+                                  int32_t                  dest_x,
+                                  int32_t                  dest_y,
+                                  int32_t                  width,
+                                  int32_t                  height)
 {
     uint32_t       *dst;
     uint32_t       *src;
-    int             dst_stride, src_stride;
-    int             i, j;
+    int dst_stride, src_stride;
+    int i, j;
     pixman_vector_t v;
-    
+
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst, 1);
     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
      * transformed from destination space to source space */
     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src, 1);
-    
+
     /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed(src_x) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed(src_y) + pixman_fixed_1 / 2;
+    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
     v.vector[2] = pixman_fixed_1;
-    
+
     if (!pixman_transform_point_3d (src_image->common.transform, &v))
-        return;
-    
+	return;
+
     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
     v.vector[0] -= pixman_fixed_e;
     v.vector[1] -= pixman_fixed_e;
-    
-    for (j = 0; j < height; j++) {
-        pixman_fixed_t vx = v.vector[0];
-        pixman_fixed_t vy = v.vector[1];
-        for (i = 0; i < width; ++i) {
-            pixman_bool_t inside_bounds;
-            uint32_t result;
-            int x, y;
-            x = vx >> 16;
-            y = vy >> 16;
-	    
-            /* apply the repeat function */
-            switch (src_image->common.repeat) {
+
+    for (j = 0; j < height; j++)
+    {
+	pixman_fixed_t vx = v.vector[0];
+	pixman_fixed_t vy = v.vector[1];
+
+	for (i = 0; i < width; ++i)
+	{
+	    pixman_bool_t inside_bounds;
+	    uint32_t result;
+	    int x, y;
+	    x = vx >> 16;
+	    y = vy >> 16;
+
+	    /* apply the repeat function */
+	    switch (src_image->common.repeat)
+	    {
 	    case PIXMAN_REPEAT_NORMAL:
 		x = MOD (x, src_image->bits.width);
 		y = MOD (y, src_image->bits.height);
 		inside_bounds = TRUE;
 		break;
-		
+
 	    case PIXMAN_REPEAT_PAD:
-		x = CLIP (x, 0, src_image->bits.width-1);
-		y = CLIP (y, 0, src_image->bits.height-1);
+		x = CLIP (x, 0, src_image->bits.width - 1);
+		y = CLIP (y, 0, src_image->bits.height - 1);
 		inside_bounds = TRUE;
 		break;
-		
+
 	    case PIXMAN_REPEAT_REFLECT:
 		x = MOD (x, src_image->bits.width * 2);
 		if (x >= src_image->bits.width)
@@ -1144,46 +1151,57 @@ fast_composite_src_scale_nearest (pixman_implementation_t *imp,
 		    y = src_image->bits.height * 2 - y - 1;
 		inside_bounds = TRUE;
 		break;
-		
+
 	    case PIXMAN_REPEAT_NONE:
 	    default:
-		inside_bounds = (x >= 0 && x < src_image->bits.width && y >= 0 && y < src_image->bits.height);
+		inside_bounds =
+		    (x >= 0				&&
+		     x < src_image->bits.width		&&
+		     y >= 0				&&
+		     y < src_image->bits.height);
 		break;
-            }
-	    
-            if (inside_bounds) {
-                //XXX: we should move this multiplication out of the loop
-                result = *(src + y * src_stride + x);
-            } else {
-                result = 0;
-            }
+	    }
+
+	    if (inside_bounds)
+	    {
+		/* XXX: we should move this multiplication out of the loop */
+		result = *(src + y * src_stride + x);
+	    }
+	    else
+	    {
+		result = 0;
+	    }
 	    *(dst + i) = result;
-	    
-            /* adjust the x location by a unit vector in the x direction:
-             * this is equivalent to transforming x+1 of the destination point to source space */
-            vx += src_image->common.transform->matrix[0][0];
-        }
-        /* adjust the y location by a unit vector in the y direction
-         * this is equivalent to transforming y+1 of the destination point to source space */
-        v.vector[1] += src_image->common.transform->matrix[1][1];
-        dst += dst_stride;
+
+	    /* adjust the x location by a unit vector in the x direction:
+	     * this is equivalent to transforming x+1 of the destination
+	     * point to source space
+	     */
+	    vx += src_image->common.transform->matrix[0][0];
+	}
+	/* adjust the y location by a unit vector in the y direction
+	 * this is equivalent to transforming y+1 of the destination point
+	 * to source space
+	 */
+	v.vector[1] += src_image->common.transform->matrix[1][1];
+	dst += dst_stride;
     }
 }
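
The scanline walk above stays in 16.16 fixed point throughout: the destination start point is transformed once, pixman_fixed_e is subtracted so an exact .5 lands on the lower pixel, and matrix[0][0] / matrix[1][1] then serve as per-pixel increments. The coordinate mapping in isolation, transforming each pixel independently instead of incrementally (same results; names are illustrative):

#include <stdint.h>
#include <stdio.h>

typedef int32_t fixed_16_16;

#define FIXED_1          0x10000
#define FIXED_E          1                 /* smallest fixed step */
#define INT_TO_FIXED(i)  ((fixed_16_16) ((i) << 16))

/* Map destination pixel i to a source pixel under a pure scale,
 * following the rounding rules of the loop above. */
static int
dest_to_src (int i, fixed_16_16 scale, fixed_16_16 translate)
{
    /* transform the destination pixel center into source space */
    fixed_16_16 v = (fixed_16_16)
        (((int64_t) (INT_TO_FIXED (i) + FIXED_1 / 2) * scale) >> 16) +
        translate;

    /* round down so that exactly .5 becomes the lower pixel */
    v -= FIXED_E;

    return v >> 16;
}

int
main (void)
{
    int i;

    /* a 2x downscale picks every other source pixel: 0 2 4 6 */
    for (i = 0; i < 4; i++)
        printf ("dst %d -> src %d\n", i, dest_to_src (i, 2 * FIXED_1, 0));

    return 0;
}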
 
 static void
 fast_path_composite (pixman_implementation_t *imp,
-		     pixman_op_t     op,
-		     pixman_image_t *src,
-		     pixman_image_t *mask,
-		     pixman_image_t *dest,
-		     int32_t         src_x,
-		     int32_t         src_y,
-		     int32_t         mask_x,
-		     int32_t         mask_y,
-		     int32_t         dest_x,
-		     int32_t         dest_y,
-		     int32_t        width,
-		     int32_t        height)
+                     pixman_op_t              op,
+                     pixman_image_t *         src,
+                     pixman_image_t *         mask,
+                     pixman_image_t *         dest,
+                     int32_t                  src_x,
+                     int32_t                  src_y,
+                     int32_t                  mask_x,
+                     int32_t                  mask_y,
+                     int32_t                  dest_x,
+                     int32_t                  dest_y,
+                     int32_t                  width,
+                     int32_t                  height)
 {
     if (src->type == BITS
         && src->common.transform
@@ -1191,55 +1209,55 @@ fast_path_composite (pixman_implementation_t *imp,
         && op == PIXMAN_OP_SRC
         && !src->common.alpha_map && !dest->common.alpha_map
         && (src->common.filter == PIXMAN_FILTER_NEAREST)
-        && PIXMAN_FORMAT_BPP(dest->bits.format) == 32
+        && PIXMAN_FORMAT_BPP (dest->bits.format) == 32
         && src->bits.format == dest->bits.format
         && !src->common.read_func && !src->common.write_func
         && !dest->common.read_func && !dest->common.write_func)
     {
-        /* ensure that the transform matrix only has a scale */
-        if (src->common.transform->matrix[0][1] == 0 &&
-            src->common.transform->matrix[1][0] == 0 &&
-            src->common.transform->matrix[2][0] == 0 &&
-            src->common.transform->matrix[2][1] == 0 &&
-            src->common.transform->matrix[2][2] == pixman_fixed_1)
+	/* ensure that the transform matrix only has a scale */
+	if (src->common.transform->matrix[0][1] == 0 &&
+	    src->common.transform->matrix[1][0] == 0 &&
+	    src->common.transform->matrix[2][0] == 0 &&
+	    src->common.transform->matrix[2][1] == 0 &&
+	    src->common.transform->matrix[2][2] == pixman_fixed_1)
 	{
 	    _pixman_walk_composite_region (imp, op,
-					   src, mask, dest,
-					   src_x, src_y,
-					   mask_x, mask_y,
-					   dest_x, dest_y,
-					   width, height,
-					   fast_composite_src_scale_nearest);
+	                                   src, mask, dest,
+	                                   src_x, src_y,
+	                                   mask_x, mask_y,
+	                                   dest_x, dest_y,
+	                                   width, height,
+	                                   fast_composite_src_scale_nearest);
 	    return;
 	}
     }
 
     if (_pixman_run_fast_path (c_fast_paths, imp,
-			       op, src, mask, dest,
-			       src_x, src_y,
-			       mask_x, mask_y,
-			       dest_x, dest_y,
-			       width, height))
+                               op, src, mask, dest,
+                               src_x, src_y,
+                               mask_x, mask_y,
+                               dest_x, dest_y,
+                               width, height))
     {
 	return;
     }
 
     _pixman_implementation_composite (imp->delegate, op,
-				      src, mask, dest,
-				      src_x, src_y,
-				      mask_x, mask_y,
-				      dest_x, dest_y,
-				      width, height);
+                                      src, mask, dest,
+                                      src_x, src_y,
+                                      mask_x, mask_y,
+                                      dest_x, dest_y,
+                                      width, height);
 }
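
fast_path_composite shows the implementation-chaining pattern used throughout pixman: try the special cases, then the table, and otherwise forward the unchanged request to imp->delegate. The pattern in miniature (a sketch with stand-in types):

#include <stdio.h>

typedef struct impl impl_t;
struct impl
{
    impl_t *delegate;                        /* next in the chain    */
    int   (*try_composite) (impl_t *, int);  /* returns 1 if handled */
};

static int
fast_try (impl_t *self, int op)
{
    (void) self;
    if (op != 0)
        return 0;               /* not one of our fast paths */
    printf ("fast path\n");
    return 1;
}

static int
general_try (impl_t *self, int op)
{
    (void) self;
    (void) op;
    printf ("general path\n");  /* the general path handles anything */
    return 1;
}

static void
composite (impl_t *imp, int op)
{
    /* forward the unchanged request until someone handles it */
    while (imp && !imp->try_composite (imp, op))
        imp = imp->delegate;
}

int
main (void)
{
    impl_t general = { NULL, general_try };
    impl_t fast    = { &general, fast_try };

    composite (&fast, 0);       /* prints "fast path"    */
    composite (&fast, 1);       /* prints "general path" */
    return 0;
}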
 
 static void
-pixman_fill8 (uint32_t  *bits,
-	      int	stride,
-	      int	x,
-	      int	y,
-	      int	width,
-	      int	height,
-	      uint32_t  xor)
+pixman_fill8 (uint32_t *bits,
+              int       stride,
+              int       x,
+              int       y,
+              int       width,
+              int       height,
+              uint32_t xor)
 {
     int byte_stride = stride * (int) sizeof (uint32_t);
     uint8_t *dst = (uint8_t *) bits;
@@ -1259,14 +1277,15 @@ pixman_fill8 (uint32_t  *bits,
 
 static void
 pixman_fill16 (uint32_t *bits,
-	       int       stride,
-	       int       x,
-	       int       y,
-	       int       width,
-	       int       height,
-	       uint32_t  xor)
+               int       stride,
+               int       x,
+               int       y,
+               int       width,
+               int       height,
+               uint32_t xor)
 {
-    int short_stride = (stride * (int) sizeof (uint32_t)) / (int) sizeof (uint16_t);
+    int short_stride =
+	(stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
     uint16_t *dst = (uint16_t *)bits;
     uint16_t v = xor & 0xffff;
     int i;
@@ -1284,12 +1303,12 @@ pixman_fill16 (uint32_t *bits,
 
 static void
 pixman_fill32 (uint32_t *bits,
-	       int       stride,
-	       int       x,
-	       int       y,
-	       int       width,
-	       int       height,
-	       uint32_t  xor)
+               int       stride,
+               int       x,
+               int       y,
+               int       width,
+               int       height,
+               uint32_t  xor)
 {
     int i;
 
@@ -1306,35 +1325,35 @@ pixman_fill32 (uint32_t *bits,
 
 static pixman_bool_t
 fast_path_fill (pixman_implementation_t *imp,
-		uint32_t *bits,
-		int stride,
-		int bpp,
-		int x,
-		int y,
-		int width,
-		int height,
-		uint32_t xor)
+                uint32_t *               bits,
+                int                      stride,
+                int                      bpp,
+                int                      x,
+                int                      y,
+                int                      width,
+                int                      height,
+                uint32_t		 xor)
 {
     switch (bpp)
     {
     case 8:
 	pixman_fill8 (bits, stride, x, y, width, height, xor);
 	break;
-	
+
     case 16:
 	pixman_fill16 (bits, stride, x, y, width, height, xor);
 	break;
-	
+
     case 32:
 	pixman_fill32 (bits, stride, x, y, width, height, xor);
 	break;
-	
+
     default:
 	return _pixman_implementation_fill (
 	    imp->delegate, bits, stride, bpp, x, y, width, height, xor);
 	break;
     }
-    
+
     return TRUE;
 }
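
The fill helpers all address the image through its uint32_t rowstride; pixman strides count 32-bit words, which is why pixman_fill16 rescales to uint16_t units before indexing. The same arithmetic, spelled out and checked:

#include <assert.h>
#include <stdint.h>

int
main (void)
{
    uint32_t  bits[8 * 4];    /* 4 rows, stride of 8 words */
    int       stride = 8;     /* in uint32_t units         */
    uint16_t *dst = (uint16_t *) bits;

    /* at 16 bpp one row holds stride * 4 / 2 = 16 halfwords */
    int short_stride =
        (stride * (int) sizeof (uint32_t)) / (int) sizeof (uint16_t);

    assert (short_stride == 16);

    /* pixel (x, y) = (5, 2) lives at dst[y * short_stride + x] */
    dst[2 * short_stride + 5] = 0xf800;
    assert (((uint16_t *) bits)[37] == 0xf800);

    return 0;
}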
 
@@ -1346,6 +1365,7 @@ _pixman_implementation_create_fast_path (void)
 
     imp->composite = fast_path_composite;
     imp->fill = fast_path_fill;
-    
+
     return imp;
 }
+
commit c332e229bb274447b8b46c8f8ba7bce8cfaa21b2
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 19:02:29 2009 -0400

    Reindent and reformat pixman-edge.c

diff --git a/pixman/pixman-edge.c b/pixman/pixman-edge.c
index 82d6385..80e1e5c 100644
--- a/pixman/pixman-edge.c
+++ b/pixman/pixman-edge.c
@@ -32,28 +32,30 @@
 /*
  * Step across a small sample grid gap
  */
-#define RENDER_EDGE_STEP_SMALL(edge) { \
-    edge->x += edge->stepx_small;   \
-    edge->e += edge->dx_small;	    \
-    if (edge->e > 0)		    \
-    {				    \
-	edge->e -= edge->dy;	    \
-	edge->x += edge->signdx;    \
-    }				    \
-}
+#define RENDER_EDGE_STEP_SMALL(edge)					\
+    {									\
+	edge->x += edge->stepx_small;					\
+	edge->e += edge->dx_small;					\
+	if (edge->e > 0)						\
+	{								\
+	    edge->e -= edge->dy;					\
+	    edge->x += edge->signdx;					\
+	}								\
+    }
 
 /*
  * Step across a large sample grid gap
  */
-#define RENDER_EDGE_STEP_BIG(edge) {   \
-    edge->x += edge->stepx_big;	    \
-    edge->e += edge->dx_big;	    \
-    if (edge->e > 0)		    \
-    {				    \
-	edge->e -= edge->dy;	    \
-	edge->x += edge->signdx;    \
-    }				    \
-}
+#define RENDER_EDGE_STEP_BIG(edge)					\
+    {									\
+	edge->x += edge->stepx_big;					\
+	edge->e += edge->dx_big;					\
+	if (edge->e > 0)						\
+	{								\
+	    edge->e -= edge->dy;					\
+	    edge->x += edge->signdx;					\
+	}								\
+    }
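
Both stepping macros are one iteration of an integer DDA: x advances by a precomputed step, the error accumulator absorbs the remainder, and when it overflows past zero the edge pays one extra signdx step. The same logic as a function, over a hypothetical struct carrying just the fields the macros touch:

#include <stdint.h>
#include <stdio.h>

typedef struct
{
    int32_t x;        /* current x coordinate            */
    int32_t e;        /* DDA error accumulator           */
    int32_t stepx;    /* whole-pixel step per sample row */
    int32_t dx;       /* remainder added to e per step   */
    int32_t dy;       /* denominator; e overflows past 0 */
    int32_t signdx;   /* extra +1/-1 step on overflow    */
} edge_t;

static void
edge_step (edge_t *edge)
{
    edge->x += edge->stepx;
    edge->e += edge->dx;
    if (edge->e > 0)
    {
        edge->e -= edge->dy;
        edge->x += edge->signdx;
    }
}

int
main (void)
{
    /* slope 1/3: with the error primed at -2, x advances on
     * every third step (0, 0, 1, 1, 1, 2, ...) */
    edge_t edge = { 0, -2, 0, 1, 3, 1 };
    int i;

    for (i = 0; i < 6; i++)
    {
        edge_step (&edge);
        printf ("step %d: x = %d\n", i + 1, (int) edge.x);
    }
    return 0;
}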
 
 #ifdef PIXMAN_FB_ACCESSORS
 #define PIXMAN_RASTERIZE_EDGES pixman_rasterize_edges_accessors
@@ -65,28 +67,30 @@
  * 4 bit alpha
  */
 
-#define N_BITS	4
-#define RASTERIZE_EDGES	rasterize_edges_4
+#define N_BITS  4
+#define RASTERIZE_EDGES rasterize_edges_4
 
 #ifndef WORDS_BIG_ENDIAN
-#define SHIFT_4(o)	((o) << 2)
+#define SHIFT_4(o)      ((o) << 2)
 #else
-#define SHIFT_4(o)	((1-(o)) << 2)
+#define SHIFT_4(o)      ((1 - (o)) << 2)
 #endif
 
-#define GET_4(x,o)	(((x) >> SHIFT_4(o)) & 0xf)
-#define PUT_4(x,o,v)	(((x) & ~(0xf << SHIFT_4(o))) | (((v) & 0xf) << SHIFT_4(o)))
+#define GET_4(x, o)      (((x) >> SHIFT_4 (o)) & 0xf)
+#define PUT_4(x, o, v)							\
+    (((x) & ~(0xf << SHIFT_4 (o))) | (((v) & 0xf) << SHIFT_4 (o)))
 
-#define DEFINE_ALPHA(line,x)			     \
-    uint8_t   *__ap = (uint8_t *) line + ((x) >> 1); \
-    int	    __ao = (x) & 1
+#define DEFINE_ALPHA(line, x)						\
+    uint8_t   *__ap = (uint8_t *) line + ((x) >> 1);			\
+    int __ao = (x) & 1
 
-#define STEP_ALPHA	((__ap += __ao), (__ao ^= 1))
+#define STEP_ALPHA      ((__ap += __ao), (__ao ^= 1))
 
-#define ADD_ALPHA(a) {							\
-	uint8_t   __o = READ(image, __ap);				\
-	uint8_t   __a = (a) + GET_4(__o, __ao);				\
-	WRITE(image, __ap, PUT_4 (__o, __ao, __a | (0 - ((__a) >> 4))));	\
+#define ADD_ALPHA(a)							\
+    {									\
+        uint8_t __o = READ (image, __ap);				\
+        uint8_t __a = (a) + GET_4 (__o, __ao);				\
+        WRITE (image, __ap, PUT_4 (__o, __ao, __a | (0 - ((__a) >> 4)))); \
     }
 
 #include "pixman-edge-imp.h"
@@ -103,7 +107,7 @@
  */
 
 #define N_BITS 1
-#define RASTERIZE_EDGES	rasterize_edges_1
+#define RASTERIZE_EDGES rasterize_edges_1
 
 #include "pixman-edge-imp.h"
 
@@ -114,24 +118,27 @@
  * 8 bit alpha
  */
 
-static inline uint8_t
+static force_inline uint8_t
 clip255 (int x)
 {
-    if (x > 255) return 255;
+    if (x > 255)
+	return 255;
+
     return x;
 }
 
-#define ADD_SATURATE_8(buf,val,length)				\
-    do {							\
-	int i__ = (length);					\
-	uint8_t *buf__ = (buf);					\
-	int val__ = (val);					\
-								\
-	while (i__--)						\
-	{							\
-	    WRITE(image, (buf__), clip255 (READ(image, (buf__)) + (val__)));	\
-	    (buf__)++;						\
-	}							\
+#define ADD_SATURATE_8(buf, val, length)				\
+    do									\
+    {									\
+        int i__ = (length);						\
+        uint8_t *buf__ = (buf);						\
+        int val__ = (val);						\
+									\
+        while (i__--)							\
+        {								\
+            WRITE (image, (buf__), clip255 (READ (image, (buf__)) + (val__))); \
+            (buf__)++;							\
+	}								\
     } while (0)
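
ADD_SATURATE_8 adds a constant coverage value into a run of alpha bytes, clamping through clip255 so overlapping spans cannot wrap past 255. The same operation without the READ/WRITE accessor wrappers:

#include <stdint.h>
#include <stdio.h>

static uint8_t
clip255 (int x)
{
    return x > 255 ? 255 : (uint8_t) x;
}

/* add val to length bytes of coverage, saturating at 255 */
static void
add_saturate_8 (uint8_t *buf, int val, int length)
{
    while (length--)
    {
        *buf = clip255 (*buf + val);
        buf++;
    }
}

int
main (void)
{
    uint8_t row[4] = { 0, 100, 200, 250 };
    int i;

    add_saturate_8 (row, 60, 4);

    for (i = 0; i < 4; i++)
        printf ("%d ", row[i]);    /* 60 160 255 255 */
    printf ("\n");

    return 0;
}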
 
 /*
@@ -146,13 +153,13 @@ clip255 (int x)
  *                   fill_start       fill_end
  */
 static void
-rasterize_edges_8 (pixman_image_t       *image,
-		   pixman_edge_t	*l,
-		   pixman_edge_t	*r,
-		   pixman_fixed_t	t,
-		   pixman_fixed_t	b)
+rasterize_edges_8 (pixman_image_t *image,
+                   pixman_edge_t * l,
+                   pixman_edge_t * r,
+                   pixman_fixed_t  t,
+                   pixman_fixed_t  b)
 {
-    pixman_fixed_t  y = t;
+    pixman_fixed_t y = t;
     uint32_t  *line;
     int fill_start = -1, fill_end = -1;
     int fill_size = 0;
@@ -165,153 +172,165 @@ rasterize_edges_8 (pixman_image_t       *image,
     for (;;)
     {
         uint8_t *ap = (uint8_t *) line;
-	pixman_fixed_t	lx, rx;
-	int	lxi, rxi;
+        pixman_fixed_t lx, rx;
+        int lxi, rxi;
 
-	/* clip X */
-	lx = l->x;
-	if (lx < 0)
+        /* clip X */
+        lx = l->x;
+        if (lx < 0)
 	    lx = 0;
-	rx = r->x;
-	if (pixman_fixed_to_int (rx) >= width)
+
+        rx = r->x;
+
+        if (pixman_fixed_to_int (rx) >= width)
+	{
 	    /* Use the last pixel of the scanline, covered 100%.
 	     * We can't use the first pixel following the scanline,
 	     * because accessing it could result in a buffer overrun.
 	     */
 	    rx = pixman_int_to_fixed (width) - 1;
+	}
 
-	/* Skip empty (or backwards) sections */
-	if (rx > lx)
-	{
+        /* Skip empty (or backwards) sections */
+        if (rx > lx)
+        {
             int lxs, rxs;
 
-	    /* Find pixel bounds for span. */
-	    lxi = pixman_fixed_to_int (lx);
-	    rxi = pixman_fixed_to_int (rx);
+            /* Find pixel bounds for span. */
+            lxi = pixman_fixed_to_int (lx);
+            rxi = pixman_fixed_to_int (rx);
 
             /* Sample coverage for edge pixels */
             lxs = RENDER_SAMPLES_X (lx, 8);
             rxs = RENDER_SAMPLES_X (rx, 8);
 
             /* Add coverage across row */
-	    if (lxi == rxi)
-	    {
-		WRITE(image, ap +lxi, clip255 (READ(image, ap + lxi) + rxs - lxs));
+            if (lxi == rxi)
+            {
+                WRITE (image, ap + lxi,
+		       clip255 (READ (image, ap + lxi) + rxs - lxs));
 	    }
-	    else
-	    {
-		WRITE(image, ap + lxi, clip255 (READ(image, ap + lxi) + N_X_FRAC(8) - lxs));
+            else
+            {
+                WRITE (image, ap + lxi,
+		       clip255 (READ (image, ap + lxi) + N_X_FRAC (8) - lxs));
 
-		/* Move forward so that lxi/rxi is the pixel span */
-		lxi++;
+                /* Move forward so that lxi/rxi is the pixel span */
+                lxi++;
 
-		/* Don't bother trying to optimize the fill unless
+                /* Don't bother trying to optimize the fill unless
 		 * the span is longer than 4 pixels. */
-		if (rxi - lxi > 4)
-		{
-		    if (fill_start < 0)
-		    {
-			fill_start = lxi;
-			fill_end = rxi;
-			fill_size++;
+                if (rxi - lxi > 4)
+                {
+                    if (fill_start < 0)
+                    {
+                        fill_start = lxi;
+                        fill_end = rxi;
+                        fill_size++;
 		    }
-		    else
-		    {
-			if (lxi >= fill_end || rxi < fill_start)
-			{
-			    /* We're beyond what we saved, just fill it */
-			    ADD_SATURATE_8 (ap + fill_start,
-					    fill_size * N_X_FRAC(8),
-					    fill_end - fill_start);
-			    fill_start = lxi;
-			    fill_end = rxi;
-			    fill_size = 1;
+                    else
+                    {
+                        if (lxi >= fill_end || rxi < fill_start)
+                        {
+                            /* We're beyond what we saved, just fill it */
+                            ADD_SATURATE_8 (ap + fill_start,
+                                            fill_size * N_X_FRAC (8),
+                                            fill_end - fill_start);
+                            fill_start = lxi;
+                            fill_end = rxi;
+                            fill_size = 1;
 			}
-			else
-			{
-			    /* Update fill_start */
-			    if (lxi > fill_start)
-			    {
-				ADD_SATURATE_8 (ap + fill_start,
-						fill_size * N_X_FRAC(8),
-						lxi - fill_start);
-				fill_start = lxi;
+                        else
+                        {
+                            /* Update fill_start */
+                            if (lxi > fill_start)
+                            {
+                                ADD_SATURATE_8 (ap + fill_start,
+                                                fill_size * N_X_FRAC (8),
+                                                lxi - fill_start);
+                                fill_start = lxi;
 			    }
-			    else if (lxi < fill_start)
-			    {
-				ADD_SATURATE_8 (ap + lxi, N_X_FRAC(8),
-						fill_start - lxi);
+                            else if (lxi < fill_start)
+                            {
+                                ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8),
+                                                fill_start - lxi);
 			    }
 
-			    /* Update fill_end */
-			    if (rxi < fill_end)
-			    {
-				ADD_SATURATE_8 (ap + rxi,
-						fill_size * N_X_FRAC(8),
-						fill_end - rxi);
-				fill_end = rxi;
+                            /* Update fill_end */
+                            if (rxi < fill_end)
+                            {
+                                ADD_SATURATE_8 (ap + rxi,
+                                                fill_size * N_X_FRAC (8),
+                                                fill_end - rxi);
+                                fill_end = rxi;
 			    }
-			    else if (fill_end < rxi)
-			    {
-				ADD_SATURATE_8 (ap + fill_end,
-						N_X_FRAC(8),
-						rxi - fill_end);
+                            else if (fill_end < rxi)
+                            {
+                                ADD_SATURATE_8 (ap + fill_end,
+                                                N_X_FRAC (8),
+                                                rxi - fill_end);
 			    }
-			    fill_size++;
+                            fill_size++;
 			}
 		    }
 		}
-		else
-		{
-		    ADD_SATURATE_8 (ap + lxi, N_X_FRAC(8), rxi - lxi);
+                else
+                {
+                    ADD_SATURATE_8 (ap + lxi, N_X_FRAC (8), rxi - lxi);
 		}
 
-		WRITE(image, ap + rxi, clip255 (READ(image, ap + rxi) + rxs));
+                WRITE (image, ap + rxi, clip255 (READ (image, ap + rxi) + rxs));
 	    }
 	}
 
-	if (y == b) {
+        if (y == b)
+        {
             /* We're done, make sure we clean up any remaining fill. */
-            if (fill_start != fill_end) {
-		if (fill_size == N_Y_FRAC(8))
-		{
-		    MEMSET_WRAPPED (image, ap + fill_start, 0xff, fill_end - fill_start);
+            if (fill_start != fill_end)
+            {
+                if (fill_size == N_Y_FRAC (8))
+                {
+                    MEMSET_WRAPPED (image, ap + fill_start,
+				    0xff, fill_end - fill_start);
 		}
-		else
-		{
-		    ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC(8),
-				    fill_end - fill_start);
+                else
+                {
+                    ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8),
+                                    fill_end - fill_start);
 		}
-            }
-	    break;
-        }
+	    }
+            break;
+	}
 
-	if (pixman_fixed_frac (y) != Y_FRAC_LAST(8))
-	{
-	    RENDER_EDGE_STEP_SMALL (l);
-	    RENDER_EDGE_STEP_SMALL (r);
-	    y += STEP_Y_SMALL(8);
+        if (pixman_fixed_frac (y) != Y_FRAC_LAST (8))
+        {
+            RENDER_EDGE_STEP_SMALL (l);
+            RENDER_EDGE_STEP_SMALL (r);
+            y += STEP_Y_SMALL (8);
 	}
-	else
-	{
-	    RENDER_EDGE_STEP_BIG (l);
-	    RENDER_EDGE_STEP_BIG (r);
-	    y += STEP_Y_BIG(8);
+        else
+        {
+            RENDER_EDGE_STEP_BIG (l);
+            RENDER_EDGE_STEP_BIG (r);
+            y += STEP_Y_BIG (8);
             if (fill_start != fill_end)
             {
-		if (fill_size == N_Y_FRAC(8))
-		{
-		    MEMSET_WRAPPED (image, ap + fill_start, 0xff, fill_end - fill_start);
+                if (fill_size == N_Y_FRAC (8))
+                {
+                    MEMSET_WRAPPED (image, ap + fill_start,
+				    0xff, fill_end - fill_start);
 		}
-		else
-		{
-		    ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC(8),
-				    fill_end - fill_start);
+                else
+                {
+                    ADD_SATURATE_8 (ap + fill_start, fill_size * N_X_FRAC (8),
+                                    fill_end - fill_start);
 		}
+		
                 fill_start = fill_end = -1;
                 fill_size = 0;
-            }
-	    line += stride;
+	    }
+	    
+            line += stride;
 	}
     }
 }
@@ -321,19 +340,21 @@ static
 #endif
 void
 PIXMAN_RASTERIZE_EDGES (pixman_image_t *image,
-			pixman_edge_t	*l,
-			pixman_edge_t	*r,
-			pixman_fixed_t	t,
-			pixman_fixed_t	b)
+                        pixman_edge_t * l,
+                        pixman_edge_t * r,
+                        pixman_fixed_t  t,
+                        pixman_fixed_t  b)
 {
     switch (PIXMAN_FORMAT_BPP (image->bits.format))
     {
     case 1:
 	rasterize_edges_1 (image, l, r, t, b);
 	break;
+
     case 4:
 	rasterize_edges_4 (image, l, r, t, b);
 	break;
+
     case 8:
 	rasterize_edges_8 (image, l, r, t, b);
 	break;
@@ -344,12 +365,12 @@ PIXMAN_RASTERIZE_EDGES (pixman_image_t *image,
 
 PIXMAN_EXPORT void
 pixman_rasterize_edges (pixman_image_t *image,
-			pixman_edge_t	*l,
-			pixman_edge_t	*r,
-			pixman_fixed_t	t,
-			pixman_fixed_t	b)
+                        pixman_edge_t * l,
+                        pixman_edge_t * r,
+                        pixman_fixed_t  t,
+                        pixman_fixed_t  b)
 {
-    if (image->common.read_func	|| image->common.write_func)
+    if (image->common.read_func || image->common.write_func)
 	pixman_rasterize_edges_accessors (image, l, r, t, b);
     else
 	pixman_rasterize_edges_no_accessors (image, l, r, t, b);
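
    [The fill_start/fill_end/fill_size bookkeeping in the 8 bpp rasterizer
    above defers the interior of each span: edge pixels get fractional
    coverage immediately, while the fully-interior run is accumulated
    across subsample rows and flushed either with MEMSET_WRAPPED (when
    fill_size reaches N_Y_FRAC(8), i.e. the run is completely covered and
    can be set to 0xff outright) or with a saturating add.  A minimal
    sketch of that saturating add, assuming clip255() clamps to [0, 255]
    the way its uses in the hunk suggest:

        #include <stdint.h>

        static uint8_t
        clip255 (int v)
        {
            return v > 255 ? 255 : (uint8_t) v;
        }

        /* What ADD_SATURATE_8 (ap, value, size) amounts to for a plain
         * memory image: add the same coverage value to 'size' consecutive
         * alpha bytes, saturating at 0xff.  The real macro goes through
         * READ/WRITE accessors so that it also works for images with
         * read_func/write_func hooks, as the dispatch at the end of this
         * diff shows. */
        static void
        add_saturate_8 (uint8_t *ap, int value, int size)
        {
            while (size--)
            {
                *ap = clip255 (*ap + value);
                ap++;
            }
        }
    ]
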
commit 4ba9a44e8f4098fc61bfb62650c521b2e37cf9cb
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 18:59:10 2009 -0400

    Reindent and reformat pixman-cpu.c

diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index bf075f6..73e94c1 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -47,12 +47,16 @@ static volatile pixman_bool_t have_vmx = TRUE;
 static pixman_bool_t
 pixman_have_vmx (void)
 {
-    if(!initialized) {
-        size_t length = sizeof(have_vmx);
-        int error =
-            sysctlbyname("hw.optional.altivec", &have_vmx, &length, NULL, 0);
-        if(error) have_vmx = FALSE;
-        initialized = TRUE;
+    if (!initialized)
+    {
+	size_t length = sizeof(have_vmx);
+	int error =
+	    sysctlbyname ("hw.optional.altivec", &have_vmx, &length, NULL, 0);
+
+	if (error)
+	    have_vmx = FALSE;
+
+	initialized = TRUE;
     }
     return have_vmx;
 }
@@ -69,39 +73,47 @@ pixman_have_vmx (void)
 static pixman_bool_t
 pixman_have_vmx (void)
 {
-    if (!initialized) {
+    if (!initialized)
+    {
 	char fname[64];
 	unsigned long buf[64];
 	ssize_t count = 0;
 	pid_t pid;
 	int fd, i;
 
-	pid = getpid();
-	snprintf(fname, sizeof(fname)-1, "/proc/%d/auxv", pid);
+	pid = getpid ();
+	snprintf (fname, sizeof(fname) - 1, "/proc/%d/auxv", pid);
 
-	fd = open(fname, O_RDONLY);
-	if (fd >= 0) {
-	    for (i = 0; i <= (count / sizeof(unsigned long)); i += 2) {
+	fd = open (fname, O_RDONLY);
+	if (fd >= 0)
+	{
+	    for (i = 0; i <= (count / sizeof(unsigned long)); i += 2)
+	    {
 		/* Read more if buf is empty... */
-		if (i == (count / sizeof(unsigned long))) {
-		    count = read(fd, buf, sizeof(buf));
+		if (i == (count / sizeof(unsigned long)))
+		{
+		    count = read (fd, buf, sizeof(buf));
 		    if (count <= 0)
 			break;
 		    i = 0;
 		}
 
-		if (buf[i] == AT_HWCAP) {
-		    have_vmx = !!(buf[i+1] & PPC_FEATURE_HAS_ALTIVEC);
+		if (buf[i] == AT_HWCAP)
+		{
+		    have_vmx = !!(buf[i + 1] & PPC_FEATURE_HAS_ALTIVEC);
 		    initialized = TRUE;
 		    break;
-		} else if (buf[i] == AT_NULL) {
+		}
+		else if (buf[i] == AT_NULL)
+		{
 		    break;
 		}
 	    }
-	    close(fd);
+	    close (fd);
 	}
     }
-    if (!initialized) {
+    if (!initialized)
+    {
 	/* Something went wrong. Assume 'no' rather than playing
 	   fragile tricks with catching SIGILL. */
 	have_vmx = FALSE;
@@ -110,35 +122,45 @@ pixman_have_vmx (void)
 
     return have_vmx;
 }
+
 #else /* !__APPLE__ && !__linux__ */
 #include <signal.h>
 #include <setjmp.h>
 
 static jmp_buf jump_env;
 
-static void vmx_test(int sig, siginfo_t *si, void *unused) {
+static void
+vmx_test (int        sig,
+	  siginfo_t *si,
+	  void *     unused)
+{
     longjmp (jump_env, 1);
 }
 
 static pixman_bool_t
-pixman_have_vmx (void) {
+pixman_have_vmx (void)
+{
     struct sigaction sa, osa;
     int jmp_result;
-    if (!initialized) {
-        sa.sa_flags = SA_SIGINFO;
-        sigemptyset(&sa.sa_mask);
-        sa.sa_sigaction = vmx_test;
-        sigaction(SIGILL, &sa, &osa);
+
+    if (!initialized)
+    {
+	sa.sa_flags = SA_SIGINFO;
+	sigemptyset (&sa.sa_mask);
+	sa.sa_sigaction = vmx_test;
+	sigaction (SIGILL, &sa, &osa);
 	jmp_result = setjmp (jump_env);
-	if (jmp_result == 0) {
+	if (jmp_result == 0)
+	{
 	    asm volatile ( "vor 0, 0, 0" );
 	}
-        sigaction(SIGILL, &osa, NULL);
+	sigaction (SIGILL, &osa, NULL);
 	have_vmx = (jmp_result == 0);
-        initialized = TRUE;
+	initialized = TRUE;
     }
     return have_vmx;
 }
+
 #endif /* __APPLE__ */
 #endif /* USE_VMX */
 
@@ -147,7 +169,7 @@ pixman_have_vmx (void) {
 #if defined(_MSC_VER)
 
 #if defined(USE_ARM_SIMD)
-extern int pixman_msvc_try_arm_simd_op();
+extern int pixman_msvc_try_arm_simd_op ();
 
 pixman_bool_t
 pixman_have_arm_simd (void)
@@ -155,22 +177,24 @@ pixman_have_arm_simd (void)
     static pixman_bool_t initialized = FALSE;
     static pixman_bool_t have_arm_simd = FALSE;
 
-    if (!initialized) {
-        __try {
-            pixman_msvc_try_arm_simd_op();
-            have_arm_simd = TRUE;
-        } __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
-            have_arm_simd = FALSE;
-        }
+    if (!initialized)
+    {
+	__try {
+	    pixman_msvc_try_arm_simd_op ();
+	    have_arm_simd = TRUE;
+	} __except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION) {
+	    have_arm_simd = FALSE;
+	}
 	initialized = TRUE;
     }
 
     return have_arm_simd;
 }
+
 #endif /* USE_ARM_SIMD */
 
 #if defined(USE_ARM_NEON)
-extern int pixman_msvc_try_arm_neon_op();
+extern int pixman_msvc_try_arm_neon_op ();
 
 pixman_bool_t
 pixman_have_arm_neon (void)
@@ -178,18 +202,23 @@ pixman_have_arm_neon (void)
     static pixman_bool_t initialized = FALSE;
     static pixman_bool_t have_arm_neon = FALSE;
 
-    if (!initialized) {
-        __try {
-            pixman_msvc_try_arm_neon_op();
-            have_arm_neon = TRUE;
-        } __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) {
-            have_arm_neon = FALSE;
-        }
+    if (!initialized)
+    {
+	__try
+	{
+	    pixman_msvc_try_arm_neon_op ();
+	    have_arm_neon = TRUE;
+	}
+	__except (GetExceptionCode () == EXCEPTION_ILLEGAL_INSTRUCTION)
+	{
+	    have_arm_neon = FALSE;
+	}
 	initialized = TRUE;
     }
 
     return have_arm_neon;
 }
+
 #endif /* USE_ARM_NEON */
 
 #else /* linux ELF */
@@ -211,40 +240,49 @@ static pixman_bool_t arm_has_iwmmxt = FALSE;
 static pixman_bool_t arm_tests_initialized = FALSE;
 
 static void
-pixman_arm_read_auxv() {
+pixman_arm_read_auxv ()
+{
     int fd;
     Elf32_auxv_t aux;
 
-    fd = open("/proc/self/auxv", O_RDONLY);
-    if (fd >= 0) {
-        while (read(fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t)) {
-            if (aux.a_type == AT_HWCAP) {
+    fd = open ("/proc/self/auxv", O_RDONLY);
+    if (fd >= 0)
+    {
+	while (read (fd, &aux, sizeof(Elf32_auxv_t)) == sizeof(Elf32_auxv_t))
+	{
+	    if (aux.a_type == AT_HWCAP)
+	    {
 		uint32_t hwcap = aux.a_un.a_val;
-		if (getenv("ARM_FORCE_HWCAP"))
-		    hwcap = strtoul(getenv("ARM_FORCE_HWCAP"), NULL, 0);
+		if (getenv ("ARM_FORCE_HWCAP"))
+		    hwcap = strtoul (getenv ("ARM_FORCE_HWCAP"), NULL, 0);
 		// hardcode these values to avoid depending on specific versions
 		// of the hwcap header, e.g. HWCAP_NEON
 		arm_has_vfp = (hwcap & 64) != 0;
 		arm_has_iwmmxt = (hwcap & 512) != 0;
 		// this flag is only present on kernel 2.6.29
 		arm_has_neon = (hwcap & 4096) != 0;
-            } else if (aux.a_type == AT_PLATFORM) {
+	    }
+	    else if (aux.a_type == AT_PLATFORM)
+	    {
 		const char *plat = (const char*) aux.a_un.a_val;
-		if (getenv("ARM_FORCE_PLATFORM"))
-		    plat = getenv("ARM_FORCE_PLATFORM");
-		if (strncmp(plat, "v7l", 3) == 0) {
+		if (getenv ("ARM_FORCE_PLATFORM"))
+		    plat = getenv ("ARM_FORCE_PLATFORM");
+		if (strncmp (plat, "v7l", 3) == 0)
+		{
 		    arm_has_v7 = TRUE;
 		    arm_has_v6 = TRUE;
-		} else if (strncmp(plat, "v6l", 3) == 0) {
+		}
+		else if (strncmp (plat, "v6l", 3) == 0)
+		{
 		    arm_has_v6 = TRUE;
 		}
-            }
-        }
-        close (fd);
+	    }
+	}
+	close (fd);
 
 	// if we don't have 2.6.29, we have to do this hack; set
 	// the env var to trust HWCAP.
-	if (!getenv("ARM_TRUST_HWCAP") && arm_has_v7)
+	if (!getenv ("ARM_TRUST_HWCAP") && arm_has_v7)
 	    arm_has_neon = TRUE;
     }
 
@@ -256,10 +294,11 @@ pixman_bool_t
 pixman_have_arm_simd (void)
 {
     if (!arm_tests_initialized)
-	pixman_arm_read_auxv();
+	pixman_arm_read_auxv ();
 
     return arm_has_v6;
 }
+
 #endif /* USE_ARM_SIMD */
 
 #if defined(USE_ARM_NEON)
@@ -267,10 +306,11 @@ pixman_bool_t
 pixman_have_arm_neon (void)
 {
     if (!arm_tests_initialized)
-	pixman_arm_read_auxv();
+	pixman_arm_read_auxv ();
 
     return arm_has_neon;
 }
+
 #endif /* USE_ARM_NEON */
 
 #endif /* linux */
@@ -289,7 +329,8 @@ pixman_have_arm_neon (void)
 #include <sys/auxv.h>
 #endif
 
-typedef enum {
+typedef enum
+{
     NO_FEATURES = 0,
     MMX = 0x1,
     MMX_EXTENSIONS = 0x2,
@@ -297,24 +338,27 @@ typedef enum {
     SSE2 = 0x8,
     CMOV = 0x10
 } cpu_features_t;
-    
 
-static unsigned int detect_cpu_features(void) {
+
+static unsigned int
+detect_cpu_features (void)
+{
     unsigned int features = 0;
     unsigned int result = 0;
 
 #ifdef HAVE_GETISAX
-    if (getisax(&result, 1)) {
-        if (result & AV_386_CMOV)
-            features |= CMOV;
-        if (result & AV_386_MMX)
-            features |= MMX;
-        if (result & AV_386_AMD_MMX)
-            features |= MMX_EXTENSIONS;
-        if (result & AV_386_SSE)
-            features |= SSE;
-        if (result & AV_386_SSE2)
-            features |= SSE2;
+    if (getisax (&result, 1))
+    {
+	if (result & AV_386_CMOV)
+	    features |= CMOV;
+	if (result & AV_386_MMX)
+	    features |= MMX;
+	if (result & AV_386_AMD_MMX)
+	    features |= MMX_EXTENSIONS;
+	if (result & AV_386_SSE)
+	    features |= SSE;
+	if (result & AV_386_SSE2)
+	    features |= SSE2;
     }
 #else
     char vendor[13];
@@ -334,128 +378,130 @@ static unsigned int detect_cpu_features(void) {
      * original values when we access the output operands.
      */
     __asm__ (
-	"pushf\n"
-	"pop %%eax\n"
-	"mov %%eax, %%ecx\n"
-	"xor $0x00200000, %%eax\n"
-	"push %%eax\n"
-	"popf\n"
-	"pushf\n"
-	"pop %%eax\n"
-	"mov $0x0, %%edx\n"
-	"xor %%ecx, %%eax\n"
-	"jz 1f\n"
-	
-	"mov $0x00000000, %%eax\n"
-	"push %%ebx\n"
-	"cpuid\n"
-	"mov %%ebx, %%eax\n"
-	"pop %%ebx\n"
-	"mov %%eax, %1\n"
-	"mov %%edx, %2\n"
-	"mov %%ecx, %3\n"
-	"mov $0x00000001, %%eax\n"
-	"push %%ebx\n"
-	"cpuid\n"
-	"pop %%ebx\n"
-	"1:\n"
-	"mov %%edx, %0\n"
+        "pushf\n"
+        "pop %%eax\n"
+        "mov %%eax, %%ecx\n"
+        "xor $0x00200000, %%eax\n"
+        "push %%eax\n"
+        "popf\n"
+        "pushf\n"
+        "pop %%eax\n"
+        "mov $0x0, %%edx\n"
+        "xor %%ecx, %%eax\n"
+        "jz 1f\n"
+
+        "mov $0x00000000, %%eax\n"
+        "push %%ebx\n"
+        "cpuid\n"
+        "mov %%ebx, %%eax\n"
+        "pop %%ebx\n"
+        "mov %%eax, %1\n"
+        "mov %%edx, %2\n"
+        "mov %%ecx, %3\n"
+        "mov $0x00000001, %%eax\n"
+        "push %%ebx\n"
+        "cpuid\n"
+        "pop %%ebx\n"
+        "1:\n"
+        "mov %%edx, %0\n"
 	: "=r" (result),
-	  "=m" (vendor[0]),
-	  "=m" (vendor[4]),
-	  "=m" (vendor[8])
+        "=m" (vendor[0]),
+        "=m" (vendor[4]),
+        "=m" (vendor[8])
 	:
 	: "%eax", "%ecx", "%edx"
         );
-    
+
 #elif defined (_MSC_VER)
 
     _asm {
-      pushfd
-      pop eax
-      mov ecx, eax
-      xor eax, 00200000h
-      push eax
-      popfd
-      pushfd
-      pop eax
-      mov edx, 0
-      xor eax, ecx
-      jz nocpuid
-
-      mov eax, 0
-      push ebx
-      cpuid
-      mov eax, ebx
-      pop ebx
-      mov vendor0, eax
-      mov vendor1, edx
-      mov vendor2, ecx
-      mov eax, 1
-      push ebx
-      cpuid
-      pop ebx
+	pushfd
+	pop eax
+	mov ecx, eax
+	xor eax, 00200000h
+	push eax
+	popfd
+	pushfd
+	pop eax
+	mov edx, 0
+	xor eax, ecx
+	jz nocpuid
+
+	mov eax, 0
+	push ebx
+	cpuid
+	mov eax, ebx
+	pop ebx
+	mov vendor0, eax
+	mov vendor1, edx
+	mov vendor2, ecx
+	mov eax, 1
+	push ebx
+	cpuid
+	pop ebx
     nocpuid:
-      mov result, edx
+	mov result, edx
     }
-    memmove (vendor+0, &vendor0, 4);
-    memmove (vendor+4, &vendor1, 4);
-    memmove (vendor+8, &vendor2, 4);
+    memmove (vendor + 0, &vendor0, 4);
+    memmove (vendor + 4, &vendor1, 4);
+    memmove (vendor + 8, &vendor2, 4);
 
 #else
 #   error unsupported compiler
 #endif
 
     features = 0;
-    if (result) {
-        /* result now contains the standard feature bits */
-        if (result & (1 << 15))
-            features |= CMOV;
-        if (result & (1 << 23))
-            features |= MMX;
-        if (result & (1 << 25))
-            features |= SSE;
-        if (result & (1 << 26))
-            features |= SSE2;
-        if ((features & MMX) && !(features & SSE) &&
-            (strcmp(vendor, "AuthenticAMD") == 0 ||
-             strcmp(vendor, "Geode by NSC") == 0)) {
-            /* check for AMD MMX extensions */
+    if (result)
+    {
+	/* result now contains the standard feature bits */
+	if (result & (1 << 15))
+	    features |= CMOV;
+	if (result & (1 << 23))
+	    features |= MMX;
+	if (result & (1 << 25))
+	    features |= SSE;
+	if (result & (1 << 26))
+	    features |= SSE2;
+	if ((features & MMX) && !(features & SSE) &&
+	    (strcmp (vendor, "AuthenticAMD") == 0 ||
+	     strcmp (vendor, "Geode by NSC") == 0))
+	{
+	    /* check for AMD MMX extensions */
 #ifdef __GNUC__
-            __asm__(
-		"	push %%ebx\n"
-		"	mov $0x80000000, %%eax\n"
-		"	cpuid\n"
-		"	xor %%edx, %%edx\n"
-		"	cmp $0x1, %%eax\n"
-		"	jge 2f\n"
-		"	mov $0x80000001, %%eax\n"
-		"	cpuid\n"
-		"2:\n"
-		"	pop %%ebx\n"
-		"	mov %%edx, %0\n"
+	    __asm__ (
+	        "	push %%ebx\n"
+	        "	mov $0x80000000, %%eax\n"
+	        "	cpuid\n"
+	        "	xor %%edx, %%edx\n"
+	        "	cmp $0x1, %%eax\n"
+	        "	jge 2f\n"
+	        "	mov $0x80000001, %%eax\n"
+	        "	cpuid\n"
+	        "2:\n"
+	        "	pop %%ebx\n"
+	        "	mov %%edx, %0\n"
 		: "=r" (result)
 		:
 		: "%eax", "%ecx", "%edx"
-                );
+	        );
 #elif defined _MSC_VER
-            _asm {
-              push ebx
-              mov eax, 80000000h
-              cpuid
-              xor edx, edx
-              cmp eax, 1
-              jge notamd
-              mov eax, 80000001h
-              cpuid
-            notamd:
-              pop ebx
-              mov result, edx
-            }
+	    _asm {
+		push ebx
+		mov eax, 80000000h
+		cpuid
+		xor edx, edx
+		cmp eax, 1
+		jge notamd
+		mov eax, 80000001h
+		cpuid
+	    notamd:
+		pop ebx
+		mov result, edx
+	    }
 #endif
-            if (result & (1<<22))
-                features |= MMX_EXTENSIONS;
-        }
+	    if (result & (1 << 22))
+		features |= MMX_EXTENSIONS;
+	}
     }
 #endif /* HAVE_GETISAX */
 
@@ -470,9 +516,9 @@ pixman_have_mmx (void)
 
     if (!initialized)
     {
-        unsigned int features = detect_cpu_features();
-	mmx_present = (features & (MMX|MMX_EXTENSIONS)) == (MMX|MMX_EXTENSIONS);
-        initialized = TRUE;
+	unsigned int features = detect_cpu_features ();
+	mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS);
+	initialized = TRUE;
     }
 
     return mmx_present;
@@ -487,13 +533,14 @@ pixman_have_sse2 (void)
 
     if (!initialized)
     {
-        unsigned int features = detect_cpu_features();
-        sse2_present = (features & (MMX|MMX_EXTENSIONS|SSE|SSE2)) == (MMX|MMX_EXTENSIONS|SSE|SSE2);
-        initialized = TRUE;
+	unsigned int features = detect_cpu_features ();
+	sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2);
+	initialized = TRUE;
     }
 
     return sse2_present;
 }
+
 #endif
 
 #else /* __amd64__ */
@@ -514,22 +561,23 @@ _pixman_choose_implementation (void)
 	return _pixman_implementation_create_sse2 ();
 #endif
 #ifdef USE_MMX
-    if (pixman_have_mmx())
+    if (pixman_have_mmx ())
 	return _pixman_implementation_create_mmx ();
 #endif
 
 #ifdef USE_ARM_NEON
-    if (pixman_have_arm_neon())
+    if (pixman_have_arm_neon ())
 	return _pixman_implementation_create_arm_neon ();
 #endif
 #ifdef USE_ARM_SIMD
-    if (pixman_have_arm_simd())
+    if (pixman_have_arm_simd ())
 	return _pixman_implementation_create_arm_simd ();
 #endif
 #ifdef USE_VMX
-    if (pixman_have_vmx())
+    if (pixman_have_vmx ())
 	return _pixman_implementation_create_vmx ();
 #endif
-    
+
     return _pixman_implementation_create_fast_path ();
 }
+
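
    [Every probe in pixman-cpu.c follows the same detect-once shape: the
    first call runs a platform-specific test (cpuid via inline assembly
    on x86, getisax() on Solaris, /proc auxv scanning on Linux ARM and
    PPC, a SIGILL or __except trap as a last resort) and caches the
    result in static flags.  A reduced sketch of the pattern, with a stub
    standing in for the real probe; the stub and the MY_* names are
    illustrative, not pixman API:

        #include <stdbool.h>

        enum { MY_MMX = 0x1, MY_SSE = 0x4, MY_SSE2 = 0x8 };

        /* Stub: the real detect_cpu_features() issues cpuid and maps the
         * standard feature bits (bit 23 MMX, bit 25 SSE, bit 26 SSE2,
         * bit 15 CMOV, as the hunk above decodes them). */
        static unsigned int
        detect_cpu_features (void)
        {
            return MY_MMX | MY_SSE | MY_SSE2;
        }

        static bool
        have_sse2 (void)
        {
            static bool initialized = false;
            static bool sse2_present = false;

            if (!initialized)
            {
                sse2_present = (detect_cpu_features () & (MY_SSE | MY_SSE2))
                               == (MY_SSE | MY_SSE2);
                initialized = true;
            }
            return sse2_present;
        }

    The unsynchronized statics are benign here: a race merely makes two
    threads compute the same answer twice.]
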
commit e01fc6bba7f90c80e86b206f28efa3f1f94a083b
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 18:55:45 2009 -0400

    Reindent and reformat pixman-conical-gradient.c

diff --git a/pixman/pixman-conical-gradient.c b/pixman/pixman-conical-gradient.c
index 8e7d1c5..6a4e31e 100644
--- a/pixman/pixman-conical-gradient.c
+++ b/pixman/pixman-conical-gradient.c
@@ -29,15 +29,19 @@
 #include "pixman-private.h"
 
 static void
-conical_gradient_get_scanline_32 (pixman_image_t *image, int x, int y,
-				  int width, uint32_t *buffer,
-				  const uint32_t *mask, uint32_t mask_bits)
+conical_gradient_get_scanline_32 (pixman_image_t *image,
+                                  int             x,
+                                  int             y,
+                                  int             width,
+                                  uint32_t *      buffer,
+                                  const uint32_t *mask,
+                                  uint32_t        mask_bits)
 {
     source_image_t *source = (source_image_t *)image;
     gradient_t *gradient = (gradient_t *)source;
     conical_gradient_t *conical = (conical_gradient_t *)image;
     uint32_t       *end = buffer + width;
-    pixman_gradient_walker_t  walker;
+    pixman_gradient_walker_t walker;
     pixman_bool_t affine = TRUE;
     double cx = 1.;
     double cy = 0.;
@@ -45,73 +49,92 @@ conical_gradient_get_scanline_32 (pixman_image_t *image, int x, int y,
     double rx = x + 0.5;
     double ry = y + 0.5;
     double rz = 1.;
-    double a = conical->angle/(180.*65536);
+    double a = conical->angle / (180. * 65536);
 
     _pixman_gradient_walker_init (&walker, gradient, source->common.repeat);
-    
-    if (source->common.transform) {
+
+    if (source->common.transform)
+    {
 	pixman_vector_t v;
+
 	/* reference point is the center of the pixel */
-	v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1/2;
-	v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1/2;
+	v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
+	v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
 	v.vector[2] = pixman_fixed_1;
+
 	if (!pixman_transform_point_3d (source->common.transform, &v))
 	    return;
+
+	cx = source->common.transform->matrix[0][0] / 65536.;
+	cy = source->common.transform->matrix[1][0] / 65536.;
+	cz = source->common.transform->matrix[2][0] / 65536.;
 	
-	cx = source->common.transform->matrix[0][0]/65536.;
-	cy = source->common.transform->matrix[1][0]/65536.;
-	cz = source->common.transform->matrix[2][0]/65536.;
-	rx = v.vector[0]/65536.;
-	ry = v.vector[1]/65536.;
-	rz = v.vector[2]/65536.;
-	affine = source->common.transform->matrix[2][0] == 0 && v.vector[2] == pixman_fixed_1;
-    }
-    
-    if (affine) {
-	rx -= conical->center.x/65536.;
-	ry -= conical->center.y/65536.;
+	rx = v.vector[0] / 65536.;
+	ry = v.vector[1] / 65536.;
+	rz = v.vector[2] / 65536.;
 	
-	while (buffer < end) {
+	affine =
+	    source->common.transform->matrix[2][0] == 0 &&
+	    v.vector[2] == pixman_fixed_1;
+    }
+
+    if (affine)
+    {
+	rx -= conical->center.x / 65536.;
+	ry -= conical->center.y / 65536.;
+
+	while (buffer < end)
+	{
 	    double angle;
-	    
+
 	    if (!mask || *mask++ & mask_bits)
 	    {
-		pixman_fixed_48_16_t   t;
-		
-		angle = atan2(ry, rx) + a;
-		t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
-		
-		*(buffer) = _pixman_gradient_walker_pixel (&walker, t);
+		pixman_fixed_48_16_t t;
+
+		angle = atan2 (ry, rx) + a;
+		t     = (pixman_fixed_48_16_t) (angle * (65536. / (2 * M_PI)));
+
+		*buffer = _pixman_gradient_walker_pixel (&walker, t);
 	    }
-	    
+
 	    ++buffer;
+	    
 	    rx += cx;
 	    ry += cy;
 	}
-    } else {
-	while (buffer < end) {
+    }
+    else
+    {
+	while (buffer < end)
+	{
 	    double x, y;
 	    double angle;
-	    
+
 	    if (!mask || *mask++ & mask_bits)
 	    {
-		pixman_fixed_48_16_t  t;
-		
-		if (rz != 0) {
-		    x = rx/rz;
-		    y = ry/rz;
-		} else {
+		pixman_fixed_48_16_t t;
+
+		if (rz != 0)
+		{
+		    x = rx / rz;
+		    y = ry / rz;
+		}
+		else
+		{
 		    x = y = 0.;
 		}
-		x -= conical->center.x/65536.;
-		y -= conical->center.y/65536.;
-		angle = atan2(y, x) + a;
-		t     = (pixman_fixed_48_16_t) (angle * (65536. / (2*M_PI)));
+
+		x -= conical->center.x / 65536.;
+		y -= conical->center.y / 65536.;
 		
-		*(buffer) = _pixman_gradient_walker_pixel (&walker, t);
+		angle = atan2 (y, x) + a;
+		t     = (pixman_fixed_48_16_t) (angle * (65536. / (2 * M_PI)));
+
+		*buffer = _pixman_gradient_walker_pixel (&walker, t);
 	    }
-	    
+
 	    ++buffer;
+	    
 	    rx += cx;
 	    ry += cy;
 	    rz += cz;
@@ -127,32 +150,33 @@ conical_gradient_property_changed (pixman_image_t *image)
 }
 
 PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_conical_gradient (pixman_point_fixed_t         *center,
-				      pixman_fixed_t                angle,
-				      const pixman_gradient_stop_t *stops,
-				      int                           n_stops)
+pixman_image_create_conical_gradient (pixman_point_fixed_t *        center,
+                                      pixman_fixed_t                angle,
+                                      const pixman_gradient_stop_t *stops,
+                                      int                           n_stops)
 {
-    pixman_image_t *image = _pixman_image_allocate();
+    pixman_image_t *image = _pixman_image_allocate ();
     conical_gradient_t *conical;
-    
+
     if (!image)
 	return NULL;
-    
+
     conical = &image->conical;
-    
+
     if (!_pixman_init_gradient (&conical->common, stops, n_stops))
     {
 	free (image);
 	return NULL;
     }
-    
+
     image->type = CONICAL;
     conical->center = *center;
     conical->angle = angle;
-    
+
     image->common.property_changed = conical_gradient_property_changed;
-    
+
     conical_gradient_property_changed (image);
-    
+
     return image;
 }
+
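
    [The heart of the conical gradient is the per-pixel mapping in the
    loops above: the pixel's offset from the gradient center goes through
    atan2(), the image's angle offset is added, and the result is scaled
    so that one full turn corresponds to 65536, i.e. 1.0 in 16.16 fixed
    point, which is the parameter _pixman_gradient_walker_pixel expects.
    A worked sketch of just that mapping; conical_t is a made-up name for
    illustration:

        #include <math.h>
        #include <stdint.h>
        #include <stdio.h>

        typedef int64_t pixman_fixed_48_16_t;

        static pixman_fixed_48_16_t
        conical_t (double dx, double dy, double a)
        {
            double angle = atan2 (dy, dx) + a;

            /* One full turn (2*pi radians) maps to 65536. */
            return (pixman_fixed_48_16_t) (angle * (65536. / (2 * M_PI)));
        }

        int
        main (void)
        {
            /* A point a quarter turn around the center lands a quarter
             * of the way into the gradient: 65536 / 4 = 16384. */
            printf ("%lld\n", (long long) conical_t (0.0, 1.0, 0.0));
            return 0;
        }
    ]
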
commit 2871add52ece8bc4a02c0f4fae98912a0f4a830c
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 18:54:31 2009 -0400

    Reindent and reformat pixman.c

diff --git a/pixman/pixman.c b/pixman/pixman.c
index fed99ee..94675d2 100644
--- a/pixman/pixman.c
+++ b/pixman/pixman.c
@@ -33,10 +33,10 @@
  */
 typedef struct
 {
-    pixman_op_t			op;
-    pixman_op_t			op_src_dst_opaque;
-    pixman_op_t			op_src_opaque;
-    pixman_op_t			op_dst_opaque;
+    pixman_op_t op;
+    pixman_op_t op_src_dst_opaque;
+    pixman_op_t op_src_opaque;
+    pixman_op_t op_dst_opaque;
 } optimized_operator_info_t;
 
 static const optimized_operator_info_t optimized_operators[] =
@@ -59,14 +59,14 @@ static const optimized_operator_info_t optimized_operators[] =
  * Check if the current operator could be optimized
  */
 static const optimized_operator_info_t*
-pixman_operator_can_be_optimized(pixman_op_t op)
+pixman_operator_can_be_optimized (pixman_op_t op)
 {
     const optimized_operator_info_t *info;
 
     for (info = optimized_operators; info->op != PIXMAN_OP_NONE; info++)
     {
-        if(info->op == op)
-            return info;
+	if (info->op == op)
+	    return info;
     }
     return NULL;
 }
@@ -76,27 +76,30 @@ pixman_operator_can_be_optimized(pixman_op_t op)
  * The output operator should be mathematically equivalent to the source.
  */
 static pixman_op_t
-pixman_optimize_operator(pixman_op_t op, pixman_image_t *src_image, pixman_image_t *mask_image, pixman_image_t *dst_image )
+pixman_optimize_operator (pixman_op_t     op,
+                          pixman_image_t *src_image,
+                          pixman_image_t *mask_image,
+                          pixman_image_t *dst_image)
 {
     pixman_bool_t is_source_opaque;
     pixman_bool_t is_dest_opaque;
-    const optimized_operator_info_t *info = pixman_operator_can_be_optimized(op);
+    const optimized_operator_info_t *info = pixman_operator_can_be_optimized (op);
 
-    if(!info || mask_image)
-        return op;
+    if (!info || mask_image)
+	return op;
 
-    is_source_opaque = _pixman_image_is_opaque(src_image);
-    is_dest_opaque = _pixman_image_is_opaque(dst_image);
+    is_source_opaque = _pixman_image_is_opaque (src_image);
+    is_dest_opaque = _pixman_image_is_opaque (dst_image);
 
-    if(is_source_opaque == FALSE && is_dest_opaque == FALSE)
-        return op;
+    if (is_source_opaque == FALSE && is_dest_opaque == FALSE)
+	return op;
 
-    if(is_source_opaque && is_dest_opaque)
-        return info->op_src_dst_opaque;
-    else if(is_source_opaque)
-        return info->op_src_opaque;
-    else if(is_dest_opaque)
-        return info->op_dst_opaque;
+    if (is_source_opaque && is_dest_opaque)
+	return info->op_src_dst_opaque;
+    else if (is_source_opaque)
+	return info->op_src_opaque;
+    else if (is_dest_opaque)
+	return info->op_dst_opaque;
 
     return op;
 
@@ -106,70 +109,78 @@ static pixman_implementation_t *imp;
 
 PIXMAN_EXPORT void
 pixman_image_composite (pixman_op_t      op,
-			pixman_image_t * src,
-			pixman_image_t * mask,
-			pixman_image_t * dest,
-			int16_t      src_x,
-			int16_t      src_y,
-			int16_t      mask_x,
-			int16_t      mask_y,
-			int16_t      dest_x,
-			int16_t      dest_y,
-			uint16_t     width,
-			uint16_t     height)
+                        pixman_image_t * src,
+                        pixman_image_t * mask,
+                        pixman_image_t * dest,
+                        int16_t          src_x,
+                        int16_t          src_y,
+                        int16_t          mask_x,
+                        int16_t          mask_y,
+                        int16_t          dest_x,
+                        int16_t          dest_y,
+                        uint16_t         width,
+                        uint16_t         height)
 {
     /*
-     * Check if we can replace our operator by a simpler one if the src or dest are opaque
-     * The output operator should be mathematically equivalent to the source.
+     * Check if we can replace our operator by a simpler one
+     * if the src or dest are opaque. The output operator should be
+     * mathematically equivalent to the source.
      */
     op = pixman_optimize_operator(op, src, mask, dest);
-    if(op == PIXMAN_OP_DST || op == PIXMAN_OP_CONJOINT_DST || op == PIXMAN_OP_DISJOINT_DST)
+    if (op == PIXMAN_OP_DST		||
+	op == PIXMAN_OP_CONJOINT_DST	||
+	op == PIXMAN_OP_DISJOINT_DST)
+    {
         return;
+    }
 
     if (!imp)
-	imp = _pixman_choose_implementation();
+	imp = _pixman_choose_implementation ();
 
     _pixman_implementation_composite (imp, op,
-				      src, mask, dest,
-				      src_x, src_y,
-				      mask_x, mask_y,
-				      dest_x, dest_y,
-				      width, height);
+                                      src, mask, dest,
+                                      src_x, src_y,
+                                      mask_x, mask_y,
+                                      dest_x, dest_y,
+                                      width, height);
 }
 
 PIXMAN_EXPORT pixman_bool_t
 pixman_blt (uint32_t *src_bits,
-	    uint32_t *dst_bits,
-	    int src_stride,
-	    int dst_stride,
-	    int src_bpp,
-	    int dst_bpp,
-	    int src_x, int src_y,
-	    int dst_x, int dst_y,
-	    int width, int height)
+            uint32_t *dst_bits,
+            int       src_stride,
+            int       dst_stride,
+            int       src_bpp,
+            int       dst_bpp,
+            int       src_x,
+            int       src_y,
+            int       dst_x,
+            int       dst_y,
+            int       width,
+            int       height)
 {
     if (!imp)
-	imp = _pixman_choose_implementation();
-    
+	imp = _pixman_choose_implementation ();
+
     return _pixman_implementation_blt (imp, src_bits, dst_bits, src_stride, dst_stride,
-				       src_bpp, dst_bpp,
-				       src_x, src_y,
-				       dst_x, dst_y,
-				       width, height);
+                                       src_bpp, dst_bpp,
+                                       src_x, src_y,
+                                       dst_x, dst_y,
+                                       width, height);
 }
 
 PIXMAN_EXPORT pixman_bool_t
 pixman_fill (uint32_t *bits,
-	     int stride,
-	     int bpp,
-	     int x,
-	     int y,
-	     int width,
-	     int height,
-	     uint32_t xor)
+             int       stride,
+             int       bpp,
+             int       x,
+             int       y,
+             int       width,
+             int       height,
+             uint32_t xor)
 {
     if (!imp)
-	imp = _pixman_choose_implementation();
+	imp = _pixman_choose_implementation ();
 
     return _pixman_implementation_fill (imp, bits, stride, bpp, x, y, width, height, xor);
 }
@@ -178,28 +189,28 @@ static uint32_t
 color_to_uint32 (const pixman_color_t *color)
 {
     return
-	(color->alpha >> 8 << 24) |
-	(color->red >> 8 << 16) |
+        (color->alpha >> 8 << 24) |
+        (color->red >> 8 << 16) |
         (color->green & 0xff00) |
-	(color->blue >> 8);
+        (color->blue >> 8);
 }
 
 static pixman_bool_t
-color_to_pixel (pixman_color_t *color,
-		uint32_t       *pixel,
-		pixman_format_code_t format)
+color_to_pixel (pixman_color_t *     color,
+                uint32_t *           pixel,
+                pixman_format_code_t format)
 {
     uint32_t c = color_to_uint32 (color);
 
-    if (!(format == PIXMAN_a8r8g8b8	||
-	  format == PIXMAN_x8r8g8b8	||
-	  format == PIXMAN_a8b8g8r8	||
-	  format == PIXMAN_x8b8g8r8	||
-	  format == PIXMAN_b8g8r8a8	||
-	  format == PIXMAN_b8g8r8x8	||
-	  format == PIXMAN_r5g6b5	||
-	  format == PIXMAN_b5g6r5	||
-	  format == PIXMAN_a8))
+    if (!(format == PIXMAN_a8r8g8b8     ||
+          format == PIXMAN_x8r8g8b8     ||
+          format == PIXMAN_a8b8g8r8     ||
+          format == PIXMAN_x8b8g8r8     ||
+          format == PIXMAN_b8g8r8a8     ||
+          format == PIXMAN_b8g8r8x8     ||
+          format == PIXMAN_r5g6b5       ||
+          format == PIXMAN_b5g6r5       ||
+          format == PIXMAN_a8))
     {
 	return FALSE;
     }
@@ -222,7 +233,7 @@ color_to_pixel (pixman_color_t *color,
     if (format == PIXMAN_a8)
 	c = c >> 24;
     else if (format == PIXMAN_r5g6b5 ||
-	     format == PIXMAN_b5g6r5)
+             format == PIXMAN_b5g6r5)
 	c = CONVERT_8888_TO_0565 (c);
 
 #if 0
@@ -235,11 +246,11 @@ color_to_pixel (pixman_color_t *color,
 }
 
 PIXMAN_EXPORT pixman_bool_t
-pixman_image_fill_rectangles (pixman_op_t		    op,
-			      pixman_image_t		   *dest,
-			      pixman_color_t		   *color,
-			      int			    n_rects,
-			      const pixman_rectangle16_t   *rects)
+pixman_image_fill_rectangles (pixman_op_t                 op,
+                              pixman_image_t *            dest,
+                              pixman_color_t *            color,
+                              int                         n_rects,
+                              const pixman_rectangle16_t *rects)
 {
     pixman_image_t *solid;
     pixman_color_t c;
@@ -280,8 +291,8 @@ pixman_image_fill_rectangles (pixman_op_t		    op,
 		if (dest->common.have_clip_region)
 		{
 		    if (!pixman_region32_intersect (&fill_region,
-						    &fill_region,
-						    &dest->common.clip_region))
+		                                    &fill_region,
+		                                    &dest->common.clip_region))
 			return FALSE;
 		}
 
@@ -290,8 +301,8 @@ pixman_image_fill_rectangles (pixman_op_t		    op,
 		{
 		    const pixman_box32_t *box = &(boxes[j]);
 		    pixman_fill (dest->bits.bits, dest->bits.rowstride, PIXMAN_FORMAT_BPP (dest->bits.format),
-				 box->x1, box->y1, box->x2 - box->x1, box->y2 - box->y1,
-				 pixel);
+		                 box->x1, box->y1, box->x2 - box->x1, box->y2 - box->y1,
+		                 pixel);
 		}
 
 		pixman_region32_fini (&fill_region);
@@ -309,9 +320,9 @@ pixman_image_fill_rectangles (pixman_op_t		    op,
 	const pixman_rectangle16_t *rect = &(rects[i]);
 
 	pixman_image_composite (op, solid, NULL, dest,
-				0, 0, 0, 0,
-				rect->x, rect->y,
-				rect->width, rect->height);
+	                        0, 0, 0, 0,
+	                        rect->x, rect->y,
+	                        rect->width, rect->height);
     }
 
     pixman_image_unref (solid);
@@ -364,7 +375,7 @@ pixman_version_string (void)
 /**
  * pixman_format_supported_source:
  * @format: A pixman_format_code_t format
- * 
+ *
  * Return value: whether the provided format code is a supported
  * format for a pixman surface used as a source in
  * rendering.
@@ -374,7 +385,8 @@ pixman_version_string (void)
 PIXMAN_EXPORT pixman_bool_t
 pixman_format_supported_source (pixman_format_code_t format)
 {
-    switch (format) {
+    switch (format)
+    {
     /* 32 bpp formats */
     case PIXMAN_a2b10g10r10:
     case PIXMAN_x2b10g10r10:
@@ -409,11 +421,11 @@ pixman_format_supported_source (pixman_format_code_t format)
     case PIXMAN_g8:
     case PIXMAN_x4a4:
     /* Collides with PIXMAN_c8
-    case PIXMAN_x4c4:
-    */
+       case PIXMAN_x4c4:
+     */
     /* Collides with PIXMAN_g8
-    case PIXMAN_x4g4:
-    */
+       case PIXMAN_x4g4:
+     */
     /* 4bpp formats */
     case PIXMAN_a4:
     case PIXMAN_r1g2b1:
@@ -438,7 +450,7 @@ pixman_format_supported_source (pixman_format_code_t format)
 /**
  * pixman_format_supported_destination:
  * @format: A pixman_format_code_t format
- * 
+ *
  * Return value: whether the provided format code is a supported
  * format for a pixman surface used as a destination in
  * rendering.
@@ -455,3 +467,4 @@ pixman_format_supported_destination (pixman_format_code_t format)
 
     return pixman_format_supported_source (format);
 }
+
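
    [Two small pieces of machinery in this file are worth spelling out.
    pixman_optimize_operator() is a strength reduction: when the source
    and/or destination are known opaque, a table lookup swaps the
    requested operator for a cheaper, mathematically equivalent one.  And
    color_to_uint32() narrows pixman's 16-bit-per-channel color to a
    packed a8r8g8b8 pixel by keeping the top 8 bits of each channel.  A
    self-contained version of that packing; my_color_t mirrors the fields
    the function reads and is not the pixman type:

        #include <stdint.h>

        typedef struct
        {
            uint16_t red, green, blue, alpha;
        } my_color_t;

        static uint32_t
        color_to_a8r8g8b8 (const my_color_t *color)
        {
            /* green & 0xff00 already places green's top byte at bits
             * 8..15, so that channel needs no shift. */
            return
                ((uint32_t) (color->alpha >> 8) << 24) |
                ((uint32_t) (color->red   >> 8) << 16) |
                ((uint32_t) (color->green & 0xff00))   |
                ((uint32_t) (color->blue  >> 8));
        }

        /* Example: opaque mid-gray {0x8000, 0x8000, 0x8000, 0xffff}
         * packs to 0xff808080. */
    ]
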
commit 0e6e08d380a16f8804706270e74f66960a681167
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 18:52:33 2009 -0400

    Reindent and reformat pixman-bits-image.c

diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index 6789eda..e366c85 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -34,10 +34,11 @@
 /* Store functions */
 
 static void
-bits_image_store_scanline_32 (bits_image_t *image,
-			      int x, int y,
-			      int width,
-			      const uint32_t *buffer)
+bits_image_store_scanline_32 (bits_image_t *  image,
+                              int             x,
+                              int             y,
+                              int             width,
+                              const uint32_t *buffer)
 {
     image->store_scanline_raw_32 (image, x, y, width, buffer);
 
@@ -51,10 +52,11 @@ bits_image_store_scanline_32 (bits_image_t *image,
 }
 
 static void
-bits_image_store_scanline_64 (bits_image_t *image,
-			      int x, int y,
-			      int width,
-			      const uint32_t *buffer)
+bits_image_store_scanline_64 (bits_image_t *  image,
+                              int             x,
+                              int             y,
+                              int             width,
+                              const uint32_t *buffer)
 {
     image->store_scanline_raw_64 (image, x, y, width, buffer);
 
@@ -68,15 +70,21 @@ bits_image_store_scanline_64 (bits_image_t *image,
 }
 
 void
-_pixman_image_store_scanline_32 (bits_image_t *image, int x, int y, int width,
-				 const uint32_t *buffer)
+_pixman_image_store_scanline_32 (bits_image_t *  image,
+                                 int             x,
+                                 int             y,
+                                 int             width,
+                                 const uint32_t *buffer)
 {
     image->store_scanline_32 (image, x, y, width, buffer);
 }
 
 void
-_pixman_image_store_scanline_64 (bits_image_t *image, int x, int y, int width,
-				 const uint32_t *buffer)
+_pixman_image_store_scanline_64 (bits_image_t *  image,
+                                 int             x,
+                                 int             y,
+                                 int             width,
+                                 const uint32_t *buffer)
 {
     image->store_scanline_64 (image, x, y, width, buffer);
 }
@@ -91,19 +99,23 @@ _pixman_image_store_scanline_64 (bits_image_t *image, int x, int y, int width,
  * the pixel returned will be 0.
  */
 static void
-bits_image_fetch_raw_pixels (bits_image_t *image, uint32_t *buffer, int n_pixels)
+bits_image_fetch_raw_pixels (bits_image_t *image,
+                             uint32_t *    buffer,
+                             int           n_pixels)
 {
     image->fetch_pixels_raw_32 (image, buffer, n_pixels);
 }
 
 static void
-bits_image_fetch_alpha_pixels (bits_image_t *image, uint32_t *buffer, int n_pixels)
+bits_image_fetch_alpha_pixels (bits_image_t *image,
+                               uint32_t *    buffer,
+                               int           n_pixels)
 {
 #define N_ALPHA_PIXELS 256
-    
+
     uint32_t alpha_pixels[N_ALPHA_PIXELS * 2];
     int i;
-    
+
     if (!image->common.alpha_map)
     {
 	bits_image_fetch_raw_pixels (image, buffer, n_pixels);
@@ -117,40 +129,40 @@ bits_image_fetch_alpha_pixels (bits_image_t *image, uint32_t *buffer, int n_pixe
 	int tmp_n_pixels = MIN (N_ALPHA_PIXELS, n_pixels - i);
 	int j;
 	int32_t *coords;
-	
+
 	memcpy (alpha_pixels, buffer + 2 * i, tmp_n_pixels * 2 * sizeof (int32_t));
 	coords = (int32_t *)alpha_pixels;
 	for (j = 0; j < tmp_n_pixels; ++j)
 	{
 	    int32_t x = coords[0];
 	    int32_t y = coords[1];
-	    
+
 	    if (x != 0xffffffff)
 	    {
 		x -= image->common.alpha_origin_x;
-		
+
 		if (x < 0 || x >= image->common.alpha_map->width)
 		    x = 0xffffffff;
 	    }
-	    
+
 	    if (y != 0xffffffff)
 	    {
 		y -= image->common.alpha_origin_y;
-		
+
 		if (y < 0 || y >= image->common.alpha_map->height)
 		    y = 0xffffffff;
 	    }
-	    
+
 	    coords[0] = x;
 	    coords[1] = y;
-	    
+
 	    coords += 2;
 	}
-	
+
 	bits_image_fetch_raw_pixels (image->common.alpha_map, alpha_pixels,
-				     tmp_n_pixels);
+	                             tmp_n_pixels);
 	bits_image_fetch_raw_pixels (image, buffer + 2 * i, tmp_n_pixels);
-	
+
 	for (j = 0; j < tmp_n_pixels; ++j)
 	{
 	    int a = alpha_pixels[j] >> 24;
@@ -164,13 +176,19 @@ bits_image_fetch_alpha_pixels (bits_image_t *image, uint32_t *buffer, int n_pixe
 }
 
 static void
-bits_image_fetch_pixels_src_clip (bits_image_t *image, uint32_t *buffer, int n_pixels)
+bits_image_fetch_pixels_src_clip (bits_image_t *image,
+                                  uint32_t *    buffer,
+                                  int           n_pixels)
 {
     bits_image_fetch_alpha_pixels (image, buffer, n_pixels);
 }
 
 static force_inline void
-repeat (pixman_repeat_t repeat, int width, int height, int *x, int *y)
+repeat (pixman_repeat_t repeat,
+        int             width,
+        int             height,
+        int *           x,
+        int *           y)
 {
     switch (repeat)
     {
@@ -183,14 +201,14 @@ repeat (pixman_repeat_t repeat, int width, int height, int *x, int *y)
 	*x = CLIP (*x, 0, width - 1);
 	*y = CLIP (*y, 0, height - 1);
 	break;
-	
+
     case PIXMAN_REPEAT_REFLECT:
 	*x = MOD (*x, width * 2);
 	*y = MOD (*y, height * 2);
 
 	if (*x >= width)
 	    *x = width * 2 - *x - 1;
-	
+
 	if (*y >= height)
 	    *y = height * 2 - *y - 1;
 	break;
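
    [The repeat() helper reduces every repeat mode to simple coordinate
    arithmetic: PIXMAN_REPEAT_PAD clamps into the image,
    PIXMAN_REPEAT_REFLECT wraps modulo twice the size and mirrors the
    second half, and PIXMAN_REPEAT_NORMAL (in the part of the switch not
    visible in this hunk) presumably wraps with the same MOD.  A
    self-contained sketch for one axis, assuming MOD is the
    always-non-negative modulo its use on negative coordinates requires:

        /* MOD (v, size): like v % size but never negative. */
        static int
        wrap_normal (int v, int size)
        {
            int m = v % size;
            return m < 0 ? m + size : m;
        }

        /* CLIP (v, 0, size - 1) */
        static int
        wrap_pad (int v, int size)
        {
            return v < 0 ? 0 : (v >= size ? size - 1 : v);
        }

        /* Wrap over a double-width period, then mirror the second half,
         * exactly as the REFLECT case does for x and y above. */
        static int
        wrap_reflect (int v, int size)
        {
            int m = wrap_normal (v, size * 2);
            return m >= size ? size * 2 - m - 1 : m;
        }
    ]
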
@@ -209,7 +227,9 @@ repeat (pixman_repeat_t repeat, int width, int height, int *x, int *y)
  * a list of pixels on output
  */
 static void
-bits_image_fetch_nearest_pixels (bits_image_t *image, uint32_t *buffer, int n_pixels)
+bits_image_fetch_nearest_pixels (bits_image_t *image,
+                                 uint32_t *    buffer,
+                                 int           n_pixels)
 {
     pixman_repeat_t repeat_mode = image->common.repeat;
     int width = image->width;
@@ -234,20 +254,22 @@ bits_image_fetch_nearest_pixels (bits_image_t *image, uint32_t *buffer, int n_pi
     bits_image_fetch_pixels_src_clip (image, buffer, n_pixels);
 }
 
-#define N_TMP_PIXELS	(256)
+#define N_TMP_PIXELS    (256)
 
 /* Buffer contains list of fixed-point coordinates on input,
  * a list of pixels on output
  */
 static void
-bits_image_fetch_bilinear_pixels (bits_image_t *image, uint32_t *buffer, int n_pixels)
+bits_image_fetch_bilinear_pixels (bits_image_t *image,
+                                  uint32_t *    buffer,
+                                  int           n_pixels)
 {
 /* (Four pixels * two coordinates) per pixel */
-#define N_TEMPS		(N_TMP_PIXELS * 8)
-#define N_DISTS		(N_TMP_PIXELS * 2)
-    
+#define N_TEMPS         (N_TMP_PIXELS * 8)
+#define N_DISTS         (N_TMP_PIXELS * 2)
+
     uint32_t temps[N_TEMPS];
-    int32_t  dists[N_DISTS];
+    int32_t dists[N_DISTS];
     pixman_repeat_t repeat_mode = image->common.repeat;
     int width = image->width;
     int height = image->height;
@@ -258,24 +280,24 @@ bits_image_fetch_bilinear_pixels (bits_image_t *image, uint32_t *buffer, int n_p
     coords = (int32_t *)buffer;
     while (i < n_pixels)
     {
-	int tmp_n_pixels = MIN(N_TMP_PIXELS, n_pixels - i);
+	int tmp_n_pixels = MIN (N_TMP_PIXELS, n_pixels - i);
 	int32_t distx, disty;
 	uint32_t *u;
 	int32_t *t, *d;
 	int j;
-	
+
 	t = (int32_t *)temps;
 	d = dists;
 	for (j = 0; j < tmp_n_pixels; ++j)
 	{
 	    int32_t x1, y1, x2, y2;
-	    
+
 	    x1 = coords[0] - pixman_fixed_1 / 2;
 	    y1 = coords[1] - pixman_fixed_1 / 2;
-	    
+
 	    distx = (x1 >> 8) & 0xff;
 	    disty = (y1 >> 8) & 0xff;
-	    
+
 	    x1 >>= 16;
 	    y1 >>= 16;
 	    x2 = x1 + 1;
@@ -283,7 +305,7 @@ bits_image_fetch_bilinear_pixels (bits_image_t *image, uint32_t *buffer, int n_p
 
 	    repeat (repeat_mode, width, height, &x1, &y1);
 	    repeat (repeat_mode, width, height, &x2, &y2);
-	    
+
 	    *t++ = x1;
 	    *t++ = y1;
 	    *t++ = x2;
@@ -308,7 +330,7 @@ bits_image_fetch_bilinear_pixels (bits_image_t *image, uint32_t *buffer, int n_p
 	    uint32_t tl, tr, bl, br, r;
 	    int32_t idistx, idisty;
 	    uint32_t ft, fb;
-	    
+
 	    tl = *u++;
 	    tr = *u++;
 	    bl = *u++;
@@ -320,19 +342,19 @@ bits_image_fetch_bilinear_pixels (bits_image_t *image, uint32_t *buffer, int n_p
 	    idistx = 256 - distx;
 	    idisty = 256 - disty;
 
-#define GET8(v,i)   ((uint16_t) (uint8_t) ((v) >> i))
-	    
-	    ft = GET8(tl,0) * idistx + GET8(tr,0) * distx;
-	    fb = GET8(bl,0) * idistx + GET8(br,0) * distx;
+#define GET8(v, i)   ((uint16_t) (uint8_t) ((v) >> i))
+
+	    ft = GET8 (tl, 0) * idistx + GET8 (tr, 0) * distx;
+	    fb = GET8 (bl, 0) * idistx + GET8 (br, 0) * distx;
 	    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
-	    ft = GET8(tl,8) * idistx + GET8(tr,8) * distx;
-	    fb = GET8(bl,8) * idistx + GET8(br,8) * distx;
+	    ft = GET8 (tl, 8) * idistx + GET8 (tr, 8) * distx;
+	    fb = GET8 (bl, 8) * idistx + GET8 (br, 8) * distx;
 	    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
-	    ft = GET8(tl,16) * idistx + GET8(tr,16) * distx;
-	    fb = GET8(bl,16) * idistx + GET8(br,16) * distx;
+	    ft = GET8 (tl, 16) * idistx + GET8 (tr, 16) * distx;
+	    fb = GET8 (bl, 16) * idistx + GET8 (br, 16) * distx;
 	    r |= (((ft * idisty + fb * disty)) & 0xff0000);
-	    ft = GET8(tl,24) * idistx + GET8(tr,24) * distx;
-	    fb = GET8(bl,24) * idistx + GET8(br,24) * distx;
+	    ft = GET8 (tl, 24) * idistx + GET8 (tr, 24) * distx;
+	    fb = GET8 (bl, 24) * idistx + GET8 (br, 24) * distx;
 	    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
 
 	    buffer[i++] = r;
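
    [The GET8 arithmetic above is a fixed-point bilinear blend done per
    8-bit channel: distx and disty are 8-bit fractions taken from bits
    8..15 of the 16.16 sample coordinate earlier in the function, each
    horizontal lerp adds 8 fractional bits, and the final shift by 16
    removes both.  Unrolled for a single channel:

        #include <stdint.h>

        static uint8_t
        bilinear_channel (uint8_t tl, uint8_t tr, uint8_t bl, uint8_t br,
                          int distx, int disty)  /* fractions in 0..255 */
        {
            int idistx = 256 - distx;
            int idisty = 256 - disty;

            /* Horizontal lerps along the top and bottom edges; the
             * weights sum to 256, so each result carries 8 fractional
             * bits. */
            uint32_t ft = (uint32_t) tl * idistx + (uint32_t) tr * distx;
            uint32_t fb = (uint32_t) bl * idistx + (uint32_t) br * distx;

            /* Vertical lerp adds 8 more fractional bits; >> 16 drops
             * all 16.  Worst case 255 * 256 * 256 fits in 32 bits. */
            return (uint8_t) ((ft * idisty + fb * disty) >> 16);
        }
    ]
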
@@ -345,7 +367,8 @@ bits_image_fetch_bilinear_pixels (bits_image_t *image, uint32_t *buffer, int n_p
  */
 static void
 bits_image_fetch_convolution_pixels (bits_image_t *image,
-				     uint32_t *buffer, int n_pixels)
+                                     uint32_t *    buffer,
+                                     int           n_pixels)
 {
     uint32_t tmp_pixels_stack[N_TMP_PIXELS * 2]; /* Two coordinates per pixel */
     uint32_t *tmp_pixels = tmp_pixels_stack;
@@ -380,7 +403,7 @@ bits_image_fetch_convolution_pixels (bits_image_t *image,
     }
 
     max_n_kernels = n_tmp_pixels / kernel_size;
-    
+
     i = 0;
     coords = (int32_t *)buffer;
     while (i < n_pixels)
@@ -390,7 +413,7 @@ bits_image_fetch_convolution_pixels (bits_image_t *image,
 	int width = image->width;
 	int height = image->height;
 	int j;
-	
+
 	t = (int32_t *)tmp_pixels;
 	for (j = 0; j < n_kernels; ++j)
 	{
@@ -408,9 +431,9 @@ bits_image_fetch_convolution_pixels (bits_image_t *image,
 		{
 		    int rx = x;
 		    int ry = y;
-		    
+
 		    repeat (repeat_mode, width, height, &rx, &ry);
-		    
+
 		    *t++ = rx;
 		    *t++ = ry;
 		}
@@ -429,7 +452,7 @@ bits_image_fetch_convolution_pixels (bits_image_t *image,
 	    int k;
 
 	    srtot = sgtot = sbtot = satot = 0;
-		
+
 	    for (k = 0; k < kernel_size; ++k)
 	    {
 		pixman_fixed_t f = *p++;
@@ -437,10 +460,10 @@ bits_image_fetch_convolution_pixels (bits_image_t *image,
 		{
 		    uint32_t c = *u++;
 
-		    srtot += RED_8(c) * f;
-		    sgtot += GREEN_8(c) * f;
-		    sbtot += BLUE_8(c) * f;
-		    satot += ALPHA_8(c) * f;
+		    srtot += RED_8 (c) * f;
+		    sgtot += GREEN_8 (c) * f;
+		    sbtot += BLUE_8 (c) * f;
+		    satot += ALPHA_8 (c) * f;
 		}
 	    }
 
@@ -448,25 +471,27 @@ bits_image_fetch_convolution_pixels (bits_image_t *image,
 	    srtot >>= 16;
 	    sgtot >>= 16;
 	    sbtot >>= 16;
-	    
-	    if (satot < 0) satot = 0; else if (satot > 0xff) satot = 0xff;
-	    if (srtot < 0) srtot = 0; else if (srtot > 0xff) srtot = 0xff;
-	    if (sgtot < 0) sgtot = 0; else if (sgtot > 0xff) sgtot = 0xff;
-	    if (sbtot < 0) sbtot = 0; else if (sbtot > 0xff) sbtot = 0xff;
+
+	    satot = CLIP (satot, 0, 0xff);
+	    srtot = CLIP (srtot, 0, 0xff);
+	    sgtot = CLIP (sgtot, 0, 0xff);
+	    sbtot = CLIP (sbtot, 0, 0xff);
 
 	    buffer[i++] = ((satot << 24) |
-			   (srtot << 16) |
-			   (sgtot <<  8) |
-			   (sbtot       ));
+	                   (srtot << 16) |
+	                   (sgtot <<  8) |
+	                   (sbtot       ));
 	}
     }
-    
+
     if (tmp_pixels != tmp_pixels_stack)
 	free (tmp_pixels);
 }
 
 static void
-bits_image_fetch_filtered (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+bits_image_fetch_filtered (bits_image_t *pict,
+                           uint32_t *    buffer,
+                           int           n_pixels)
 {
     switch (pict->common.filter)
     {
@@ -474,13 +499,13 @@ bits_image_fetch_filtered (bits_image_t *pict, uint32_t *buffer, int n_pixels)
     case PIXMAN_FILTER_FAST:
 	bits_image_fetch_nearest_pixels (pict, buffer, n_pixels);
 	break;
-	
+
     case PIXMAN_FILTER_BILINEAR:
     case PIXMAN_FILTER_GOOD:
     case PIXMAN_FILTER_BEST:
 	bits_image_fetch_bilinear_pixels (pict, buffer, n_pixels);
 	break;
-	
+
     case PIXMAN_FILTER_CONVOLUTION:
 	bits_image_fetch_convolution_pixels (pict, buffer, n_pixels);
 	break;
@@ -488,16 +513,20 @@ bits_image_fetch_filtered (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 }
 
 static void
-bits_image_fetch_transformed (pixman_image_t * pict, int x, int y,
-			      int width, uint32_t *buffer,
-			      const uint32_t *mask, uint32_t mask_bits)
+bits_image_fetch_transformed (pixman_image_t * pict,
+                              int              x,
+                              int              y,
+                              int              width,
+                              uint32_t *       buffer,
+                              const uint32_t * mask,
+                              uint32_t         mask_bits)
 {
-    uint32_t     *bits;
-    int32_t    stride;
-    pixman_vector_t v;
-    pixman_vector_t unit;
     pixman_bool_t affine = TRUE;
     uint32_t tmp_buffer[2 * N_TMP_PIXELS];
+    pixman_vector_t unit;
+    pixman_vector_t v;
+    uint32_t *bits;
+    int32_t stride;
     int32_t *coords;
     int i;
 
@@ -505,28 +534,28 @@ bits_image_fetch_transformed (pixman_image_t * pict, int x, int y,
     stride = pict->bits.rowstride;
 
     /* reference point is the center of the pixel */
-    v.vector[0] = pixman_int_to_fixed(x) + pixman_fixed_1 / 2;
-    v.vector[1] = pixman_int_to_fixed(y) + pixman_fixed_1 / 2;
+    v.vector[0] = pixman_int_to_fixed (x) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed (y) + pixman_fixed_1 / 2;
     v.vector[2] = pixman_fixed_1;
 
     /* when using convolution filters or PIXMAN_REPEAT_PAD one
      * might get here without a transform */
     if (pict->common.transform)
     {
-        if (!pixman_transform_point_3d (pict->common.transform, &v))
-            return;
-	
-        unit.vector[0] = pict->common.transform->matrix[0][0];
-        unit.vector[1] = pict->common.transform->matrix[1][0];
-        unit.vector[2] = pict->common.transform->matrix[2][0];
+	if (!pixman_transform_point_3d (pict->common.transform, &v))
+	    return;
 
-        affine = (v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0);
+	unit.vector[0] = pict->common.transform->matrix[0][0];
+	unit.vector[1] = pict->common.transform->matrix[1][0];
+	unit.vector[2] = pict->common.transform->matrix[2][0];
+
+	affine = (v.vector[2] == pixman_fixed_1 && unit.vector[2] == 0);
     }
     else
     {
-        unit.vector[0] = pixman_fixed_1;
-        unit.vector[1] = 0;
-        unit.vector[2] = 0;
+	unit.vector[0] = pixman_fixed_1;
+	unit.vector[1] = 0;
+	unit.vector[2] = 0;
     }
 
     i = 0;
@@ -534,7 +563,7 @@ bits_image_fetch_transformed (pixman_image_t * pict, int x, int y,
     {
 	int n_pixels = MIN (N_TMP_PIXELS, width - i);
 	int j;
-	
+
 	coords = (int32_t *)tmp_buffer;
 
 	for (j = 0; j < n_pixels; ++j)
@@ -547,16 +576,16 @@ bits_image_fetch_transformed (pixman_image_t * pict, int x, int y,
 	    else
 	    {
 		pixman_fixed_48_16_t div;
-		
+
 		div = ((pixman_fixed_48_16_t)v.vector[0] << 16) / v.vector[2];
 
 		if ((div >> 16) > 0x7fff)
-		    coords[0] = 0x7fffffff; 
+		    coords[0] = 0x7fffffff;
 		else if ((div >> 16) < 0x8000)
 		    coords[0] = 0x80000000;
 		else
 		    coords[0] = div;
-		
+
 		div = ((pixman_fixed_48_16_t)v.vector[1] << 16) / v.vector[2];
 
 		if ((div >> 16) > 0x7fff)
@@ -576,40 +605,48 @@ bits_image_fetch_transformed (pixman_image_t * pict, int x, int y,
 	}
 
 	bits_image_fetch_filtered (&pict->bits, tmp_buffer, n_pixels);
-	
+
 	for (j = 0; j < n_pixels; ++j)
 	    buffer[i++] = tmp_buffer[j];
     }
 }
 
 static void
-bits_image_fetch_solid_32 (pixman_image_t * image, int x, int y,
-			   int width, uint32_t *buffer,
-			   const uint32_t *mask, uint32_t mask_bits)
+bits_image_fetch_solid_32 (pixman_image_t * image,
+                           int              x,
+                           int              y,
+                           int              width,
+                           uint32_t *       buffer,
+                           const uint32_t * mask,
+                           uint32_t         mask_bits)
 {
     uint32_t color[2];
     uint32_t *end;
 
     color[0] = 0;
     color[1] = 0;
-    
+
     image->bits.fetch_pixels_raw_32 (&image->bits, color, 1);
-    
+
     end = buffer + width;
     while (buffer < end)
 	*(buffer++) = color[0];
 }
 
 static void
-bits_image_fetch_solid_64 (pixman_image_t * image, int x, int y,
-			   int width, uint32_t *b,
-			   const uint32_t *unused, uint32_t unused2)
+bits_image_fetch_solid_64 (pixman_image_t * image,
+                           int              x,
+                           int              y,
+                           int              width,
+                           uint32_t *       b,
+                           const uint32_t * unused,
+                           uint32_t         unused2)
 {
     uint64_t color;
     uint32_t *coords = (uint32_t *)&color;
     uint64_t *buffer = (uint64_t *)b;
     uint64_t *end;
-    
+
     coords[0] = 0;
     coords[1] = 0;
     
@@ -621,9 +658,12 @@ bits_image_fetch_solid_64 (pixman_image_t * image, int x, int y,
 }
 
 static void
-bits_image_fetch_untransformed_repeat_none (bits_image_t *image, pixman_bool_t wide,
-					    int x, int y, int width,
-					    uint32_t *buffer)
+bits_image_fetch_untransformed_repeat_none (bits_image_t *image,
+                                            pixman_bool_t wide,
+                                            int           x,
+                                            int           y,
+                                            int           width,
+                                            uint32_t *    buffer)
 {
     uint32_t w;
 
@@ -636,52 +676,56 @@ bits_image_fetch_untransformed_repeat_none (bits_image_t *image, pixman_bool_t w
     if (x < 0)
     {
 	w = MIN (width, -x);
-	
-	memset (buffer, 0, w * (wide? 8 : 4));
-	
+
+	memset (buffer, 0, w * (wide ? 8 : 4));
+
 	width -= w;
 	buffer += w * (wide? 2 : 1);
 	x += w;
     }
-    
+
     if (x < image->width)
     {
 	w = MIN (width, image->width - x);
-	
+
 	if (wide)
 	    image->fetch_scanline_raw_64 ((pixman_image_t *)image, x, y, w, buffer, NULL, 0);
 	else
 	    image->fetch_scanline_raw_32 ((pixman_image_t *)image, x, y, w, buffer, NULL, 0);
-	
+
 	width -= w;
 	buffer += w * (wide? 2 : 1);
 	x += w;
     }
 
-    memset (buffer, 0, width * (wide? 8 : 4));
+    memset (buffer, 0, width * (wide ? 8 : 4));
 }
 
 static void
-bits_image_fetch_untransformed_repeat_normal (bits_image_t *image, pixman_bool_t wide,
-					      int x, int y, int width,
-					      uint32_t *buffer)
+bits_image_fetch_untransformed_repeat_normal (bits_image_t *image,
+                                              pixman_bool_t wide,
+                                              int           x,
+                                              int           y,
+                                              int           width,
+                                              uint32_t *    buffer)
 {
     uint32_t w;
-    
+
     while (y < 0)
 	y += image->height;
+
     while (y >= image->height)
 	y -= image->height;
-    
+
     while (width)
     {
 	while (x < 0)
 	    x += image->width;
 	while (x >= image->width)
 	    x -= image->width;
-	
+
 	w = MIN (width, image->width - x);
-	
+
 	if (wide)
 	    image->fetch_scanline_raw_64 ((pixman_image_t *)image, x, y, w, buffer, NULL, 0);
 	else
@@ -694,9 +738,13 @@ bits_image_fetch_untransformed_repeat_normal (bits_image_t *image, pixman_bool_t
 }
 
 static void
-bits_image_fetch_untransformed_32 (pixman_image_t * image, int x, int y,
-				   int width, uint32_t *buffer,
-				   const uint32_t *mask, uint32_t mask_bits)
+bits_image_fetch_untransformed_32 (pixman_image_t * image,
+                                   int              x,
+                                   int              y,
+                                   int              width,
+                                   uint32_t *       buffer,
+                                   const uint32_t * mask,
+                                   uint32_t         mask_bits)
 {
     if (image->common.repeat == PIXMAN_REPEAT_NONE)
     {
@@ -711,9 +759,13 @@ bits_image_fetch_untransformed_32 (pixman_image_t * image, int x, int y,
 }
 
 static void
-bits_image_fetch_untransformed_64 (pixman_image_t * image, int x, int y,
-				   int width, uint32_t *buffer,
-				   const uint32_t *unused, uint32_t unused2)
+bits_image_fetch_untransformed_64 (pixman_image_t * image,
+                                   int              x,
+                                   int              y,
+                                   int              width,
+                                   uint32_t *       buffer,
+                                   const uint32_t * unused,
+                                   uint32_t         unused2)
 {
     if (image->common.repeat == PIXMAN_REPEAT_NONE)
     {
@@ -750,9 +802,9 @@ pixman_disable_out_of_bounds_workaround (void)
 static pixman_bool_t
 source_image_needs_out_of_bounds_workaround (bits_image_t *image)
 {
-    if (image->common.clip_sources			&&
-	image->common.repeat == PIXMAN_REPEAT_NONE	&&
-	out_of_bounds_workaround)
+    if (image->common.clip_sources                      &&
+        image->common.repeat == PIXMAN_REPEAT_NONE      &&
+        out_of_bounds_workaround)
     {
 	const pixman_box32_t *boxes;
 	int n;
@@ -778,7 +830,7 @@ source_image_needs_out_of_bounds_workaround (bits_image_t *image)
 
 	return TRUE;
     }
-	
+
     return FALSE;
 }
 
@@ -786,7 +838,7 @@ static void
 bits_image_property_changed (pixman_image_t *image)
 {
     bits_image_t *bits = (bits_image_t *)image;
-    
+
     _pixman_bits_image_setup_raw_accessors (bits);
 
     if (bits->common.alpha_map)
@@ -797,16 +849,16 @@ bits_image_property_changed (pixman_image_t *image)
 	    bits_image_fetch_transformed;
     }
     else if ((bits->common.repeat != PIXMAN_REPEAT_NONE) &&
-	    bits->width == 1 &&
-	    bits->height == 1)
+             bits->width == 1 &&
+             bits->height == 1)
     {
 	image->common.get_scanline_64 = bits_image_fetch_solid_64;
 	image->common.get_scanline_32 = bits_image_fetch_solid_32;
     }
     else if (!bits->common.transform &&
-	     bits->common.filter != PIXMAN_FILTER_CONVOLUTION &&
-	     (bits->common.repeat == PIXMAN_REPEAT_NONE ||
-	      bits->common.repeat == PIXMAN_REPEAT_NORMAL))
+             bits->common.filter != PIXMAN_FILTER_CONVOLUTION &&
+             (bits->common.repeat == PIXMAN_REPEAT_NONE ||
+              bits->common.repeat == PIXMAN_REPEAT_NORMAL))
     {
 	image->common.get_scanline_64 = bits_image_fetch_untransformed_64;
 	image->common.get_scanline_32 = bits_image_fetch_untransformed_32;
@@ -823,78 +875,79 @@ bits_image_property_changed (pixman_image_t *image)
     bits->store_scanline_32 = bits_image_store_scanline_32;
 
     bits->common.need_workaround =
-	source_image_needs_out_of_bounds_workaround (bits);
+        source_image_needs_out_of_bounds_workaround (bits);
 }
 
 static uint32_t *
 create_bits (pixman_format_code_t format,
-	     int		  width,
-	     int		  height,
-	     int		 *rowstride_bytes)
+             int                  width,
+             int                  height,
+             int *                rowstride_bytes)
 {
     int stride;
     int buf_size;
     int bpp;
-    
+
     /* what follows is a long-winded way, avoiding any possibility of integer
      * overflows, of saying:
      * stride = ((width * bpp + 0x1f) >> 5) * sizeof (uint32_t);
      */
-    
+
     bpp = PIXMAN_FORMAT_BPP (format);
     if (pixman_multiply_overflows_int (width, bpp))
 	return NULL;
-    
+
     stride = width * bpp;
     if (pixman_addition_overflows_int (stride, 0x1f))
 	return NULL;
-    
+
     stride += 0x1f;
     stride >>= 5;
-    
+
     stride *= sizeof (uint32_t);
-    
+
     if (pixman_multiply_overflows_int (height, stride))
 	return NULL;
-    
+
     buf_size = height * stride;
-    
+
     if (rowstride_bytes)
 	*rowstride_bytes = stride;
-    
+
     return calloc (buf_size, 1);
 }
 
 PIXMAN_EXPORT pixman_image_t *
-pixman_image_create_bits (pixman_format_code_t  format,
-			  int                   width,
-			  int                   height,
-			  uint32_t	       *bits,
-			  int			rowstride_bytes)
+pixman_image_create_bits (pixman_format_code_t format,
+                          int                  width,
+                          int                  height,
+                          uint32_t *           bits,
+                          int                  rowstride_bytes)
 {
     pixman_image_t *image;
     uint32_t *free_me = NULL;
-    
+
     /* must be a whole number of uint32_t's
      */
     return_val_if_fail (bits == NULL ||
-			(rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
-    
+                        (rowstride_bytes % sizeof (uint32_t)) == 0, NULL);
+
     if (!bits && width && height)
     {
 	free_me = bits = create_bits (format, width, height, &rowstride_bytes);
 	if (!bits)
 	    return NULL;
     }
-    
-    image = _pixman_image_allocate();
-    
-    if (!image) {
+
+    image = _pixman_image_allocate ();
+
+    if (!image)
+    {
 	if (free_me)
 	    free (free_me);
 	return NULL;
     }
-    
+
     image->type = BITS;
     image->bits.format = format;
     image->bits.width = width;
@@ -904,14 +957,14 @@ pixman_image_create_bits (pixman_format_code_t  format,
 
     /* The rowstride is stored in number of uint32_t */
     image->bits.rowstride = rowstride_bytes / (int) sizeof (uint32_t);
-    
+
     image->bits.indexed = NULL;
-    
+
     image->common.property_changed = bits_image_property_changed;
-    
+
     bits_image_property_changed (image);
-    
+
     _pixman_image_reset_clip_region (image);
-    
+
     return image;
 }
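
As an aside for readers of the create_bits () hunk above: its comment calls
the code "a long-winded way, avoiding any possibility of integer overflows"
of computing stride = ((width * bpp + 0x1f) >> 5) * sizeof (uint32_t).
Below is a minimal standalone C sketch of that check-before-each-step
pattern; mul_overflows () and add_overflows () are hypothetical stand-ins
for pixman_multiply_overflows_int () and pixman_addition_overflows_int (),
not the actual pixman API:

    #include <limits.h>
    #include <stdint.h>

    /* Hypothetical helpers: detect overflow for non-negative ints */
    static int
    mul_overflows (int a, int b)
    {
        return b != 0 && a > INT_MAX / b;
    }

    static int
    add_overflows (int a, int b)
    {
        return a > INT_MAX - b;
    }

    /* Returns the stride in bytes, or -1 if any intermediate step would
     * overflow.  Mirrors the sequence in create_bits (): bits per row,
     * round up to a whole number of uint32_t's, convert to bytes, then
     * make sure height * stride is representable as well.
     */
    static int
    checked_stride (int width, int height, int bpp)
    {
        int stride;

        if (mul_overflows (width, bpp))
            return -1;
        stride = width * bpp;

        if (add_overflows (stride, 0x1f))
            return -1;
        stride = ((stride + 0x1f) >> 5) * (int) sizeof (uint32_t);

        if (mul_overflows (height, stride))
            return -1;

        return stride;
    }

A caller would treat a -1 return the same way create_bits () treats a
failed check: refuse to allocate.
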
commit 89eda86549e8cf938556b1a238960f2fbb7b911c
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 18:45:56 2009 -0400

    Reindent and reformat pixman-arm-simd.c

diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index d16ca18..b7912b6 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -30,26 +30,25 @@
 #include "pixman-private.h"
 
 static void
-arm_composite_add_8000_8000 (
-                            pixman_implementation_t * impl,
-                            pixman_op_t op,
-				pixman_image_t * src_image,
-				pixman_image_t * mask_image,
-				pixman_image_t * dst_image,
-				int32_t      src_x,
-				int32_t      src_y,
-				int32_t      mask_x,
-				int32_t      mask_y,
-				int32_t      dest_x,
-				int32_t      dest_y,
-				int32_t      width,
-				int32_t      height)
+arm_composite_add_8000_8000 (pixman_implementation_t * impl,
+    pixman_op_t               op,
+    pixman_image_t *          src_image,
+    pixman_image_t *          mask_image,
+    pixman_image_t *          dst_image,
+    int32_t                   src_x,
+    int32_t                   src_y,
+    int32_t                   mask_x,
+    int32_t                   mask_y,
+    int32_t                   dest_x,
+    int32_t                   dest_y,
+    int32_t                   width,
+    int32_t                   height)
 {
-    uint8_t	*dst_line, *dst;
-    uint8_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint16_t	w;
-    uint8_t	s, d;
+    uint8_t     *dst_line, *dst;
+    uint8_t     *src_line, *src;
+    int dst_stride, src_stride;
+    uint16_t w;
+    uint8_t s, d;
 
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
@@ -62,13 +61,14 @@ arm_composite_add_8000_8000 (
 	src_line += src_stride;
 	w = width;
 
-        /* ensure both src and dst are properly aligned before doing 32 bit reads
-         * we'll stay in this loop if src and dst have differing alignments */
+	/* ensure both src and dst are properly aligned before doing 32-bit reads;
+	 * we'll stay in this loop if src and dst have differing alignments
+	 */
 	while (w && (((unsigned long)dst & 3) || ((unsigned long)src & 3)))
 	{
 	    s = *src;
 	    d = *dst;
-	    asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));
+	    asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s));
 	    *dst = d;
 
 	    dst++;
@@ -78,7 +78,9 @@ arm_composite_add_8000_8000 (
 
 	while (w >= 4)
 	{
-	    asm("uqadd8 %0, %1, %2" : "=r"(*(uint32_t*)dst) : "r"(*(uint32_t*)src), "r"(*(uint32_t*)dst));
+	    asm ("uqadd8 %0, %1, %2"
+		 : "=r" (*(uint32_t*)dst)
+		 : "r" (*(uint32_t*)src), "r" (*(uint32_t*)dst));
 	    dst += 4;
 	    src += 4;
 	    w -= 4;
@@ -88,7 +90,7 @@ arm_composite_add_8000_8000 (
 	{
 	    s = *src;
 	    d = *dst;
-	    asm("uqadd8 %0, %1, %2" : "+r"(d) : "r"(s));
+	    asm ("uqadd8 %0, %1, %2" : "+r" (d) : "r" (s));
 	    *dst = d;
 
 	    dst++;
@@ -100,25 +102,24 @@ arm_composite_add_8000_8000 (
 }
 
 static void
-arm_composite_over_8888_8888 (
-                            pixman_implementation_t * impl,
-                            pixman_op_t op,
-			 pixman_image_t * src_image,
-			 pixman_image_t * mask_image,
-			 pixman_image_t * dst_image,
-			 int32_t      src_x,
-			 int32_t      src_y,
-			 int32_t      mask_x,
-			 int32_t      mask_y,
-			 int32_t      dest_x,
-			 int32_t      dest_y,
-			 int32_t      width,
-			 int32_t      height)
+arm_composite_over_8888_8888 (pixman_implementation_t * impl,
+    pixman_op_t               op,
+    pixman_image_t *          src_image,
+    pixman_image_t *          mask_image,
+    pixman_image_t *          dst_image,
+    int32_t                   src_x,
+    int32_t                   src_y,
+    int32_t                   mask_x,
+    int32_t                   mask_y,
+    int32_t                   dest_x,
+    int32_t                   dest_y,
+    int32_t                   width,
+    int32_t                   height)
 {
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint16_t	w;
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+    uint16_t w;
     uint32_t component_half = 0x800080;
     uint32_t upper_component_mask = 0xff00ff00;
     uint32_t alpha_mask = 0xff;
@@ -136,83 +137,84 @@ arm_composite_over_8888_8888 (
 
 //#define inner_branch
 	asm volatile (
-			"cmp %[w], #0\n\t"
-			"beq 2f\n\t"
-			"1:\n\t"
-			/* load src */
-			"ldr r5, [%[src]], #4\n\t"
+	    "cmp %[w], #0\n\t"
+	    "beq 2f\n\t"
+	    "1:\n\t"
+	    /* load src */
+	    "ldr r5, [%[src]], #4\n\t"
 #ifdef inner_branch
-			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-			 * The 0x0 case also allows us to avoid doing an unecessary data
-			 * write which is more valuable so we only check for that */
-			"cmp r5, #0\n\t"
-			"beq 3f\n\t"
+	    /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+	     * The 0x0 case also allows us to avoid doing an unnecessary data
+	     * write, which is more valuable, so we only check for that
+	     */
+	    "cmp r5, #0\n\t"
+	    "beq 3f\n\t"
 
-			/* = 255 - alpha */
-			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
+	    /* = 255 - alpha */
+	    "sub r8, %[alpha_mask], r5, lsr #24\n\t"
 
-			"ldr r4, [%[dest]] \n\t"
+	    "ldr r4, [%[dest]] \n\t"
 
 #else
-			"ldr r4, [%[dest]] \n\t"
+	    "ldr r4, [%[dest]] \n\t"
 
-			/* = 255 - alpha */
-			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
+	    /* = 255 - alpha */
+	    "sub r8, %[alpha_mask], r5, lsr #24\n\t"
 #endif
-			"uxtb16 r6, r4\n\t"
-			"uxtb16 r7, r4, ror #8\n\t"
+	    "uxtb16 r6, r4\n\t"
+	    "uxtb16 r7, r4, ror #8\n\t"
 
-			/* multiply by 257 and divide by 65536 */
-			"mla r6, r6, r8, %[component_half]\n\t"
-			"mla r7, r7, r8, %[component_half]\n\t"
+	    /* multiply by 257 and divide by 65536 */
+	    "mla r6, r6, r8, %[component_half]\n\t"
+	    "mla r7, r7, r8, %[component_half]\n\t"
 
-			"uxtab16 r6, r6, r6, ror #8\n\t"
-			"uxtab16 r7, r7, r7, ror #8\n\t"
+	    "uxtab16 r6, r6, r6, ror #8\n\t"
+	    "uxtab16 r7, r7, r7, ror #8\n\t"
 
-			/* recombine the 0xff00ff00 bytes of r6 and r7 */
-			"and r7, r7, %[upper_component_mask]\n\t"
-			"uxtab16 r6, r7, r6, ror #8\n\t"
+	    /* recombine the 0xff00ff00 bytes of r6 and r7 */
+	    "and r7, r7, %[upper_component_mask]\n\t"
+	    "uxtab16 r6, r7, r6, ror #8\n\t"
 
-			"uqadd8 r5, r6, r5\n\t"
+	    "uqadd8 r5, r6, r5\n\t"
 
 #ifdef inner_branch
-			"3:\n\t"
+	    "3:\n\t"
 
 #endif
-			"str r5, [%[dest]], #4\n\t"
-			/* increment counter and jmp to top */
-			"subs	%[w], %[w], #1\n\t"
-			"bne	1b\n\t"
-			"2:\n\t"
-			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
-			: [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
-			  [alpha_mask] "r" (alpha_mask)
-			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
-			);
+	    "str r5, [%[dest]], #4\n\t"
+	    /* increment counter and jmp to top */
+	    "subs	%[w], %[w], #1\n\t"
+	    "bne	1b\n\t"
+	    "2:\n\t"
+	    : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
+	    : [component_half] "r" (component_half), [upper_component_mask] "r" (upper_component_mask),
+	    [alpha_mask] "r" (alpha_mask)
+	    : "r4", "r5", "r6", "r7", "r8", "cc", "memory"
+	    );
     }
 }
 
 static void
 arm_composite_over_8888_n_8888 (
-                            pixman_implementation_t * impl,
-                            pixman_op_t op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t	src_x,
-			       int32_t	src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t      width,
-			       int32_t      height)
+    pixman_implementation_t * impl,
+    pixman_op_t               op,
+    pixman_image_t *          src_image,
+    pixman_image_t *          mask_image,
+    pixman_image_t *          dst_image,
+    int32_t                   src_x,
+    int32_t                   src_y,
+    int32_t                   mask_x,
+    int32_t                   mask_y,
+    int32_t                   dest_x,
+    int32_t                   dest_y,
+    int32_t                   width,
+    int32_t                   height)
 {
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    uint32_t	mask;
-    int	dst_stride, src_stride;
-    uint16_t	w;
+    uint32_t *dst_line, *dst;
+    uint32_t *src_line, *src;
+    uint32_t mask;
+    int dst_stride, src_stride;
+    uint16_t w;
     uint32_t component_half = 0x800080;
     uint32_t alpha_mask = 0xff;
 
@@ -230,102 +232,102 @@ arm_composite_over_8888_n_8888 (
 	src_line += src_stride;
 	w = width;
 
-//#define inner_branch
+/* #define inner_branch */
 	asm volatile (
-			"cmp %[w], #0\n\t"
-			"beq 2f\n\t"
-			"1:\n\t"
-			/* load src */
-			"ldr r5, [%[src]], #4\n\t"
+	    "cmp %[w], #0\n\t"
+	    "beq 2f\n\t"
+	    "1:\n\t"
+	    /* load src */
+	    "ldr r5, [%[src]], #4\n\t"
 #ifdef inner_branch
-			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-			 * The 0x0 case also allows us to avoid doing an unecessary data
-			 * write which is more valuable so we only check for that */
-			"cmp r5, #0\n\t"
-			"beq 3f\n\t"
+	    /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+	     * The 0x0 case also allows us to avoid doing an unnecessary data
+	     * write, which is more valuable, so we only check for that
+	     */
+	    "cmp r5, #0\n\t"
+	    "beq 3f\n\t"
 
 #endif
-			"ldr r4, [%[dest]] \n\t"
+	    "ldr r4, [%[dest]] \n\t"
 
-			"uxtb16 r6, r5\n\t"
-			"uxtb16 r7, r5, ror #8\n\t"
+	    "uxtb16 r6, r5\n\t"
+	    "uxtb16 r7, r5, ror #8\n\t"
 
-			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-			"mla r6, r6, %[mask_alpha], %[component_half]\n\t"
-			"mla r7, r7, %[mask_alpha], %[component_half]\n\t"
+	    /* multiply by alpha (r8) then by 257 and divide by 65536 */
+	    "mla r6, r6, %[mask_alpha], %[component_half]\n\t"
+	    "mla r7, r7, %[mask_alpha], %[component_half]\n\t"
 
-			"uxtab16 r6, r6, r6, ror #8\n\t"
-			"uxtab16 r7, r7, r7, ror #8\n\t"
+	    "uxtab16 r6, r6, r6, ror #8\n\t"
+	    "uxtab16 r7, r7, r7, ror #8\n\t"
 
-			"uxtb16 r6, r6, ror #8\n\t"
-			"uxtb16 r7, r7, ror #8\n\t"
+	    "uxtb16 r6, r6, ror #8\n\t"
+	    "uxtb16 r7, r7, ror #8\n\t"
 
-			/* recombine */
-			"orr r5, r6, r7, lsl #8\n\t"
+	    /* recombine */
+	    "orr r5, r6, r7, lsl #8\n\t"
 
-			"uxtb16 r6, r4\n\t"
-			"uxtb16 r7, r4, ror #8\n\t"
+	    "uxtb16 r6, r4\n\t"
+	    "uxtb16 r7, r4, ror #8\n\t"
 
-			/* 255 - alpha */
-			"sub r8, %[alpha_mask], r5, lsr #24\n\t"
+	    /* 255 - alpha */
+	    "sub r8, %[alpha_mask], r5, lsr #24\n\t"
 
-			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-			"mla r6, r6, r8, %[component_half]\n\t"
-			"mla r7, r7, r8, %[component_half]\n\t"
+	    /* multiply by alpha (r8) then by 257 and divide by 65536 */
+	    "mla r6, r6, r8, %[component_half]\n\t"
+	    "mla r7, r7, r8, %[component_half]\n\t"
 
-			"uxtab16 r6, r6, r6, ror #8\n\t"
-			"uxtab16 r7, r7, r7, ror #8\n\t"
+	    "uxtab16 r6, r6, r6, ror #8\n\t"
+	    "uxtab16 r7, r7, r7, ror #8\n\t"
 
-			"uxtb16 r6, r6, ror #8\n\t"
-			"uxtb16 r7, r7, ror #8\n\t"
+	    "uxtb16 r6, r6, ror #8\n\t"
+	    "uxtb16 r7, r7, ror #8\n\t"
 
-			/* recombine */
-			"orr r6, r6, r7, lsl #8\n\t"
+	    /* recombine */
+	    "orr r6, r6, r7, lsl #8\n\t"
 
-			"uqadd8 r5, r6, r5\n\t"
+	    "uqadd8 r5, r6, r5\n\t"
 
 #ifdef inner_branch
-			"3:\n\t"
+	    "3:\n\t"
 
 #endif
-			"str r5, [%[dest]], #4\n\t"
-			/* increment counter and jmp to top */
-			"subs	%[w], %[w], #1\n\t"
-			"bne	1b\n\t"
-			"2:\n\t"
-			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
-			: [component_half] "r" (component_half), [mask_alpha] "r" (mask),
-			  [alpha_mask] "r" (alpha_mask)
-			: "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
-			);
+	    "str r5, [%[dest]], #4\n\t"
+	    /* increment counter and jmp to top */
+	    "subs	%[w], %[w], #1\n\t"
+	    "bne	1b\n\t"
+	    "2:\n\t"
+	    : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src)
+	    : [component_half] "r" (component_half), [mask_alpha] "r" (mask),
+	    [alpha_mask] "r" (alpha_mask)
+	    : "r4", "r5", "r6", "r7", "r8", "r9", "cc", "memory"
+	    );
     }
 }
 
 static void
-arm_composite_over_n_8_8888 (
-                            pixman_implementation_t * impl,
-                            pixman_op_t      op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t      src_x,
-			       int32_t      src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t      width,
-			       int32_t      height)
+arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
+			     pixman_op_t               op,
+			     pixman_image_t *          src_image,
+			     pixman_image_t *          mask_image,
+			     pixman_image_t *          dst_image,
+			     int32_t                   src_x,
+			     int32_t                   src_y,
+			     int32_t                   mask_x,
+			     int32_t                   mask_y,
+			     int32_t                   dest_x,
+			     int32_t                   dest_y,
+			     int32_t                   width,
+			     int32_t                   height)
 {
-    uint32_t	 src, srca;
-    uint32_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int		 dst_stride, mask_stride;
-    uint16_t	 w;
+    uint32_t src, srca;
+    uint32_t *dst_line, *dst;
+    uint8_t  *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint16_t w;
 
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
 
-    // bail out if fully transparent
+    /* bail out if fully transparent */
     srca = src >> 24;
     if (src == 0)
 	return;
@@ -347,86 +349,86 @@ arm_composite_over_n_8_8888 (
 	mask_line += mask_stride;
 	w = width;
 
-//#define inner_branch
+/* #define inner_branch */
 	asm volatile (
-			"cmp %[w], #0\n\t"
-			"beq 2f\n\t"
-			"1:\n\t"
-			/* load mask */
-			"ldrb r5, [%[mask]], #1\n\t"
+	    "cmp %[w], #0\n\t"
+	    "beq 2f\n\t"
+	    "1:\n\t"
+	    /* load mask */
+	    "ldrb r5, [%[mask]], #1\n\t"
 #ifdef inner_branch
-			/* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
-			 * The 0x0 case also allows us to avoid doing an unecessary data
-			 * write which is more valuable so we only check for that */
-			"cmp r5, #0\n\t"
-			"beq 3f\n\t"
+	    /* We can avoid doing the multiplication in two cases: 0x0 or 0xff.
+	     * The 0x0 case also allows us to avoid doing an unnecessary data
+	     * write, which is more valuable, so we only check for that
+	     */
+	    "cmp r5, #0\n\t"
+	    "beq 3f\n\t"
 
 #endif
-			"ldr r4, [%[dest]] \n\t"
+	    "ldr r4, [%[dest]] \n\t"
 
-			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-			"mla r6, %[src_lo], r5, %[component_half]\n\t"
-			"mla r7, %[src_hi], r5, %[component_half]\n\t"
+	    /* multiply by alpha (r8) then by 257 and divide by 65536 */
+	    "mla r6, %[src_lo], r5, %[component_half]\n\t"
+	    "mla r7, %[src_hi], r5, %[component_half]\n\t"
 
-			"uxtab16 r6, r6, r6, ror #8\n\t"
-			"uxtab16 r7, r7, r7, ror #8\n\t"
+	    "uxtab16 r6, r6, r6, ror #8\n\t"
+	    "uxtab16 r7, r7, r7, ror #8\n\t"
 
-			"uxtb16 r6, r6, ror #8\n\t"
-			"uxtb16 r7, r7, ror #8\n\t"
+	    "uxtb16 r6, r6, ror #8\n\t"
+	    "uxtb16 r7, r7, ror #8\n\t"
 
-			/* recombine */
-			"orr r5, r6, r7, lsl #8\n\t"
+	    /* recombine */
+	    "orr r5, r6, r7, lsl #8\n\t"
 
-			"uxtb16 r6, r4\n\t"
-			"uxtb16 r7, r4, ror #8\n\t"
+	    "uxtb16 r6, r4\n\t"
+	    "uxtb16 r7, r4, ror #8\n\t"
 
-			/* we could simplify this to use 'sub' if we were
-			 * willing to give up a register for alpha_mask */
-			"mvn r8, r5\n\t"
-			"mov r8, r8, lsr #24\n\t"
+	    /* we could simplify this to use 'sub' if we were
+	     * willing to give up a register for alpha_mask */
+	    "mvn r8, r5\n\t"
+	    "mov r8, r8, lsr #24\n\t"
 
-			/* multiply by alpha (r8) then by 257 and divide by 65536 */
-			"mla r6, r6, r8, %[component_half]\n\t"
-			"mla r7, r7, r8, %[component_half]\n\t"
+	    /* multiply by alpha (r8) then by 257 and divide by 65536 */
+	    "mla r6, r6, r8, %[component_half]\n\t"
+	    "mla r7, r7, r8, %[component_half]\n\t"
 
-			"uxtab16 r6, r6, r6, ror #8\n\t"
-			"uxtab16 r7, r7, r7, ror #8\n\t"
+	    "uxtab16 r6, r6, r6, ror #8\n\t"
+	    "uxtab16 r7, r7, r7, ror #8\n\t"
 
-			"uxtb16 r6, r6, ror #8\n\t"
-			"uxtb16 r7, r7, ror #8\n\t"
+	    "uxtb16 r6, r6, ror #8\n\t"
+	    "uxtb16 r7, r7, ror #8\n\t"
 
-			/* recombine */
-			"orr r6, r6, r7, lsl #8\n\t"
+	    /* recombine */
+	    "orr r6, r6, r7, lsl #8\n\t"
 
-			"uqadd8 r5, r6, r5\n\t"
+	    "uqadd8 r5, r6, r5\n\t"
 
 #ifdef inner_branch
-			"3:\n\t"
+	    "3:\n\t"
 
 #endif
-			"str r5, [%[dest]], #4\n\t"
-			/* increment counter and jmp to top */
-			"subs	%[w], %[w], #1\n\t"
-			"bne	1b\n\t"
-			"2:\n\t"
-			: [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask)
-			: [component_half] "r" (component_half),
-			  [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
-			: "r4", "r5", "r6", "r7", "r8", "cc", "memory"
-			);
+	    "str r5, [%[dest]], #4\n\t"
+	    /* increment counter and jmp to top */
+	    "subs	%[w], %[w], #1\n\t"
+	    "bne	1b\n\t"
+	    "2:\n\t"
+	    : [w] "+r" (w), [dest] "+r" (dst), [src] "+r" (src), [mask] "+r" (mask)
+	    : [component_half] "r" (component_half),
+	      [src_hi] "r" (src_hi), [src_lo] "r" (src_lo)
+	    : "r4", "r5", "r6", "r7", "r8", "cc", "memory");
     }
 }
 
 static const pixman_fast_path_t arm_simd_fast_path_array[] =
 {
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, arm_composite_over_8888_8888,      0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,	PIXMAN_x8r8g8b8, arm_composite_over_8888_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,	PIXMAN_a8b8g8r8, arm_composite_over_8888_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,	PIXMAN_x8b8g8r8, arm_composite_over_8888_8888,	   0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, arm_composite_over_8888_n_8888,    NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, arm_composite_over_8888_n_8888,	   NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, arm_composite_over_8888_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, arm_composite_over_8888_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, arm_composite_over_8888_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, arm_composite_over_8888_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, arm_composite_over_8888_n_8888,  NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, arm_composite_over_8888_n_8888,  NEED_SOLID_MASK },
 
-    { PIXMAN_OP_ADD, PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       arm_composite_add_8000_8000,   0 },
+    { PIXMAN_OP_ADD, PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       arm_composite_add_8000_8000,     0 },
 
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, arm_composite_over_n_8_8888,     0 },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, arm_composite_over_n_8_8888,     0 },
@@ -440,35 +442,35 @@ const pixman_fast_path_t *const arm_simd_fast_paths = arm_simd_fast_path_array;
 
 static void
 arm_simd_composite (pixman_implementation_t *imp,
-		pixman_op_t     op,
-		pixman_image_t *src,
-		pixman_image_t *mask,
-		pixman_image_t *dest,
-		int32_t         src_x,
-		int32_t         src_y,
-		int32_t         mask_x,
-		int32_t         mask_y,
-		int32_t         dest_x,
-		int32_t         dest_y,
-		int32_t        width,
-		int32_t        height)
+                    pixman_op_t              op,
+                    pixman_image_t *         src,
+                    pixman_image_t *         mask,
+                    pixman_image_t *         dest,
+                    int32_t                  src_x,
+                    int32_t                  src_y,
+                    int32_t                  mask_x,
+                    int32_t                  mask_y,
+                    int32_t                  dest_x,
+                    int32_t                  dest_y,
+                    int32_t                  width,
+                    int32_t                  height)
 {
     if (_pixman_run_fast_path (arm_simd_fast_paths, imp,
-			       op, src, mask, dest,
-			       src_x, src_y,
-			       mask_x, mask_y,
-			       dest_x, dest_y,
-			       width, height))
+                               op, src, mask, dest,
+                               src_x, src_y,
+                               mask_x, mask_y,
+                               dest_x, dest_y,
+                               width, height))
     {
 	return;
     }
 
     _pixman_implementation_composite (imp->delegate, op,
-				      src, mask, dest,
-				      src_x, src_y,
-				      mask_x, mask_y,
-				      dest_x, dest_y,
-				      width, height);
+                                      src, mask, dest,
+                                      src_x, src_y,
+                                      mask_x, mask_y,
+                                      dest_x, dest_y,
+                                      width, height);
 }
 
 pixman_implementation_t *
@@ -481,3 +483,4 @@ _pixman_implementation_create_arm_simd (void)
 
     return imp;
 }
+
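
Two recurring idioms in the asm above are worth spelling out.  The
"multiply by 257 and divide by 65536" comments describe the standard exact
division by 255: for 8-bit operands, (t + (t >> 8)) >> 8 applied to
t = a * b + 0x80 equals (t * 257) >> 16, which is a * b / 255 with
rounding.  The mla/uxtab16 pairs do this for two colour components packed
per register, and uqadd8 then performs the final per-byte saturating add.
A scalar C model of the arithmetic (illustrative names, not pixman code):

    #include <stdint.h>

    /* Exact (a * b) / 255 with rounding, for 0 <= a, b <= 255.
     * t + (t >> 8) is the multiply by 257; the final >> 8 completes
     * the divide by 65536 from the asm comments.
     */
    static uint8_t
    mul_div_255 (uint8_t a, uint8_t b)
    {
        uint32_t t = (uint32_t) a * b + 0x80;
        return (uint8_t) ((t + (t >> 8)) >> 8);
    }

    /* One channel of the premultiplied OVER operator computed by the
     * loops above: dest = src + dest * (255 - src_alpha) / 255, with
     * the saturation that uqadd8 provides done explicitly here.
     */
    static uint8_t
    over_channel (uint8_t s, uint8_t d, uint8_t src_alpha)
    {
        uint32_t r = s + mul_div_255 (d, 255 - src_alpha);
        return r > 255 ? 255 : (uint8_t) r;
    }
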
commit 9a26a60a233955aadab65fde5bf31fc0199663ea
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 18:42:31 2009 -0400

    Reindent and reformat pixman-arm-neon.c

diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index ba29230..a802abb 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -20,7 +20,7 @@
  * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
  * SOFTWARE.
  *
- * Author:  Ian Rickards (ian.rickards at arm.com) 
+ * Author:  Ian Rickards (ian.rickards at arm.com)
  * Author:  Jonathan Morton (jonathan.morton at movial.com)
  * Author:  Markku Vire (markku.vire at movial.com)
  *
@@ -34,1283 +34,1338 @@
 #include <string.h>
 #include "pixman-private.h"
 
-// Deal with an intrinsic that is defined differently in GCC
+/* Deal with an intrinsic that is defined differently in GCC */
 #if !defined(__ARMCC_VERSION) && !defined(__pld)
-#define __pld(_x) __builtin_prefetch(_x)
+#define __pld(_x) __builtin_prefetch (_x)
 #endif
 
-static force_inline uint8x8x4_t unpack0565(uint16x8_t rgb)
+static force_inline uint8x8x4_t
+unpack0565 (uint16x8_t rgb)
 {
     uint16x8_t gb, b;
     uint8x8x4_t res;
 
-    res.val[3] = vdup_n_u8(0);
-    gb = vshrq_n_u16(rgb, 5);
-    b = vshrq_n_u16(rgb, 5+6);
-    res.val[0] = vmovn_u16(rgb);  // get low 5 bits
-    res.val[1] = vmovn_u16(gb);   // get mid 6 bits
-    res.val[2] = vmovn_u16(b);    // get top 5 bits
+    res.val[3] = vdup_n_u8 (0);
+    gb = vshrq_n_u16 (rgb, 5);
+    b = vshrq_n_u16 (rgb, 5 + 6);
 
-    res.val[0] = vshl_n_u8(res.val[0], 3); // shift to top
-    res.val[1] = vshl_n_u8(res.val[1], 2); // shift to top
-    res.val[2] = vshl_n_u8(res.val[2], 3); // shift to top
+    res.val[0] = vmovn_u16 (rgb);  /* get low 5 bits */
+    res.val[1] = vmovn_u16 (gb);   /* get mid 6 bits */
+    res.val[2] = vmovn_u16 (b);    /* get top 5 bits */
 
-    res.val[0] = vsri_n_u8(res.val[0], res.val[0], 5); 
-    res.val[1] = vsri_n_u8(res.val[1], res.val[1], 6);
-    res.val[2] = vsri_n_u8(res.val[2], res.val[2], 5);
+    res.val[0] = vshl_n_u8 (res.val[0], 3); /* shift to top */
+    res.val[1] = vshl_n_u8 (res.val[1], 2); /* shift to top */
+    res.val[2] = vshl_n_u8 (res.val[2], 3); /* shift to top */
+
+    res.val[0] = vsri_n_u8 (res.val[0], res.val[0], 5);
+    res.val[1] = vsri_n_u8 (res.val[1], res.val[1], 6);
+    res.val[2] = vsri_n_u8 (res.val[2], res.val[2], 5);
 
     return res;
 }
 
-static force_inline uint16x8_t pack0565(uint8x8x4_t s)
+static force_inline uint16x8_t
+pack0565 (uint8x8x4_t s)
 {
     uint16x8_t rgb, val_g, val_r;
 
-    rgb = vshll_n_u8(s.val[2],8);
-    val_g = vshll_n_u8(s.val[1],8);
-    val_r = vshll_n_u8(s.val[0],8);
-    rgb = vsriq_n_u16(rgb, val_g, 5);
-    rgb = vsriq_n_u16(rgb, val_r, 5+6);
+    rgb = vshll_n_u8 (s.val[2], 8);
+    val_g = vshll_n_u8 (s.val[1], 8);
+    val_r = vshll_n_u8 (s.val[0], 8);
+    rgb = vsriq_n_u16 (rgb, val_g, 5);
+    rgb = vsriq_n_u16 (rgb, val_r, 5 + 6);
 
     return rgb;
 }
 
-static force_inline uint8x8_t neon2mul(uint8x8_t x, uint8x8_t alpha)
+static force_inline uint8x8_t
+neon2mul (uint8x8_t x,
+          uint8x8_t alpha)
 {
-    uint16x8_t tmp,tmp2;
+    uint16x8_t tmp, tmp2;
     uint8x8_t res;
 
-    tmp = vmull_u8(x,alpha);
-    tmp2 = vrshrq_n_u16(tmp,8);
-    res = vraddhn_u16(tmp,tmp2);
+    tmp = vmull_u8 (x, alpha);
+    tmp2 = vrshrq_n_u16 (tmp, 8);
+    res = vraddhn_u16 (tmp, tmp2);
 
     return res;
 }
 
-static force_inline uint8x8x4_t neon8mul(uint8x8x4_t x, uint8x8_t alpha)
+static force_inline uint8x8x4_t
+neon8mul (uint8x8x4_t x,
+          uint8x8_t   alpha)
 {
     uint16x8x4_t tmp;
     uint8x8x4_t res;
-    uint16x8_t qtmp1,qtmp2;
-
-    tmp.val[0] = vmull_u8(x.val[0],alpha);
-    tmp.val[1] = vmull_u8(x.val[1],alpha);
-    tmp.val[2] = vmull_u8(x.val[2],alpha);
-    tmp.val[3] = vmull_u8(x.val[3],alpha);
-
-    qtmp1 = vrshrq_n_u16(tmp.val[0],8);
-    qtmp2 = vrshrq_n_u16(tmp.val[1],8);
-    res.val[0] = vraddhn_u16(tmp.val[0],qtmp1);
-    qtmp1 = vrshrq_n_u16(tmp.val[2],8);
-    res.val[1] = vraddhn_u16(tmp.val[1],qtmp2);
-    qtmp2 = vrshrq_n_u16(tmp.val[3],8);
-    res.val[2] = vraddhn_u16(tmp.val[2],qtmp1);
-    res.val[3] = vraddhn_u16(tmp.val[3],qtmp2);
+    uint16x8_t qtmp1, qtmp2;
+
+    tmp.val[0] = vmull_u8 (x.val[0], alpha);
+    tmp.val[1] = vmull_u8 (x.val[1], alpha);
+    tmp.val[2] = vmull_u8 (x.val[2], alpha);
+    tmp.val[3] = vmull_u8 (x.val[3], alpha);
+
+    qtmp1 = vrshrq_n_u16 (tmp.val[0], 8);
+    qtmp2 = vrshrq_n_u16 (tmp.val[1], 8);
+    res.val[0] = vraddhn_u16 (tmp.val[0], qtmp1);
+    qtmp1 = vrshrq_n_u16 (tmp.val[2], 8);
+    res.val[1] = vraddhn_u16 (tmp.val[1], qtmp2);
+    qtmp2 = vrshrq_n_u16 (tmp.val[3], 8);
+    res.val[2] = vraddhn_u16 (tmp.val[2], qtmp1);
+    res.val[3] = vraddhn_u16 (tmp.val[3], qtmp2);
 
     return res;
 }
 
-static force_inline uint8x8x4_t neon8qadd(uint8x8x4_t x, uint8x8x4_t y)
+static force_inline uint8x8x4_t
+neon8qadd (uint8x8x4_t x,
+           uint8x8x4_t y)
 {
     uint8x8x4_t res;
 
-    res.val[0] = vqadd_u8(x.val[0],y.val[0]);
-    res.val[1] = vqadd_u8(x.val[1],y.val[1]);
-    res.val[2] = vqadd_u8(x.val[2],y.val[2]);
-    res.val[3] = vqadd_u8(x.val[3],y.val[3]);
+    res.val[0] = vqadd_u8 (x.val[0], y.val[0]);
+    res.val[1] = vqadd_u8 (x.val[1], y.val[1]);
+    res.val[2] = vqadd_u8 (x.val[2], y.val[2]);
+    res.val[3] = vqadd_u8 (x.val[3], y.val[3]);
 
     return res;
 }
 
-
 static void
-neon_composite_add_8000_8000 (
-                            pixman_implementation_t * impl,
-                            pixman_op_t op,
-                                pixman_image_t * src_image,
-                                pixman_image_t * mask_image,
-                                pixman_image_t * dst_image,
-                                int32_t      src_x,
-                                int32_t      src_y,
-                                int32_t      mask_x,
-                                int32_t      mask_y,
-                                int32_t      dest_x,
-                                int32_t      dest_y,
-                                int32_t      width,
-                                int32_t      height)
+neon_composite_add_8000_8000 (pixman_implementation_t * impl,
+                              pixman_op_t               op,
+                              pixman_image_t *          src_image,
+                              pixman_image_t *          mask_image,
+                              pixman_image_t *          dst_image,
+                              int32_t                   src_x,
+                              int32_t                   src_y,
+                              int32_t                   mask_x,
+                              int32_t                   mask_y,
+                              int32_t                   dest_x,
+                              int32_t                   dest_y,
+                              int32_t                   width,
+                              int32_t                   height)
 {
     uint8_t     *dst_line, *dst;
     uint8_t     *src_line, *src;
     int dst_stride, src_stride;
-    uint16_t    w;
+    uint16_t w;
 
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
 
-    if (width>=8)
+    if (width >= 8)
     {
-        // Use overlapping 8-pixel method
-        while (height--)
-        {
-            dst = dst_line;
-            dst_line += dst_stride;
-            src = src_line;
-            src_line += src_stride;
-            w = width;
+	/* Use overlapping 8-pixel method */
+	while (height--)
+	{
+	    uint8_t *keep_dst = 0;
+	    uint8x8_t sval, dval, temp;
 
-            uint8_t *keep_dst=0;
+	    dst = dst_line;
+	    dst_line += dst_stride;
+	    src = src_line;
+	    src_line += src_stride;
+	    w = width;
 
 #ifndef USE_GCC_INLINE_ASM
-            uint8x8_t sval,dval,temp;
+	    sval = vld1_u8 ((void*)src);
+	    dval = vld1_u8 ((void*)dst);
+	    keep_dst = dst;
 
-            sval = vld1_u8((void*)src);
-            dval = vld1_u8((void*)dst);
-            keep_dst = dst;
+	    temp = vqadd_u8 (dval, sval);
 
-            temp = vqadd_u8(dval,sval);
+	    src += (w & 7);
+	    dst += (w & 7);
+	    w -= (w & 7);
 
-            src += (w & 7);
-            dst += (w & 7);
-            w -= (w & 7);
+	    while (w)
+	    {
+		sval = vld1_u8 ((void*)src);
+		dval = vld1_u8 ((void*)dst);
 
-            while (w)
-            {
-                sval = vld1_u8((void*)src);
-                dval = vld1_u8((void*)dst);
+		vst1_u8 ((void*)keep_dst, temp);
+		keep_dst = dst;
 
-                vst1_u8((void*)keep_dst,temp);
-                keep_dst = dst;
+		temp = vqadd_u8 (dval, sval);
 
-                temp = vqadd_u8(dval,sval);
+		src += 8;
+		dst += 8;
+		w -= 8;
+	    }
 
-                src+=8;
-                dst+=8;
-                w-=8;
-            }
-            vst1_u8((void*)keep_dst,temp);
+	    vst1_u8 ((void*)keep_dst, temp);
 #else
-            asm volatile (
-// avoid using d8-d15 (q4-q7) aapcs callee-save registers
-                        "vld1.8  {d0}, [%[src]]\n\t"
-                        "vld1.8  {d4}, [%[dst]]\n\t"
-                        "mov     %[keep_dst], %[dst]\n\t"
-
-                        "and ip, %[w], #7\n\t"
-                        "add %[src], %[src], ip\n\t"
-                        "add %[dst], %[dst], ip\n\t"
-                        "subs %[w], %[w], ip\n\t"
-                        "b 9f\n\t"
-// LOOP
-                        "2:\n\t"
-                        "vld1.8  {d0}, [%[src]]!\n\t"
-                        "vld1.8  {d4}, [%[dst]]!\n\t"
-                        "vst1.8  {d20}, [%[keep_dst]]\n\t"
-                        "sub     %[keep_dst], %[dst], #8\n\t"
-                        "subs %[w], %[w], #8\n\t"
-                        "9:\n\t"
-                        "vqadd.u8 d20, d0, d4\n\t"
-
-                        "bne 2b\n\t"
-
-                        "1:\n\t"
-                        "vst1.8  {d20}, [%[keep_dst]]\n\t"
-
-                        : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst)
-                        :
-                        : "ip", "cc", "memory", "d0","d4",
-                          "d20"
-                        );
+	    asm volatile (
+/* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
+	        "vld1.8  {d0}, [%[src]]\n\t"
+	        "vld1.8  {d4}, [%[dst]]\n\t"
+	        "mov     %[keep_dst], %[dst]\n\t"
+
+	        "and ip, %[w], #7\n\t"
+	        "add %[src], %[src], ip\n\t"
+	        "add %[dst], %[dst], ip\n\t"
+	        "subs %[w], %[w], ip\n\t"
+	        "b 9f\n\t"
+/* LOOP */
+	        "2:\n\t"
+	        "vld1.8  {d0}, [%[src]]!\n\t"
+	        "vld1.8  {d4}, [%[dst]]!\n\t"
+	        "vst1.8  {d20}, [%[keep_dst]]\n\t"
+	        "sub     %[keep_dst], %[dst], #8\n\t"
+	        "subs %[w], %[w], #8\n\t"
+	        "9:\n\t"
+	        "vqadd.u8 d20, d0, d4\n\t"
+
+	        "bne 2b\n\t"
+
+	        "1:\n\t"
+	        "vst1.8  {d20}, [%[keep_dst]]\n\t"
+
+		: [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst)
+		:
+		: "ip", "cc", "memory", "d0", "d4",
+	        "d20"
+	        );
 #endif
-        }
+	}
     }
     else
     {
-        const uint8_t nil = 0;
-        const uint8x8_t vnil = vld1_dup_u8(&nil);
-
-        while (height--)
-        {
-            dst = dst_line;
-            dst_line += dst_stride;
-            src = src_line;
-            src_line += src_stride;
-            w = width;
-            uint8x8_t sval=vnil, dval=vnil;
-            uint8_t *dst4=0, *dst2=0;
-
-            if (w&4)
-            {
-                sval = vreinterpret_u8_u32(vld1_lane_u32((void*)src,vreinterpret_u32_u8(sval),1));
-                dval = vreinterpret_u8_u32(vld1_lane_u32((void*)dst,vreinterpret_u32_u8(dval),1));
-                dst4=dst;
-                src+=4;
-                dst+=4;
-            }
-            if (w&2)
-            {
-                sval = vreinterpret_u8_u16(vld1_lane_u16((void*)src,vreinterpret_u16_u8(sval),1));
-                dval = vreinterpret_u8_u16(vld1_lane_u16((void*)dst,vreinterpret_u16_u8(dval),1));
-                dst2=dst;
-                src+=2;
-                dst+=2;
-            }
-            if (w&1)
-            {
-                sval = vld1_lane_u8(src,sval,1);
-                dval = vld1_lane_u8(dst,dval,1);
-            }
-
-            dval = vqadd_u8(dval,sval);
-
-            if (w&1)
-                vst1_lane_u8(dst,dval,1);
-            if (w&2)
-                vst1_lane_u16((void*)dst2,vreinterpret_u16_u8(dval),1);
-            if (w&4)
-                vst1_lane_u32((void*)dst4,vreinterpret_u32_u8(dval),1);
-        }
+	const uint8_t nil = 0;
+	const uint8x8_t vnil = vld1_dup_u8 (&nil);
+
+	while (height--)
+	{
+	    uint8x8_t sval = vnil, dval = vnil;
+	    uint8_t *dst4 = 0, *dst2 = 0;
+
+	    dst = dst_line;
+	    dst_line += dst_stride;
+	    src = src_line;
+	    src_line += src_stride;
+	    w = width;
+
+	    if (w & 4)
+	    {
+		sval = vreinterpret_u8_u32 (
+		    vld1_lane_u32 ((void*)src, vreinterpret_u32_u8 (sval), 1));
+		dval = vreinterpret_u8_u32 (
+		    vld1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (dval), 1));
+
+		dst4 = dst;
+		src += 4;
+		dst += 4;
+	    }
+
+	    if (w & 2)
+	    {
+		sval = vreinterpret_u8_u16 (
+		    vld1_lane_u16 ((void*)src, vreinterpret_u16_u8 (sval), 1));
+		dval = vreinterpret_u8_u16 (
+		    vld1_lane_u16 ((void*)dst, vreinterpret_u16_u8 (dval), 1));
+
+		dst2 = dst;
+		src += 2;
+		dst += 2;
+	    }
+
+	    if (w & 1)
+	    {
+		sval = vld1_lane_u8 (src, sval, 1);
+		dval = vld1_lane_u8 (dst, dval, 1);
+	    }
+
+	    dval = vqadd_u8 (dval, sval);
+
+	    if (w & 1)
+		vst1_lane_u8 (dst, dval, 1);
+
+	    if (w & 2)
+		vst1_lane_u16 ((void*)dst2, vreinterpret_u16_u8 (dval), 1);
+
+	    if (w & 4)
+		vst1_lane_u32 ((void*)dst4, vreinterpret_u32_u8 (dval), 1);
+	}
     }
 }
 
-
 static void
-neon_composite_over_8888_8888 (
-                            pixman_implementation_t * impl,
-                            pixman_op_t op,
-			 pixman_image_t * src_image,
-			 pixman_image_t * mask_image,
-			 pixman_image_t * dst_image,
-			 int32_t      src_x,
-			 int32_t      src_y,
-			 int32_t      mask_x,
-			 int32_t      mask_y,
-			 int32_t      dest_x,
-			 int32_t      dest_y,
-			 int32_t      width,
-			 int32_t      height)
+neon_composite_over_8888_8888 (pixman_implementation_t * impl,
+                               pixman_op_t               op,
+                               pixman_image_t *          src_image,
+                               pixman_image_t *          mask_image,
+                               pixman_image_t *          dst_image,
+                               int32_t                   src_x,
+                               int32_t                   src_y,
+                               int32_t                   mask_x,
+                               int32_t                   mask_y,
+                               int32_t                   dest_x,
+                               int32_t                   dest_y,
+                               int32_t                   width,
+                               int32_t                   height)
 {
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    int	dst_stride, src_stride;
-    uint32_t	w;
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+    uint32_t w;
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
-    if (width>=8)
+    if (width >= 8)
     {
-        // Use overlapping 8-pixel method  
-        while (height--)
-        {
+	/* Use overlapping 8-pixel method */
+	while (height--)
+	{
+	    uint32_t *keep_dst = 0;
+	    uint8x8x4_t sval, dval, temp;
+
 	    dst = dst_line;
 	    dst_line += dst_stride;
 	    src = src_line;
 	    src_line += src_stride;
 	    w = width;
 
-            uint32_t *keep_dst=0;
-
 #ifndef USE_GCC_INLINE_ASM
-            uint8x8x4_t sval,dval,temp;
+	    sval = vld4_u8 ((void*)src);
+	    dval = vld4_u8 ((void*)dst);
+	    keep_dst = dst;
 
-            sval = vld4_u8((void*)src);
-            dval = vld4_u8((void*)dst);
-            keep_dst = dst;
+	    temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
+	    temp = neon8qadd (sval, temp);
 
-            temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-            temp = neon8qadd(sval,temp);
+	    src += (w & 7);
+	    dst += (w & 7);
+	    w -= (w & 7);
 
-            src += (w & 7);
-            dst += (w & 7);
-            w -= (w & 7);
+	    while (w)
+	    {
+		sval = vld4_u8 ((void*)src);
+		dval = vld4_u8 ((void*)dst);
 
-            while (w)
-            {
-                sval = vld4_u8((void*)src);
-                dval = vld4_u8((void*)dst);
+		vst4_u8 ((void*)keep_dst, temp);
+		keep_dst = dst;
 
-                vst4_u8((void*)keep_dst,temp);
-                keep_dst = dst;
+		temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
+		temp = neon8qadd (sval, temp);
 
-                temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-                temp = neon8qadd(sval,temp);
+		src += 8;
+		dst += 8;
+		w -= 8;
+	    }
 
-                src+=8;
-                dst+=8;
-                w-=8;
-            }
-            vst4_u8((void*)keep_dst,temp);
+	    vst4_u8 ((void*)keep_dst, temp);
 #else
-            asm volatile (
-// avoid using d8-d15 (q4-q7) aapcs callee-save registers
-                        "vld4.8  {d0-d3}, [%[src]]\n\t"
-                        "vld4.8  {d4-d7}, [%[dst]]\n\t"
-                        "mov     %[keep_dst], %[dst]\n\t"
-
-                        "and ip, %[w], #7\n\t"
-                        "add %[src], %[src], ip, LSL#2\n\t"
-                        "add %[dst], %[dst], ip, LSL#2\n\t"
-                        "subs %[w], %[w], ip\n\t"
-                        "b 9f\n\t"
-// LOOP
-                        "2:\n\t"
-                        "vld4.8  {d0-d3}, [%[src]]!\n\t"
-                        "vld4.8  {d4-d7}, [%[dst]]!\n\t"
-                        "vst4.8  {d20-d23}, [%[keep_dst]]\n\t"
-                        "sub     %[keep_dst], %[dst], #8*4\n\t"
-                        "subs %[w], %[w], #8\n\t"
-                        "9:\n\t"
-                        "vmvn.8  d31, d3\n\t"
-                        "vmull.u8 q10, d31, d4\n\t"
-                        "vmull.u8 q11, d31, d5\n\t"
-                        "vmull.u8 q12, d31, d6\n\t"
-                        "vmull.u8 q13, d31, d7\n\t"
-                        "vrshr.u16 q8, q10, #8\n\t"
-                        "vrshr.u16 q9, q11, #8\n\t"
-                        "vraddhn.u16 d20, q10, q8\n\t"
-                        "vraddhn.u16 d21, q11, q9\n\t"
-                        "vrshr.u16 q8, q12, #8\n\t"
-                        "vrshr.u16 q9, q13, #8\n\t"
-                        "vraddhn.u16 d22, q12, q8\n\t"
-                        "vraddhn.u16 d23, q13, q9\n\t"
-// result in d20-d23
-                        "vqadd.u8 d20, d0, d20\n\t"
-                        "vqadd.u8 d21, d1, d21\n\t"
-                        "vqadd.u8 d22, d2, d22\n\t"
-                        "vqadd.u8 d23, d3, d23\n\t"
-
-                        "bne 2b\n\t"
-
-                        "1:\n\t"
-                        "vst4.8  {d20-d23}, [%[keep_dst]]\n\t"
-
-                        : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst)
-                        : 
-                        : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-                          "d16","d17","d18","d19","d20","d21","d22","d23"
-                        );
+	    asm volatile (
+/* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
+	        "vld4.8  {d0-d3}, [%[src]]\n\t"
+	        "vld4.8  {d4-d7}, [%[dst]]\n\t"
+	        "mov     %[keep_dst], %[dst]\n\t"
+
+	        "and ip, %[w], #7\n\t"
+	        "add %[src], %[src], ip, LSL#2\n\t"
+	        "add %[dst], %[dst], ip, LSL#2\n\t"
+	        "subs %[w], %[w], ip\n\t"
+	        "b 9f\n\t"
+/* LOOP */
+	        "2:\n\t"
+	        "vld4.8  {d0-d3}, [%[src]]!\n\t"
+	        "vld4.8  {d4-d7}, [%[dst]]!\n\t"
+	        "vst4.8  {d20-d23}, [%[keep_dst]]\n\t"
+	        "sub     %[keep_dst], %[dst], #8*4\n\t"
+	        "subs %[w], %[w], #8\n\t"
+	        "9:\n\t"
+	        "vmvn.8  d31, d3\n\t"
+	        "vmull.u8 q10, d31, d4\n\t"
+	        "vmull.u8 q11, d31, d5\n\t"
+	        "vmull.u8 q12, d31, d6\n\t"
+	        "vmull.u8 q13, d31, d7\n\t"
+	        "vrshr.u16 q8, q10, #8\n\t"
+	        "vrshr.u16 q9, q11, #8\n\t"
+	        "vraddhn.u16 d20, q10, q8\n\t"
+	        "vraddhn.u16 d21, q11, q9\n\t"
+	        "vrshr.u16 q8, q12, #8\n\t"
+	        "vrshr.u16 q9, q13, #8\n\t"
+	        "vraddhn.u16 d22, q12, q8\n\t"
+	        "vraddhn.u16 d23, q13, q9\n\t"
+/* result in d20-d23 */
+	        "vqadd.u8 d20, d0, d20\n\t"
+	        "vqadd.u8 d21, d1, d21\n\t"
+	        "vqadd.u8 d22, d2, d22\n\t"
+	        "vqadd.u8 d23, d3, d23\n\t"
+
+	        "bne 2b\n\t"
+
+	        "1:\n\t"
+	        "vst4.8  {d20-d23}, [%[keep_dst]]\n\t"
+
+		: [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst)
+		:
+		: "ip", "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+	        "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23"
+	        );
 #endif
-        }
+	}
     }
     else
     {
-        uint8x8_t    alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL));
-
-        // Handle width<8
-        while (height--)
-        {
-            dst = dst_line;
-            dst_line += dst_stride;
-            src = src_line;
-            src_line += src_stride;
-            w = width;
-
-            while (w>=2)
-            {
-                uint8x8_t sval,dval;
-
-                /* two 32-bit pixels packed into D-reg; ad-hoc vectorization */
-                sval = vreinterpret_u8_u32(vld1_u32((void*)src));
-                dval = vreinterpret_u8_u32(vld1_u32((void*)dst));
-                dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval),alpha_selector));
-                vst1_u8((void*)dst,vqadd_u8(sval,dval));
-
-                src+=2;
-                dst+=2;
-                w-=2;
-            }
-
-            if (w)
-            {
-                uint8x8_t sval,dval;
-
-                /* single 32-bit pixel in lane 0 */
-                sval = vreinterpret_u8_u32(vld1_dup_u32((void*)src));  // only interested in lane 0
-                dval = vreinterpret_u8_u32(vld1_dup_u32((void*)dst));  // only interested in lane 0
-                dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval),alpha_selector));
-                vst1_lane_u32((void*)dst,vreinterpret_u32_u8(vqadd_u8(sval,dval)),0);
-            }
-        }
+	uint8x8_t alpha_selector = vreinterpret_u8_u64 (
+	    vcreate_u64 (0x0707070703030303ULL));
+
+	/* Handle width < 8 */
+	while (height--)
+	{
+	    dst = dst_line;
+	    dst_line += dst_stride;
+	    src = src_line;
+	    src_line += src_stride;
+	    w = width;
+
+	    while (w >= 2)
+	    {
+		uint8x8_t sval, dval;
+
+		/* two 32-bit pixels packed into D-reg; ad-hoc vectorization */
+		sval = vreinterpret_u8_u32 (vld1_u32 ((void*)src));
+		dval = vreinterpret_u8_u32 (vld1_u32 ((void*)dst));
+		dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
+		vst1_u8 ((void*)dst, vqadd_u8 (sval, dval));
+
+		src += 2;
+		dst += 2;
+		w -= 2;
+	    }
+
+	    if (w)
+	    {
+		uint8x8_t sval, dval;
+
+		/* single 32-bit pixel in lane 0 */
+		sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)src));  /* only interested in lane 0 */
+		dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)dst));  /* only interested in lane 0 */
+		dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
+		vst1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
+	    }
+	}
     }
 }
 
 static void
-neon_composite_over_8888_n_8888 (
-                               pixman_implementation_t * impl,
-                               pixman_op_t op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t	src_x,
-			       int32_t	src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t      width,
-			       int32_t      height)
+neon_composite_over_8888_n_8888 (pixman_implementation_t * impl,
+                                 pixman_op_t               op,
+                                 pixman_image_t *          src_image,
+                                 pixman_image_t *          mask_image,
+                                 pixman_image_t *          dst_image,
+                                 int32_t                   src_x,
+                                 int32_t                   src_y,
+                                 int32_t                   mask_x,
+                                 int32_t                   mask_y,
+                                 int32_t                   dest_x,
+                                 int32_t                   dest_y,
+                                 int32_t                   width,
+                                 int32_t                   height)
 {
-    uint32_t	*dst_line, *dst;
-    uint32_t	*src_line, *src;
-    uint32_t	mask;
-    int	dst_stride, src_stride;
-    uint32_t	w;
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    uint32_t mask;
+    int dst_stride, src_stride;
+    uint32_t w;
     uint8x8_t mask_alpha;
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
 
     mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);
-    mask_alpha = vdup_n_u8((mask) >> 24);
+    mask_alpha = vdup_n_u8 ((mask) >> 24);
 
-    if (width>=8)
+    if (width >= 8)
     {
-        // Use overlapping 8-pixel method
-        while (height--)
-        {
-            dst = dst_line;
-            dst_line += dst_stride;
-            src = src_line;
-            src_line += src_stride;
-            w = width;
+	/* Use overlapping 8-pixel method */
+	while (height--)
+	{
+	    dst = dst_line;
+	    dst_line += dst_stride;
+	    src = src_line;
+	    src_line += src_stride;
+	    w = width;
 
-            uint32_t *keep_dst=0;
+	    uint32_t *keep_dst = 0;
 
 #ifndef USE_GCC_INLINE_ASM
-            uint8x8x4_t sval,dval,temp;
+	    uint8x8x4_t sval, dval, temp;
 
-            sval = vld4_u8((void*)src);
-            dval = vld4_u8((void*)dst);
-            keep_dst = dst;
+	    sval = vld4_u8 ((void*)src);
+	    dval = vld4_u8 ((void*)dst);
+	    keep_dst = dst;
 
-            sval = neon8mul(sval,mask_alpha);
-            temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-            temp = neon8qadd(sval,temp);
+	    sval = neon8mul (sval, mask_alpha);
+	    temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
+	    temp = neon8qadd (sval, temp);
 
-            src += (w & 7);
-            dst += (w & 7);
-            w -= (w & 7);
+	    src += (w & 7);
+	    dst += (w & 7);
+	    w -= (w & 7);
 
-            while (w)
-            {
-                sval = vld4_u8((void*)src);
-                dval = vld4_u8((void*)dst);
+	    while (w)
+	    {
+		sval = vld4_u8 ((void*)src);
+		dval = vld4_u8 ((void*)dst);
 
-                vst4_u8((void*)keep_dst,temp);
-                keep_dst = dst;
+		vst4_u8 ((void*)keep_dst, temp);
+		keep_dst = dst;
 
-                sval = neon8mul(sval,mask_alpha);
-                temp = neon8mul(dval,vmvn_u8(sval.val[3]));
-                temp = neon8qadd(sval,temp);
+		sval = neon8mul (sval, mask_alpha);
+		temp = neon8mul (dval, vmvn_u8 (sval.val[3]));
+		temp = neon8qadd (sval, temp);
 
-                src+=8;
-                dst+=8;
-                w-=8;
-            }
-            vst4_u8((void*)keep_dst,temp);
+		src += 8;
+		dst += 8;
+		w -= 8;
+	    }
+	    vst4_u8 ((void*)keep_dst, temp);
 #else
-            asm volatile (
-// avoid using d8-d15 (q4-q7) aapcs callee-save registers
-                        "vdup.32      d30, %[mask]\n\t"
-                        "vdup.8       d30, d30[3]\n\t"
-
-                        "vld4.8       {d0-d3}, [%[src]]\n\t"
-                        "vld4.8       {d4-d7}, [%[dst]]\n\t"
-                        "mov  %[keep_dst], %[dst]\n\t"
-
-                        "and  ip, %[w], #7\n\t"
-                        "add  %[src], %[src], ip, LSL#2\n\t"
-                        "add  %[dst], %[dst], ip, LSL#2\n\t"
-                        "subs  %[w], %[w], ip\n\t"
-                        "b 9f\n\t"
-// LOOP
-                        "2:\n\t"
-                        "vld4.8       {d0-d3}, [%[src]]!\n\t"
-                        "vld4.8       {d4-d7}, [%[dst]]!\n\t"
-                        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
-                        "sub  %[keep_dst], %[dst], #8*4\n\t"
-                        "subs  %[w], %[w], #8\n\t"
-
-                        "9:\n\t"
-                        "vmull.u8     q10, d30, d0\n\t"
-                        "vmull.u8     q11, d30, d1\n\t"
-                        "vmull.u8     q12, d30, d2\n\t"
-                        "vmull.u8     q13, d30, d3\n\t"
-                        "vrshr.u16    q8, q10, #8\n\t"
-                        "vrshr.u16    q9, q11, #8\n\t"
-                        "vraddhn.u16  d0, q10, q8\n\t"
-                        "vraddhn.u16  d1, q11, q9\n\t"
-                        "vrshr.u16    q9, q13, #8\n\t"
-                        "vrshr.u16    q8, q12, #8\n\t"
-                        "vraddhn.u16  d3, q13, q9\n\t"
-                        "vraddhn.u16  d2, q12, q8\n\t"
-
-                        "vmvn.8       d31, d3\n\t"
-                        "vmull.u8     q10, d31, d4\n\t"
-                        "vmull.u8     q11, d31, d5\n\t"
-                        "vmull.u8     q12, d31, d6\n\t"
-                        "vmull.u8     q13, d31, d7\n\t"
-                        "vrshr.u16    q8, q10, #8\n\t"
-                        "vrshr.u16    q9, q11, #8\n\t"
-                        "vraddhn.u16  d20, q10, q8\n\t"
-                        "vrshr.u16    q8, q12, #8\n\t"
-                        "vraddhn.u16  d21, q11, q9\n\t"
-                        "vrshr.u16    q9, q13, #8\n\t"
-                        "vraddhn.u16  d22, q12, q8\n\t"
-                        "vraddhn.u16  d23, q13, q9\n\t"
-// result in d20-d23
-                        "vqadd.u8     d20, d0, d20\n\t"
-                        "vqadd.u8     d21, d1, d21\n\t"
-                        "vqadd.u8     d22, d2, d22\n\t"
-                        "vqadd.u8     d23, d3, d23\n\t"
-
-                        "bne  2b\n\t"
-
-                        "1:\n\t"
-                        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
-
-                        : [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst)
-                        : [mask] "r" (mask)
-                        : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-                          "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27",
-                          "d30","d31"
-                        );
+	    asm volatile (
+/* avoid using d8-d15 (q4-q7) aapcs callee-save registers */
+	        "vdup.32      d30, %[mask]\n\t"
+	        "vdup.8       d30, d30[3]\n\t"
+
+	        "vld4.8       {d0-d3}, [%[src]]\n\t"
+	        "vld4.8       {d4-d7}, [%[dst]]\n\t"
+	        "mov  %[keep_dst], %[dst]\n\t"
+
+	        "and  ip, %[w], #7\n\t"
+	        "add  %[src], %[src], ip, LSL#2\n\t"
+	        "add  %[dst], %[dst], ip, LSL#2\n\t"
+	        "subs  %[w], %[w], ip\n\t"
+	        "b 9f\n\t"
+/* LOOP */
+	        "2:\n\t"
+	        "vld4.8       {d0-d3}, [%[src]]!\n\t"
+	        "vld4.8       {d4-d7}, [%[dst]]!\n\t"
+	        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
+	        "sub  %[keep_dst], %[dst], #8*4\n\t"
+	        "subs  %[w], %[w], #8\n\t"
+
+	        "9:\n\t"
+	        "vmull.u8     q10, d30, d0\n\t"
+	        "vmull.u8     q11, d30, d1\n\t"
+	        "vmull.u8     q12, d30, d2\n\t"
+	        "vmull.u8     q13, d30, d3\n\t"
+	        "vrshr.u16    q8, q10, #8\n\t"
+	        "vrshr.u16    q9, q11, #8\n\t"
+	        "vraddhn.u16  d0, q10, q8\n\t"
+	        "vraddhn.u16  d1, q11, q9\n\t"
+	        "vrshr.u16    q9, q13, #8\n\t"
+	        "vrshr.u16    q8, q12, #8\n\t"
+	        "vraddhn.u16  d3, q13, q9\n\t"
+	        "vraddhn.u16  d2, q12, q8\n\t"
+
+	        "vmvn.8       d31, d3\n\t"
+	        "vmull.u8     q10, d31, d4\n\t"
+	        "vmull.u8     q11, d31, d5\n\t"
+	        "vmull.u8     q12, d31, d6\n\t"
+	        "vmull.u8     q13, d31, d7\n\t"
+	        "vrshr.u16    q8, q10, #8\n\t"
+	        "vrshr.u16    q9, q11, #8\n\t"
+	        "vraddhn.u16  d20, q10, q8\n\t"
+	        "vrshr.u16    q8, q12, #8\n\t"
+	        "vraddhn.u16  d21, q11, q9\n\t"
+	        "vrshr.u16    q9, q13, #8\n\t"
+	        "vraddhn.u16  d22, q12, q8\n\t"
+	        "vraddhn.u16  d23, q13, q9\n\t"
+
+/* result in d20-d23 */
+	        "vqadd.u8     d20, d0, d20\n\t"
+	        "vqadd.u8     d21, d1, d21\n\t"
+	        "vqadd.u8     d22, d2, d22\n\t"
+	        "vqadd.u8     d23, d3, d23\n\t"
+
+	        "bne  2b\n\t"
+
+	        "1:\n\t"
+	        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
+
+		: [w] "+r" (w), [src] "+r" (src), [dst] "+r" (dst), [keep_dst] "=r" (keep_dst)
+		: [mask] "r" (mask)
+		: "ip", "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+	        "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27",
+	        "d30", "d31"
+	        );
 #endif
-        }
+	}
     }
     else
     {
-        uint8x8_t    alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL));
+	uint8x8_t alpha_selector = vreinterpret_u8_u64 (vcreate_u64 (0x0707070703030303ULL));
 
-        // Handle width<8
-        while (height--)
-        {
-            dst = dst_line;
-            dst_line += dst_stride;
-            src = src_line;
-            src_line += src_stride;
-            w = width;
+	/* Handle width < 8 */
+	while (height--)
+	{
+	    dst = dst_line;
+	    dst_line += dst_stride;
+	    src = src_line;
+	    src_line += src_stride;
+	    w = width;
 
-            while (w>=2)
-            {
-                uint8x8_t sval,dval;
+	    while (w >= 2)
+	    {
+		uint8x8_t sval, dval;
 
-                sval = vreinterpret_u8_u32(vld1_u32((void*)src));
-                dval = vreinterpret_u8_u32(vld1_u32((void*)dst));
+		sval = vreinterpret_u8_u32 (vld1_u32 ((void*)src));
+		dval = vreinterpret_u8_u32 (vld1_u32 ((void*)dst));
 
-                /* sval * const alpha_mul */
-                sval = neon2mul(sval,mask_alpha);
+		/* sval * const alpha_mul */
+		sval = neon2mul (sval, mask_alpha);
 
-                /* dval * 255-(src alpha) */
-                dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval), alpha_selector));
+		/* dval * 255-(src alpha) */
+		dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
 
-                vst1_u8((void*)dst,vqadd_u8(sval,dval));
+		vst1_u8 ((void*)dst, vqadd_u8 (sval, dval));
 
-                src+=2;
-                dst+=2;
-                w-=2;
-            }
+		src += 2;
+		dst += 2;
+		w -= 2;
+	    }
 
-            if (w)
-            {
-                uint8x8_t sval,dval;
+	    if (w)
+	    {
+		uint8x8_t sval, dval;
 
-                sval = vreinterpret_u8_u32(vld1_dup_u32((void*)src));
-                dval = vreinterpret_u8_u32(vld1_dup_u32((void*)dst));
+		sval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)src));
+		dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)dst));
 
-                /* sval * const alpha_mul */
-                sval = neon2mul(sval,mask_alpha);
+		/* sval * const alpha_mul */
+		sval = neon2mul (sval, mask_alpha);
 
-                /* dval * 255-(src alpha) */
-                dval = neon2mul(dval,vtbl1_u8(vmvn_u8(sval), alpha_selector));
+		/* dval * 255-(src alpha) */
+		dval = neon2mul (dval, vtbl1_u8 (vmvn_u8 (sval), alpha_selector));
 
-                vst1_lane_u32((void*)dst,vreinterpret_u32_u8(vqadd_u8(sval,dval)),0);
-            }
-        }
+		vst1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (vqadd_u8 (sval, dval)), 0);
+	    }
+	}
     }
 }
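
Both wide OVER paths above share the same "overlapping 8-pixel" row shape: composite pixels 0..7 once, step the pointers forward by width % 8 so the rest of the row is an exact number of 8-pixel blocks, and hold each block's result back one iteration (keep_dst) so the next block's inputs are loaded before the previous store can clobber the overlap. A scalar sketch of just that control flow, with combine8/store8 as hypothetical stand-ins for the real NEON math (width >= 8 assumed):

    #include <stdint.h>
    #include <string.h>

    /* stand-in for the vld4 + neon8mul + neon8qadd block */
    static void
    combine8 (uint32_t *res, const uint32_t *src, const uint32_t *dst)
    {
        int i;

        for (i = 0; i < 8; i++)
            res[i] = src[i] + dst[i];       /* placeholder op */
    }

    static void
    store8 (uint32_t *dst, const uint32_t *res)
    {
        memcpy (dst, res, 8 * sizeof (uint32_t));
    }

    static void
    row_overlap8 (uint32_t *dst, const uint32_t *src, int w)
    {
        uint32_t  res[8], next[8];
        uint32_t *keep_dst;

        combine8 (res, src, dst);   /* covers the ragged head, pixels 0..7 */
        keep_dst = dst;

        src += (w & 7);
        dst += (w & 7);
        w   -= (w & 7);

        while (w)
        {
            combine8 (next, src, dst);  /* read the overlap first ...   */
            store8 (keep_dst, res);     /* ... then flush the old block */
            memcpy (res, next, sizeof (res));
            keep_dst = dst;

            src += 8;
            dst += 8;
            w   -= 8;
        }

        store8 (keep_dst, res);         /* final deferred store */
    }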
 
-
 static void
-neon_composite_over_n_8_8888 (
-                            pixman_implementation_t * impl,
-                            pixman_op_t      op,
-			       pixman_image_t * src_image,
-			       pixman_image_t * mask_image,
-			       pixman_image_t * dst_image,
-			       int32_t      src_x,
-			       int32_t      src_y,
-			       int32_t      mask_x,
-			       int32_t      mask_y,
-			       int32_t      dest_x,
-			       int32_t      dest_y,
-			       int32_t      width,
-			       int32_t      height)
+neon_composite_over_n_8_8888 (pixman_implementation_t * impl,
+                              pixman_op_t               op,
+                              pixman_image_t *          src_image,
+                              pixman_image_t *          mask_image,
+                              pixman_image_t *          dst_image,
+                              int32_t                   src_x,
+                              int32_t                   src_y,
+                              int32_t                   mask_x,
+                              int32_t                   mask_y,
+                              int32_t                   dest_x,
+                              int32_t                   dest_y,
+                              int32_t                   width,
+                              int32_t                   height)
 {
-    uint32_t	 src, srca;
-    uint32_t	*dst_line, *dst;
-    uint8_t	*mask_line, *mask;
-    int		 dst_stride, mask_stride;
-    uint32_t	 w;
-    uint8x8_t    sval2;
-    uint8x8x4_t  sval8;
-    uint8x8_t    mask_selector=vreinterpret_u8_u64(vcreate_u64(0x0101010100000000ULL));
-    uint8x8_t    alpha_selector=vreinterpret_u8_u64(vcreate_u64(0x0707070703030303ULL));
-
-    src = _pixman_image_get_solid(src_image, dst_image->bits.format);
-
-    // bail out if fully transparent
+    uint32_t src, srca;
+    uint32_t    *dst_line, *dst;
+    uint8_t     *mask_line, *mask;
+    int dst_stride, mask_stride;
+    uint32_t w;
+    uint8x8_t sval2;
+    uint8x8x4_t sval8;
+    uint8x8_t mask_selector = vreinterpret_u8_u64 (vcreate_u64 (0x0101010100000000ULL));
+    uint8x8_t alpha_selector = vreinterpret_u8_u64 (vcreate_u64 (0x0707070703030303ULL));
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+
+    /* bail out if fully transparent */
     srca = src >> 24;
     if (src == 0)
 	return;
 
-    sval2=vreinterpret_u8_u32(vdup_n_u32(src));
-    sval8.val[0]=vdup_lane_u8(sval2,0);
-    sval8.val[1]=vdup_lane_u8(sval2,1);
-    sval8.val[2]=vdup_lane_u8(sval2,2);
-    sval8.val[3]=vdup_lane_u8(sval2,3);
+    sval2 = vreinterpret_u8_u32 (vdup_n_u32 (src));
+    sval8.val[0] = vdup_lane_u8 (sval2, 0);
+    sval8.val[1] = vdup_lane_u8 (sval2, 1);
+    sval8.val[2] = vdup_lane_u8 (sval2, 2);
+    sval8.val[3] = vdup_lane_u8 (sval2, 3);
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
 
-    if (width>=8)
+    if (width >= 8)
     {
-        // Use overlapping 8-pixel method, modified to avoid rewritten dest being reused
-        while (height--)
-        {
-            uint32_t *keep_dst=0;
+	/* Use overlapping 8-pixel method, modified to avoid
+	 * rewritten dest being reused
+	 */
+	while (height--)
+	{
+	    uint32_t *keep_dst = 0;
 
-            dst = dst_line;
-            dst_line += dst_stride;
-            mask = mask_line;
-            mask_line += mask_stride;
-            w = width;
+	    dst = dst_line;
+	    dst_line += dst_stride;
+	    mask = mask_line;
+	    mask_line += mask_stride;
+	    w = width;
 
 #ifndef USE_GCC_INLINE_ASM
-            uint8x8_t alpha;
-            uint8x8x4_t dval, temp;
-
-            alpha = vld1_u8((void*)mask);
-            dval = vld4_u8((void*)dst);
-            keep_dst = dst;
-
-            temp = neon8mul(sval8,alpha);
-            dval = neon8mul(dval,vmvn_u8(temp.val[3]));
-            temp = neon8qadd(temp,dval);
-
-            mask += (w & 7);
-            dst += (w & 7);
-            w -= (w & 7);
-
-            while (w)
-            {
-                alpha = vld1_u8((void*)mask);
-                dval = vld4_u8((void*)dst);
-
-                vst4_u8((void*)keep_dst,temp);
-                keep_dst = dst;
-
-                temp = neon8mul(sval8,alpha);
-                dval = neon8mul(dval,vmvn_u8(temp.val[3]));
-                temp = neon8qadd(temp,dval);
-
-                mask+=8;
-                dst+=8;
-                w-=8;
-            }
-            vst4_u8((void*)keep_dst,temp);
+	    uint8x8_t alpha;
+	    uint8x8x4_t dval, temp;
+
+	    alpha = vld1_u8 ((void*)mask);
+	    dval = vld4_u8 ((void*)dst);
+	    keep_dst = dst;
+
+	    temp = neon8mul (sval8, alpha);
+	    dval = neon8mul (dval, vmvn_u8 (temp.val[3]));
+	    temp = neon8qadd (temp, dval);
+
+	    mask += (w & 7);
+	    dst += (w & 7);
+	    w -= (w & 7);
+
+	    while (w)
+	    {
+		alpha = vld1_u8 ((void*)mask);
+		dval = vld4_u8 ((void*)dst);
+
+		vst4_u8 ((void*)keep_dst, temp);
+		keep_dst = dst;
+
+		temp = neon8mul (sval8, alpha);
+		dval = neon8mul (dval, vmvn_u8 (temp.val[3]));
+		temp = neon8qadd (temp, dval);
+
+		mask += 8;
+		dst += 8;
+		w -= 8;
+	    }
+	    vst4_u8 ((void*)keep_dst, temp);
 #else
-        asm volatile (
-                        "vdup.32      d0, %[src]\n\t"
-                        "vdup.8       d1, d0[1]\n\t"
-                        "vdup.8       d2, d0[2]\n\t"
-                        "vdup.8       d3, d0[3]\n\t"
-                        "vdup.8       d0, d0[0]\n\t"
-
-                        "vld4.8       {d4-d7}, [%[dst]]\n\t"
-                        "vld1.8       {d31}, [%[mask]]\n\t"
-                        "mov  %[keep_dst], %[dst]\n\t"
-
-                        "and  ip, %[w], #7\n\t"
-                        "add  %[mask], %[mask], ip\n\t"
-                        "add  %[dst], %[dst], ip, LSL#2\n\t"
-                        "subs  %[w], %[w], ip\n\t"
-                        "b 9f\n\t"
-// LOOP
-                        "2:\n\t" 
-                        "vld4.8       {d4-d7}, [%[dst]]!\n\t"
-                        "vld1.8       {d31}, [%[mask]]!\n\t"
-                        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
-                        "sub  %[keep_dst], %[dst], #8*4\n\t"
-                        "subs  %[w], %[w], #8\n\t"
-                        "9:\n\t"
-
-                        "vmull.u8     q10, d31, d0\n\t"
-                        "vmull.u8     q11, d31, d1\n\t"
-                        "vmull.u8     q12, d31, d2\n\t"
-                        "vmull.u8     q13, d31, d3\n\t"
-                        "vrshr.u16    q8, q10, #8\n\t"
-                        "vrshr.u16    q9, q11, #8\n\t"
-                        "vraddhn.u16  d20, q10, q8\n\t"
-                        "vraddhn.u16  d21, q11, q9\n\t"
-                        "vrshr.u16    q9, q13, #8\n\t"
-                        "vrshr.u16    q8, q12, #8\n\t"
-                        "vraddhn.u16  d23, q13, q9\n\t"
-                        "vraddhn.u16  d22, q12, q8\n\t"
-
-                        "vmvn.8       d30, d23\n\t"
-                        "vmull.u8     q12, d30, d4\n\t"
-                        "vmull.u8     q13, d30, d5\n\t"
-                        "vmull.u8     q14, d30, d6\n\t"
-                        "vmull.u8     q15, d30, d7\n\t"
-
-                        "vrshr.u16    q8, q12, #8\n\t"
-                        "vrshr.u16    q9, q13, #8\n\t"
-                        "vraddhn.u16  d4, q12, q8\n\t"
-                        "vrshr.u16    q8, q14, #8\n\t"
-                        "vraddhn.u16  d5, q13, q9\n\t"
-                        "vrshr.u16    q9, q15, #8\n\t"
-                        "vraddhn.u16  d6, q14, q8\n\t"
-                        "vraddhn.u16  d7, q15, q9\n\t"
-// result in d4-d7
-
-                        "vqadd.u8     d20, d4, d20\n\t"
-                        "vqadd.u8     d21, d5, d21\n\t"
-                        "vqadd.u8     d22, d6, d22\n\t"
-                        "vqadd.u8     d23, d7, d23\n\t"
-
-                        "bne 2b\n\t"
-
-                        "1:\n\t"
-                        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
-
-                        : [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "=r" (keep_dst)
-                        : [src] "r" (src) 
-                        : "ip", "cc", "memory", "d0","d1","d2","d3","d4","d5","d6","d7",
-                          "d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29",
-                          "d30","d31"
-                        );
+	    asm volatile (
+	        "vdup.32      d0, %[src]\n\t"
+	        "vdup.8       d1, d0[1]\n\t"
+	        "vdup.8       d2, d0[2]\n\t"
+	        "vdup.8       d3, d0[3]\n\t"
+	        "vdup.8       d0, d0[0]\n\t"
+
+	        "vld4.8       {d4-d7}, [%[dst]]\n\t"
+	        "vld1.8       {d31}, [%[mask]]\n\t"
+	        "mov  %[keep_dst], %[dst]\n\t"
+
+	        "and  ip, %[w], #7\n\t"
+	        "add  %[mask], %[mask], ip\n\t"
+	        "add  %[dst], %[dst], ip, LSL#2\n\t"
+	        "subs  %[w], %[w], ip\n\t"
+	        "b 9f\n\t"
+/* LOOP */
+	        "2:\n\t"
+	        "vld4.8       {d4-d7}, [%[dst]]!\n\t"
+	        "vld1.8       {d31}, [%[mask]]!\n\t"
+	        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
+	        "sub  %[keep_dst], %[dst], #8*4\n\t"
+	        "subs  %[w], %[w], #8\n\t"
+	        "9:\n\t"
+
+	        "vmull.u8     q10, d31, d0\n\t"
+	        "vmull.u8     q11, d31, d1\n\t"
+	        "vmull.u8     q12, d31, d2\n\t"
+	        "vmull.u8     q13, d31, d3\n\t"
+	        "vrshr.u16    q8, q10, #8\n\t"
+	        "vrshr.u16    q9, q11, #8\n\t"
+	        "vraddhn.u16  d20, q10, q8\n\t"
+	        "vraddhn.u16  d21, q11, q9\n\t"
+	        "vrshr.u16    q9, q13, #8\n\t"
+	        "vrshr.u16    q8, q12, #8\n\t"
+	        "vraddhn.u16  d23, q13, q9\n\t"
+	        "vraddhn.u16  d22, q12, q8\n\t"
+
+	        "vmvn.8       d30, d23\n\t"
+	        "vmull.u8     q12, d30, d4\n\t"
+	        "vmull.u8     q13, d30, d5\n\t"
+	        "vmull.u8     q14, d30, d6\n\t"
+	        "vmull.u8     q15, d30, d7\n\t"
+
+	        "vrshr.u16    q8, q12, #8\n\t"
+	        "vrshr.u16    q9, q13, #8\n\t"
+	        "vraddhn.u16  d4, q12, q8\n\t"
+	        "vrshr.u16    q8, q14, #8\n\t"
+	        "vraddhn.u16  d5, q13, q9\n\t"
+	        "vrshr.u16    q9, q15, #8\n\t"
+	        "vraddhn.u16  d6, q14, q8\n\t"
+	        "vraddhn.u16  d7, q15, q9\n\t"
+/* result in d4-d7 */
+
+	        "vqadd.u8     d20, d4, d20\n\t"
+	        "vqadd.u8     d21, d5, d21\n\t"
+	        "vqadd.u8     d22, d6, d22\n\t"
+	        "vqadd.u8     d23, d7, d23\n\t"
+
+	        "bne 2b\n\t"
+
+	        "1:\n\t"
+	        "vst4.8       {d20-d23}, [%[keep_dst]]\n\t"
+
+		: [w] "+r" (w), [dst] "+r" (dst), [mask] "+r" (mask), [keep_dst] "=r" (keep_dst)
+		: [src] "r" (src)
+		: "ip", "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
+	        "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "d25", "d26", "d27", "d28", "d29",
+	        "d30", "d31"
+	        );
 #endif
-        }
+	}
     }
     else
     {
-        while (height--)
-        {
-            uint8x8_t alpha;
+	while (height--)
+	{
+	    uint8x8_t alpha;
+
+	    dst = dst_line;
+	    dst_line += dst_stride;
+	    mask = mask_line;
+	    mask_line += mask_stride;
+	    w = width;
 
-            dst = dst_line;
-            dst_line += dst_stride;
-            mask = mask_line;
-            mask_line += mask_stride;
-            w = width;
+	    while (w >= 2)
+	    {
+		uint8x8_t dval, temp, res;
 
-            while (w>=2)
-            {
-                uint8x8_t dval, temp, res;
+		alpha = vtbl1_u8 (
+		    vreinterpret_u8_u16 (vld1_dup_u16 ((void*)mask)), mask_selector);
+		dval = vld1_u8 ((void*)dst);
 
-                alpha = vtbl1_u8(vreinterpret_u8_u16(vld1_dup_u16((void*)mask)), mask_selector);
-                dval = vld1_u8((void*)dst);
+		temp = neon2mul (sval2, alpha);
+		res = vqadd_u8 (
+		    temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector)));
 
-                temp = neon2mul(sval2,alpha);
-                res = vqadd_u8(temp,neon2mul(dval,vtbl1_u8(vmvn_u8(temp), alpha_selector)));
+		vst1_u8 ((void*)dst, res);
 
-                vst1_u8((void*)dst,res);
+		mask += 2;
+		dst += 2;
+		w -= 2;
+	    }
 
-                mask+=2;
-                dst+=2;
-                w-=2;
-            }
-            if (w)
-            {
-                uint8x8_t dval, temp, res;
+	    if (w)
+	    {
+		uint8x8_t dval, temp, res;
 
-                alpha = vtbl1_u8(vld1_dup_u8((void*)mask), mask_selector);
-                dval = vreinterpret_u8_u32(vld1_dup_u32((void*)dst));
+		alpha = vtbl1_u8 (vld1_dup_u8 ((void*)mask), mask_selector);
+		dval = vreinterpret_u8_u32 (vld1_dup_u32 ((void*)dst));
 
-                temp = neon2mul(sval2,alpha);
-                res = vqadd_u8(temp,neon2mul(dval,vtbl1_u8(vmvn_u8(temp), alpha_selector)));
+		temp = neon2mul (sval2, alpha);
+		res = vqadd_u8 (
+		    temp, neon2mul (dval, vtbl1_u8 (vmvn_u8 (temp), alpha_selector)));
 
-                vst1_lane_u32((void*)dst,vreinterpret_u32_u8(res),0);
-            }
-        }
+		vst1_lane_u32 ((void*)dst, vreinterpret_u32_u8 (res), 0);
+	    }
+	}
     }
 }
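
The two selector constants are vtbl1_u8 shuffle tables: viewed as little-endian bytes, alpha_selector 0x0707070703030303 is {3,3,3,3,7,7,7,7}, broadcasting each packed pixel's alpha byte across its own four lanes, while mask_selector 0x0101010100000000 is {0,0,0,0,1,1,1,1}, spreading two mask bytes the same way. Applied after vmvn_u8, the alpha shuffle yields the per-pixel (255 - alpha) factor the destination is multiplied by. A scalar model of the shuffle, assuming little-endian lane order:

    #include <stdint.h>

    /* vtbl1_u8 (px, sel) as a plain byte gather: out[i] = px[sel[i]] */
    static void
    broadcast_alpha (uint8_t out[8], const uint8_t px[8])
    {
        /* little-endian bytes of 0x0707070703030303ULL */
        static const uint8_t sel[8] = { 3, 3, 3, 3, 7, 7, 7, 7 };
        int i;

        for (i = 0; i < 8; i++)
            out[i] = px[sel[i]];
    }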
 
-
 static void
-neon_composite_add_8888_8_8 (
-                            pixman_implementation_t * impl,
-                            pixman_op_t op,
-                            pixman_image_t * src_image,
-                            pixman_image_t * mask_image,
-                            pixman_image_t * dst_image,
-                            int32_t      src_x,
-                            int32_t      src_y,
-                            int32_t      mask_x,
-                            int32_t      mask_y,
-                            int32_t      dest_x,
-                            int32_t      dest_y,
-                            int32_t      width,
-                            int32_t      height)
+neon_composite_add_8888_8_8 (pixman_implementation_t * impl,
+                             pixman_op_t               op,
+                             pixman_image_t *          src_image,
+                             pixman_image_t *          mask_image,
+                             pixman_image_t *          dst_image,
+                             int32_t                   src_x,
+                             int32_t                   src_y,
+                             int32_t                   mask_x,
+                             int32_t                   mask_y,
+                             int32_t                   dest_x,
+                             int32_t                   dest_y,
+                             int32_t                   width,
+                             int32_t                   height)
 {
     uint8_t     *dst_line, *dst;
     uint8_t     *mask_line, *mask;
     int dst_stride, mask_stride;
-    uint32_t    w;
-    uint32_t    src;
-    uint8x8_t   sa;
+    uint32_t w;
+    uint32_t src;
+    uint8x8_t sa;
 
     PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
     src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-    sa = vdup_n_u8((src) >> 24);
+    sa = vdup_n_u8 ((src) >> 24);
 
-    if (width>=8)
+    if (width >= 8)
     {
-        // Use overlapping 8-pixel method, modified to avoid rewritten dest being reused
-        while (height--)
-        {
-            dst = dst_line;
-            dst_line += dst_stride;
-            mask = mask_line;
-            mask_line += mask_stride;
-            w = width;
-
-            uint8x8_t mval, dval, res;
-            uint8_t     *keep_dst;
-
-            mval = vld1_u8((void *)mask);
-            dval = vld1_u8((void *)dst);
-            keep_dst = dst;
-
-            res = vqadd_u8(neon2mul(mval,sa),dval);
-
-            mask += (w & 7);
-            dst += (w & 7);
-            w -= w & 7;
-
-            while (w)
-            {
-                mval = vld1_u8((void *)mask);
-                dval = vld1_u8((void *)dst);
-                vst1_u8((void *)keep_dst, res);
-                keep_dst = dst;
-
-                res = vqadd_u8(neon2mul(mval,sa),dval);
-
-                mask += 8;
-                dst += 8;
-                w -= 8;
-            }
-            vst1_u8((void *)keep_dst, res);
-        }
+	/* Use overlapping 8-pixel method, modified to avoid rewritten dest being reused */
+	while (height--)
+	{
+	    dst = dst_line;
+	    dst_line += dst_stride;
+	    mask = mask_line;
+	    mask_line += mask_stride;
+	    w = width;
+
+	    uint8x8_t mval, dval, res;
+	    uint8_t     *keep_dst;
+
+	    mval = vld1_u8 ((void *)mask);
+	    dval = vld1_u8 ((void *)dst);
+	    keep_dst = dst;
+
+	    res = vqadd_u8 (neon2mul (mval, sa), dval);
+
+	    mask += (w & 7);
+	    dst += (w & 7);
+	    w -= w & 7;
+
+	    while (w)
+	    {
+		mval = vld1_u8 ((void *)mask);
+		dval = vld1_u8 ((void *)dst);
+		vst1_u8 ((void *)keep_dst, res);
+		keep_dst = dst;
+
+		res = vqadd_u8 (neon2mul (mval, sa), dval);
+
+		mask += 8;
+		dst += 8;
+		w -= 8;
+	    }
+	    vst1_u8 ((void *)keep_dst, res);
+	}
     }
     else
     {
-        // Use 4/2/1 load/store method to handle 1-7 pixels
-        while (height--)
-        {
-            dst = dst_line;
-            dst_line += dst_stride;
-            mask = mask_line;
-            mask_line += mask_stride;
-            w = width;
-
-            uint8x8_t mval=sa, dval=sa, res;
-            uint8_t *dst4=0, *dst2=0;
-
-            if (w&4)
-            {
-                mval = vreinterpret_u8_u32(vld1_lane_u32((void *)mask, vreinterpret_u32_u8(mval), 1));
-                dval = vreinterpret_u8_u32(vld1_lane_u32((void *)dst, vreinterpret_u32_u8(dval), 1));
-
-                dst4 = dst;
-                mask += 4;
-                dst += 4;
-            }
-            if (w&2)
-            {
-                mval = vreinterpret_u8_u16(vld1_lane_u16((void *)mask, vreinterpret_u16_u8(mval), 1));
-                dval = vreinterpret_u8_u16(vld1_lane_u16((void *)dst, vreinterpret_u16_u8(dval), 1));
-                dst2 = dst;
-                mask += 2;
-                dst += 2;
-            }
-            if (w&1)
-            {
-                mval = vld1_lane_u8(mask, mval, 1);
-                dval = vld1_lane_u8(dst, dval, 1);
-            }
-
-            res = vqadd_u8(neon2mul(mval,sa),dval);
-
-            if (w&1)
-                vst1_lane_u8(dst, res, 1);
-            if (w&2)
-                vst1_lane_u16((void *)dst2, vreinterpret_u16_u8(res), 1);
-            if (w&4)
-                vst1_lane_u32((void *)dst4, vreinterpret_u32_u8(res), 1);
-        }
+	/* Use 4/2/1 load/store method to handle 1-7 pixels */
+	while (height--)
+	{
+	    dst = dst_line;
+	    dst_line += dst_stride;
+	    mask = mask_line;
+	    mask_line += mask_stride;
+	    w = width;
+
+	    uint8x8_t mval = sa, dval = sa, res;
+	    uint8_t *dst4 = 0, *dst2 = 0;
+
+	    if (w & 4)
+	    {
+		mval = vreinterpret_u8_u32 (
+		    vld1_lane_u32 ((void *)mask, vreinterpret_u32_u8 (mval), 1));
+		dval = vreinterpret_u8_u32 (
+		    vld1_lane_u32 ((void *)dst, vreinterpret_u32_u8 (dval), 1));
+
+		dst4 = dst;
+		mask += 4;
+		dst += 4;
+	    }
+
+	    if (w & 2)
+	    {
+		mval = vreinterpret_u8_u16 (
+		    vld1_lane_u16 ((void *)mask, vreinterpret_u16_u8 (mval), 1));
+		dval = vreinterpret_u8_u16 (
+		    vld1_lane_u16 ((void *)dst, vreinterpret_u16_u8 (dval), 1));
+		dst2 = dst;
+		mask += 2;
+		dst += 2;
+	    }
+
+	    if (w & 1)
+	    {
+		mval = vld1_lane_u8 (mask, mval, 1);
+		dval = vld1_lane_u8 (dst, dval, 1);
+	    }
+
+	    res = vqadd_u8 (neon2mul (mval, sa), dval);
+
+	    if (w & 1)
+		vst1_lane_u8 (dst, res, 1);
+	    if (w & 2)
+		vst1_lane_u16 ((void *)dst2, vreinterpret_u16_u8 (res), 1);
+	    if (w & 4)
+		vst1_lane_u32 ((void *)dst4, vreinterpret_u32_u8 (res), 1);
+	}
     }
 }
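
The 4/2/1 tail works because any count from 1 to 7 splits into at most one 4-unit, one 2-unit and one 1-unit access, so three tests cover every case; the vector version additionally merges the three fragments into different lanes of a single register (note the lane indices on the vld1_lane_* calls), so one neon2mul/vqadd_u8 pass handles them all, with dst4/dst2 remembering where the partial stores must go back. The pointer bookkeeping in scalar form, with memcpy standing in for the lane loads/stores:

    #include <stdint.h>
    #include <string.h>

    /* copy 1..7 bytes with at most three accesses */
    static void
    copy_1_to_7 (uint8_t *dst, const uint8_t *src, int w)
    {
        if (w & 4)
        {
            memcpy (dst, src, 4);
            src += 4;
            dst += 4;
        }

        if (w & 2)
        {
            memcpy (dst, src, 2);
            src += 2;
            dst += 2;
        }

        if (w & 1)
            *dst = *src;
    }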
 
 #ifdef USE_GCC_INLINE_ASM
 
 static void
-neon_composite_src_16_16 (
-	pixman_implementation_t * impl,
-	pixman_op_t op,
-	pixman_image_t * src_image,
-	pixman_image_t * mask_image,
-	pixman_image_t * dst_image,
-	int32_t      src_x,
-	int32_t      src_y,
-	int32_t      mask_x,
-	int32_t      mask_y,
-	int32_t      dest_x,
-	int32_t      dest_y,
-	int32_t      width,
-	int32_t      height)
+neon_composite_src_16_16 (pixman_implementation_t * impl,
+                          pixman_op_t               op,
+                          pixman_image_t *          src_image,
+                          pixman_image_t *          mask_image,
+                          pixman_image_t *          dst_image,
+                          int32_t                   src_x,
+                          int32_t                   src_y,
+                          int32_t                   mask_x,
+                          int32_t                   mask_y,
+                          int32_t                   dest_x,
+                          int32_t                   dest_y,
+                          int32_t                   width,
+                          int32_t                   height)
 {
-	uint16_t    *dst_line, *src_line;
-	uint32_t     dst_stride, src_stride;
+    uint16_t    *dst_line, *src_line;
+    uint32_t dst_stride, src_stride;
 
-	if(!height || !width)
-		return;
+    if (!height || !width)
+	return;
 
-	/* We simply copy 16-bit-aligned pixels from one place to another. */
-	PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
-	PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    /* We simply copy 16-bit-aligned pixels from one place to another. */
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
-	/* Preload the first input scanline */
-	{
-		uint16_t *src_ptr = src_line;
-		uint32_t count = width;
-
-		asm volatile (
-		"0: @ loop							\n"
-		"	subs    %[count], %[count], #32				\n"
-		"	pld     [%[src]]					\n"
-		"	add     %[src], %[src], #64				\n"
-		"	bgt 0b							\n"
-
-		// Clobbered input registers marked as input/outputs
-		: [src] "+r" (src_ptr), [count] "+r" (count)
-		: // no unclobbered inputs
-		: "cc"
-		);
-	}
+    /* Preload the first input scanline */
+    {
+	uint16_t *src_ptr = src_line;
+	uint32_t count = width;
 
-	while(height--) {
-		uint16_t *dst_ptr = dst_line;
-		uint16_t *src_ptr = src_line;
-		uint32_t count = width;
-		uint32_t tmp = 0;
-
-		// Uses multi-register access and preloading to maximise bandwidth.
-		// Each pixel is one halfword, so a quadword contains 8px.
-		// Preload frequency assumed a 64-byte cacheline.
-		asm volatile (
-		"	cmp       %[count], #64				\n"
-		"	blt 1f    @ skip oversized fragments		\n"
-		"0: @ start with eight quadwords at a time		\n"
-		"	pld       [%[src], %[src_stride], LSL #1]	\n" // preload from next scanline
-		"	sub       %[count], %[count], #64		\n"
-		"	vld1.16   {d16,d17,d18,d19}, [%[src]]!		\n"
-		"	vld1.16   {d20,d21,d22,d23}, [%[src]]!		\n"
-		"	pld       [%[src], %[src_stride], LSL #1]	\n" // preload from next scanline
-		"	vld1.16   {d24,d25,d26,d27}, [%[src]]!		\n"
-		"	vld1.16   {d28,d29,d30,d31}, [%[src]]!		\n"
-		"	cmp       %[count], #64				\n"
-		"	vst1.16   {d16,d17,d18,d19}, [%[dst]]!		\n"
-		"	vst1.16   {d20,d21,d22,d23}, [%[dst]]!		\n"
-		"	vst1.16   {d24,d25,d26,d27}, [%[dst]]!		\n"
-		"	vst1.16   {d28,d29,d30,d31}, [%[dst]]!		\n"
-		"	bge 0b						\n"
-		"	cmp       %[count], #0				\n"
-		"	beq 7f    @ aligned fastpath			\n"
-		"1: @ four quadwords					\n"
-		"	tst       %[count], #32				\n"
-		"	beq 2f    @ skip oversized fragment		\n"
-		"	pld       [%[src], %[src_stride], LSL #1]	\n" // preload from next scanline
-		"	vld1.16   {d16,d17,d18,d19}, [%[src]]!		\n"
-		"	vld1.16   {d20,d21,d22,d23}, [%[src]]!		\n"
-		"	vst1.16   {d16,d17,d18,d19}, [%[dst]]!		\n"
-		"	vst1.16   {d20,d21,d22,d23}, [%[dst]]!		\n"
-		"2: @ two quadwords					\n"
-		"	tst       %[count], #16				\n"
-		"	beq 3f    @ skip oversized fragment		\n"
-		"	pld       [%[src], %[src_stride], LSL #1]	\n" // preload from next scanline
-		"	vld1.16   {d16,d17,d18,d19}, [%[src]]!		\n"
-		"	vst1.16   {d16,d17,d18,d19}, [%[dst]]!		\n"
-		"3: @ one quadword					\n"
-		"	tst       %[count], #8				\n"
-		"	beq 4f    @ skip oversized fragment		\n"
-		"	vld1.16   {d16,d17}, [%[src]]!			\n"
-		"	vst1.16   {d16,d17}, [%[dst]]!			\n"
-		"4: @ one doubleword					\n"
-		"	tst       %[count], #4				\n"
-		"	beq 5f    @ skip oversized fragment		\n"
-		"	vld1.16   {d16}, [%[src]]!			\n"
-		"	vst1.16   {d16}, [%[dst]]!			\n"
-		"5: @ one word						\n"
-		"	tst       %[count], #2				\n"
-		"	beq 6f    @ skip oversized fragment		\n"
-		"	ldr       %[tmp], [%[src]], #4			\n"
-		"	str       %[tmp], [%[dst]], #4			\n"
-		"6: @ one halfword					\n"
-		"	tst       %[count], #1				\n"
-		"	beq 7f    @ skip oversized fragment		\n"
-		"	ldrh      %[tmp], [%[src]]			\n"
-		"	strh      %[tmp], [%[dst]]			\n"
-		"7: @ end						\n"
-
-		// Clobbered input registers marked as input/outputs
-		: [dst] "+r" (dst_ptr), [src] "+r" (src_ptr), [count] "+r" (count), [tmp] "+r" (tmp)
-
-		// Unclobbered input
-		: [src_stride] "r" (src_stride)
-
-		// Clobbered vector registers
-		// NB: these are the quad aliases of the double registers used in the asm
-		: "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "cc", "memory"
-		);
-
-		src_line += src_stride;
-		dst_line += dst_stride;
-	}
+	asm volatile (
+	    "0: @ loop							\n"
+	    "	subs    %[count], %[count], #32				\n"
+	    "	pld     [%[src]]					\n"
+	    "	add     %[src], %[src], #64				\n"
+	    "	bgt 0b							\n"
+
+	    /* Clobbered input registers marked as input/outputs */
+	    : [src] "+r" (src_ptr), [count] "+r" (count)
+	    :     /* no unclobbered inputs */
+	    : "cc"
+	    );
+    }
+
+    while (height--)
+    {
+	uint16_t *dst_ptr = dst_line;
+	uint16_t *src_ptr = src_line;
+	uint32_t count = width;
+	uint32_t tmp = 0;
+
+	/* Uses multi-register access and preloading to maximise bandwidth.
+	 * Each pixel is one halfword, so a quadword contains 8px.
+	 * Preload frequency assumes a 64-byte cacheline.
+	 */
+	asm volatile (
+	    "	cmp       %[count], #64				\n"
+	    "	blt 1f    @ skip oversized fragments		\n"
+	    "0: @ start with eight quadwords at a time		\n"
+	    /* preload from next scanline */
+	    "	pld       [%[src], %[src_stride], LSL #1]	\n"
+	    "	sub       %[count], %[count], #64		\n"
+	    "	vld1.16   {d16,d17,d18,d19}, [%[src]]!		\n"
+	    "	vld1.16   {d20,d21,d22,d23}, [%[src]]!		\n"
+	    /* preload from next scanline */
+	    "	pld       [%[src], %[src_stride], LSL #1]	\n"
+	    "	vld1.16   {d24,d25,d26,d27}, [%[src]]!		\n"
+	    "	vld1.16   {d28,d29,d30,d31}, [%[src]]!		\n"
+	    "	cmp       %[count], #64				\n"
+	    "	vst1.16   {d16,d17,d18,d19}, [%[dst]]!		\n"
+	    "	vst1.16   {d20,d21,d22,d23}, [%[dst]]!		\n"
+	    "	vst1.16   {d24,d25,d26,d27}, [%[dst]]!		\n"
+	    "	vst1.16   {d28,d29,d30,d31}, [%[dst]]!		\n"
+	    "	bge 0b						\n"
+	    "	cmp       %[count], #0				\n"
+	    "	beq 7f    @ aligned fastpath			\n"
+	    "1: @ four quadwords				\n"
+	    "	tst       %[count], #32				\n"
+	    "	beq 2f    @ skip oversized fragment		\n"
+	    /* preload from next scanline */
+	    "	pld       [%[src], %[src_stride], LSL #1]	\n"
+	    "	vld1.16   {d16,d17,d18,d19}, [%[src]]!		\n"
+	    "	vld1.16   {d20,d21,d22,d23}, [%[src]]!		\n"
+	    "	vst1.16   {d16,d17,d18,d19}, [%[dst]]!		\n"
+	    "	vst1.16   {d20,d21,d22,d23}, [%[dst]]!		\n"
+	    "2: @ two quadwords					\n"
+	    "	tst       %[count], #16				\n"
+	    "	beq 3f    @ skip oversized fragment		\n"
+	    /* preload from next scanline */
+	    "	pld       [%[src], %[src_stride], LSL #1]	\n"
+	    "	vld1.16   {d16,d17,d18,d19}, [%[src]]!		\n"
+	    "	vst1.16   {d16,d17,d18,d19}, [%[dst]]!		\n"
+	    "3: @ one quadword					\n"
+	    "	tst       %[count], #8				\n"
+	    "	beq 4f    @ skip oversized fragment		\n"
+	    "	vld1.16   {d16,d17}, [%[src]]!			\n"
+	    "	vst1.16   {d16,d17}, [%[dst]]!			\n"
+	    "4: @ one doubleword				\n"
+	    "	tst       %[count], #4				\n"
+	    "	beq 5f    @ skip oversized fragment		\n"
+	    "	vld1.16   {d16}, [%[src]]!			\n"
+	    "	vst1.16   {d16}, [%[dst]]!			\n"
+	    "5: @ one word					\n"
+	    "	tst       %[count], #2				\n"
+	    "	beq 6f    @ skip oversized fragment		\n"
+	    "	ldr       %[tmp], [%[src]], #4			\n"
+	    "	str       %[tmp], [%[dst]], #4			\n"
+	    "6: @ one halfword					\n"
+	    "	tst       %[count], #1				\n"
+	    "	beq 7f    @ skip oversized fragment		\n"
+	    "	ldrh      %[tmp], [%[src]]			\n"
+	    "	strh      %[tmp], [%[dst]]			\n"
+	    "7: @ end						\n"
+
+	    /* Clobbered input registers marked as input/outputs */
+	    : [dst] "+r" (dst_ptr), [src] "+r" (src_ptr),
+	      [count] "+r" (count), [tmp] "+r" (tmp)
+
+	      /* Unclobbered input */
+	    : [src_stride] "r" (src_stride)
+
+	      /* Clobbered vector registers */
+
+	      /* NB: these are the quad aliases of the double
+	       * registers used in the asm
+	       */
+	    : "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "cc", "memory"
+	    );
+
+	src_line += src_stride;
+	dst_line += dst_stride;
+    }
 }
 
 #endif /* USE_GCC_INLINE_ASM */
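
neon_composite_src_16_16 is a plain copy; the asm peels the halfword count in power-of-two chunks (64, 32, 16, 8, 4, 2, 1) so each scanline takes a bounded number of branches, while the pld instructions keep the next scanline's source on its way into the cache. A portable C sketch of the same shape, with memcpy standing in for the paired vld1/vst1 transfers:

    #include <stdint.h>
    #include <string.h>

    static void
    copy_row_u16 (uint16_t *dst, const uint16_t *src, uint32_t count)
    {
        uint32_t chunk;

        /* eight quadwords (64 pixels) at a time */
        while (count >= 64)
        {
            memcpy (dst, src, 64 * sizeof (uint16_t));
            src += 64;
            dst += 64;
            count -= 64;
        }

        /* what remains needs at most one fragment of each size */
        for (chunk = 32; chunk; chunk >>= 1)
        {
            if (count & chunk)
            {
                memcpy (dst, src, chunk * sizeof (uint16_t));
                src += chunk;
                dst += chunk;
            }
        }
    }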
 
 static void
-neon_composite_src_24_16 (
-	pixman_implementation_t * impl,
-	pixman_op_t op,
-	pixman_image_t * src_image,
-	pixman_image_t * mask_image,
-	pixman_image_t * dst_image,
-	int32_t      src_x,
-	int32_t      src_y,
-	int32_t      mask_x,
-	int32_t      mask_y,
-	int32_t      dest_x,
-	int32_t      dest_y,
-	int32_t      width,
-	int32_t      height)
+neon_composite_src_24_16 (pixman_implementation_t * impl,
+                          pixman_op_t               op,
+                          pixman_image_t *          src_image,
+                          pixman_image_t *          mask_image,
+                          pixman_image_t *          dst_image,
+                          int32_t                   src_x,
+                          int32_t                   src_y,
+                          int32_t                   mask_x,
+                          int32_t                   mask_y,
+                          int32_t                   dest_x,
+                          int32_t                   dest_y,
+                          int32_t                   width,
+                          int32_t                   height)
 {
-	uint16_t    *dst_line;
-	uint32_t    *src_line;
-	uint32_t     dst_stride, src_stride;
+    uint16_t    *dst_line;
+    uint32_t    *src_line;
+    uint32_t dst_stride, src_stride;
 
-	if(!width || !height)
-		return;
+    if (!width || !height)
+	return;
 
-	/* We simply copy pixels from one place to another, assuming that the source's alpha is opaque. */
-	PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-	PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    /* We simply copy pixels from one place to another,
+     * assuming that the source's alpha is opaque.
+     */
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
-	/* Preload the first input scanline */
-	{
-		uint8_t *src_ptr = (uint8_t*) src_line;
-		uint32_t count = (width + 15) / 16;
+    /* Preload the first input scanline */
+    {
+	uint8_t *src_ptr = (uint8_t*) src_line;
+	uint32_t count = (width + 15) / 16;
 
 #ifdef USE_GCC_INLINE_ASM
-		asm volatile (
-		"0: @ loop						\n"
-		"	subs    %[count], %[count], #1			\n"
-		"	pld     [%[src]]				\n"
-		"	add     %[src], %[src], #64			\n"
-		"	bgt 0b						\n"
-
-		// Clobbered input registers marked as input/outputs
-		: [src] "+r" (src_ptr), [count] "+r" (count)
-		: // no unclobbered inputs
-		: "cc"
-		);
+	asm volatile (
+	    "0: @ loop						\n"
+	    "	subs    %[count], %[count], #1			\n"
+	    "	pld     [%[src]]				\n"
+	    "	add     %[src], %[src], #64			\n"
+	    "	bgt 0b						\n"
+
+	    /* Clobbered input registers marked as input/outputs */
+	    : [src] "+r" (src_ptr), [count] "+r" (count)
+	    :     /* no unclobbered inputs */
+	    : "cc"
+	    );
 #else
-		do {
-			__pld(src_ptr);
-			src_ptr += 64;
-		} while(--count);
-#endif
+	do
+	{
+	    __pld (src_ptr);
+	    src_ptr += 64;
 	}
+	while (--count);
+#endif
+    }
 
-	while(height--) {
-		uint16_t *dst_ptr = dst_line;
-		uint32_t *src_ptr = src_line;
-		uint32_t count = width;
-		const uint32_t rb_mask = 0x1F;
-		const uint32_t g_mask = 0x3F;
-
-		// If you're going to complain about a goto, take a long hard look
-		// at the massive blocks of assembler this skips over.  ;-)
-		if(count < 8)
-			goto small_stuff;
+    while (height--)
+    {
+	uint16_t *dst_ptr = dst_line;
+	uint32_t *src_ptr = src_line;
+	uint32_t count = width;
+	const uint32_t rb_mask = 0x1F;
+	const uint32_t g_mask = 0x3F;
+
+	/* If you're going to complain about a goto, take a long hard look
+	 * at the massive blocks of assembler this skips over.  ;-)
+	 */
+	if (count < 8)
+	    goto small_stuff;
 
 #ifdef USE_GCC_INLINE_ASM
 
-		// This is not as aggressive as the RGB565-source case.
-		// Generally the source is in cached RAM when the formats are different, so we use preload.
-		// We don't need to blend, so we are not reading from the uncached framebuffer.
-		asm volatile (
-		"	cmp       %[count], #16										\n"
-		"	blt 1f    @ skip oversized fragments								\n"
-		"0: @ start with sixteen pixels at a time								\n"
-		"	sub       %[count], %[count], #16								\n"
-		"	pld      [%[src], %[src_stride], lsl #2]         @ preload from next scanline			\n"
-		"	vld4.8    {d0,d1,d2,d3}, [%[src]]!		@ d3 is alpha and ignored, d2-0 are rgb.	\n"
-		"	vld4.8    {d4,d5,d6,d7}, [%[src]]!		@ d7 is alpha and ignored, d6-4 are rgb.	\n"
-		"	vshll.u8  q8, d2, #8				@ expand first red for repacking		\n"
-		"	vshll.u8  q10, d1, #8				@ expand first green for repacking		\n"
-		"	vshll.u8  q11, d0, #8				@ expand first blue for repacking		\n"
-		"	vshll.u8  q9, d6, #8				@ expand second red for repacking		\n"
-		"	vsri.u16  q8, q10, #5				@ insert first green after red			\n"
-		"	vshll.u8  q10, d5, #8				@ expand second green for repacking		\n"
-		"	vsri.u16  q8, q11, #11				@ insert first blue after green			\n"
-		"	vshll.u8  q11, d4, #8				@ expand second blue for repacking		\n"
-		"	vsri.u16  q9, q10, #5				@ insert second green after red			\n"
-		"	vsri.u16  q9, q11, #11				@ insert second blue after green		\n"
-		"	cmp       %[count], #16										\n"
-		"	vst1.16   {d16,d17,d18,d19}, [%[dst]]!          @ store 16 pixels				\n"
-		"	bge 0b												\n"
-		"1: @ end of main loop	\n"
-		"	cmp       %[count], #8				@ can we still do an 8-pixel block?		\n"
-		"	blt 2f												\n"
-		"	sub       %[count], %[count], #8	\n"
-		"	pld      [%[src], %[src_stride], lsl #2]         @ preload from next scanline			\n"
-		"	vld4.8    {d0,d1,d2,d3}, [%[src]]!		@ d3 is alpha and ignored, d2-0 are rgb.	\n"
-		"	vshll.u8  q8, d2, #8				@ expand first red for repacking		\n"
-		"	vshll.u8  q10, d1, #8				@ expand first green for repacking		\n"
-		"	vshll.u8  q11, d0, #8				@ expand first blue for repacking		\n"
-		"	vsri.u16  q8, q10, #5				@ insert first green after red			\n"
-		"	vsri.u16  q8, q11, #11				@ insert first blue after green			\n"
-		"	vst1.16   {d16,d17}, [%[dst]]!          @ store 8 pixels				\n"
-		"2: @ end												\n"
-
-		// Clobbered input and working registers marked as input/outputs
-		: [dst] "+r" (dst_ptr), [src] "+r" (src_ptr), [count] "+r" (count)
-
-		// Unclobbered input
-		: [src_stride] "r" (src_stride)
-
-		// Clobbered vector registers
-		// NB: these are the quad aliases of the double registers used in the asm
-		: "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "cc", "memory"
-		);
+	/* This is not as aggressive as the RGB565-source case.
+	 * Generally the source is in cached RAM when the formats are
+	 * different, so we use preload.
+	 *
+	 * We don't need to blend, so we are not reading from the
+	 * uncached framebuffer.
+	 */
+	asm volatile (
+	    "	cmp       %[count], #16				\n"
+	    "	blt 1f    @ skip oversized fragments		\n"
+	    "0: @ start with sixteen pixels at a time		\n"
+	    "	sub       %[count], %[count], #16		\n"
+	    "	pld      [%[src], %[src_stride], lsl #2]        @ preload from next scanline			\n"
+	    "	vld4.8    {d0,d1,d2,d3}, [%[src]]!		@ d3 is alpha and ignored, d2-0 are rgb.	\n"
+	    "	vld4.8    {d4,d5,d6,d7}, [%[src]]!		@ d7 is alpha and ignored, d6-4 are rgb.	\n"
+	    "	vshll.u8  q8, d2, #8				@ expand first red for repacking		\n"
+	    "	vshll.u8  q10, d1, #8				@ expand first green for repacking		\n"
+	    "	vshll.u8  q11, d0, #8				@ expand first blue for repacking		\n"
+	    "	vshll.u8  q9, d6, #8				@ expand second red for repacking		\n"
+	    "	vsri.u16  q8, q10, #5				@ insert first green after red			\n"
+	    "	vshll.u8  q10, d5, #8				@ expand second green for repacking		\n"
+	    "	vsri.u16  q8, q11, #11				@ insert first blue after green			\n"
+	    "	vshll.u8  q11, d4, #8				@ expand second blue for repacking		\n"
+	    "	vsri.u16  q9, q10, #5				@ insert second green after red			\n"
+	    "	vsri.u16  q9, q11, #11				@ insert second blue after green		\n"
+	    "	cmp       %[count], #16				\n"
+	    "	vst1.16   {d16,d17,d18,d19}, [%[dst]]!          @ store 16 pixels				\n"
+	    "	bge 0b						\n"
+	    "1: @ end of main loop				\n"
+	    "	cmp       %[count], #8				@ can we still do an 8-pixel block?		\n"
+	    "	blt 2f						\n"
+	    "	sub       %[count], %[count], #8		\n"
+	    "	pld      [%[src], %[src_stride], lsl #2]        @ preload from next scanline			\n"
+	    "	vld4.8    {d0,d1,d2,d3}, [%[src]]!		@ d3 is alpha and ignored, d2-0 are rgb.	\n"
+	    "	vshll.u8  q8, d2, #8				@ expand first red for repacking		\n"
+	    "	vshll.u8  q10, d1, #8				@ expand first green for repacking		\n"
+	    "	vshll.u8  q11, d0, #8				@ expand first blue for repacking		\n"
+	    "	vsri.u16  q8, q10, #5				@ insert first green after red			\n"
+	    "	vsri.u16  q8, q11, #11				@ insert first blue after green			\n"
+	    "	vst1.16   {d16,d17}, [%[dst]]!          @ store 8 pixels				\n"
+	    "2: @ end						\n"
+
+	    /* Clobbered input and working registers marked as input/outputs */
+	    : [dst] "+r" (dst_ptr), [src] "+r" (src_ptr), [count] "+r" (count)
+
+	      /* Unclobbered input */
+	    : [src_stride] "r" (src_stride)
+
+	      /* Clobbered vector registers */
+
+	      /* NB: these are the quad aliases of the
+	       * double registers used in the asm
+	       */
+	    : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "cc", "memory"
+	    );
 #else
-		// A copy of the above code, in intrinsics-form.
-		// This should be pretty self-documenting...
-		while(count >= 16) {
-			uint8x8x4_t pixel_set_a, pixel_set_b;
-			uint16x8_t red_a, green_a, blue_a;
-			uint16x8_t red_b, green_b, blue_b;
-			uint16x8_t dest_pixels_a, dest_pixels_b;
-
-			count -= 16;
-			__pld(src_ptr + src_stride);
-			pixel_set_a = vld4_u8((uint8_t*)(src_ptr));
-			pixel_set_b = vld4_u8((uint8_t*)(src_ptr+8));
-			src_ptr += 16;
-
-			red_a   = vshll_n_u8(pixel_set_a.val[2], 8);
-			green_a = vshll_n_u8(pixel_set_a.val[1], 8);
-			blue_a  = vshll_n_u8(pixel_set_a.val[0], 8);
-			red_b   = vshll_n_u8(pixel_set_b.val[2], 8);
-			green_b = vshll_n_u8(pixel_set_b.val[1], 8);
-			blue_b  = vshll_n_u8(pixel_set_b.val[0], 8);
-			dest_pixels_a = vsriq_n_u16(red_a, green_a, 5);
-			dest_pixels_b = vsriq_n_u16(red_b, green_b, 5);
-			dest_pixels_a = vsriq_n_u16(dest_pixels_a, blue_a, 11);
-			dest_pixels_b = vsriq_n_u16(dest_pixels_b, blue_b, 11);
-
-			// There doesn't seem to be an intrinsic for the double-quadword variant
-			vst1q_u16(dst_ptr  , dest_pixels_a);
-			vst1q_u16(dst_ptr+8, dest_pixels_b);
-			dst_ptr += 16;
-		}
-
-		// 8-pixel loop
-		if(count >= 8) {
-			uint8x8x4_t pixel_set_a;
-			uint16x8_t red_a, green_a, blue_a;
-			uint16x8_t dest_pixels_a;
-
-			__pld(src_ptr + src_stride);
-			count -= 8;
-			pixel_set_a = vld4_u8((uint8_t*)(src_ptr));
-			src_ptr += 8;
-
-			red_a   = vshll_n_u8(pixel_set_a.val[2], 8);
-			green_a = vshll_n_u8(pixel_set_a.val[1], 8);
-			blue_a  = vshll_n_u8(pixel_set_a.val[0], 8);
-			dest_pixels_a = vsriq_n_u16(red_a, green_a, 5);
-			dest_pixels_a = vsriq_n_u16(dest_pixels_a, blue_a, 11);
-
-			vst1q_u16(dst_ptr  , dest_pixels_a);
-			dst_ptr += 8;
-		}
-
-#endif	// USE_GCC_INLINE_ASM
-
-	small_stuff:
-
-		if(count)
-			__pld(src_ptr + src_stride);
-
-		while(count >= 2) {
-			uint32_t src_pixel_a = *src_ptr++;
-			uint32_t src_pixel_b = *src_ptr++;
-
-			// ARM is really good at shift-then-ALU ops.
-			// This should be a total of six shift-ANDs and five shift-ORs.
-			uint32_t dst_pixels_a;
-			uint32_t dst_pixels_b;
-
-			dst_pixels_a  = ((src_pixel_a >>  3) & rb_mask);
-			dst_pixels_a |= ((src_pixel_a >> 10) &  g_mask) << 5;
-			dst_pixels_a |= ((src_pixel_a >> 19) & rb_mask) << 11;
-
-			dst_pixels_b  = ((src_pixel_b >>  3) & rb_mask);
-			dst_pixels_b |= ((src_pixel_b >> 10) &  g_mask) << 5;
-			dst_pixels_b |= ((src_pixel_b >> 19) & rb_mask) << 11;
-
-			// little-endian mode only
-			*((uint32_t*) dst_ptr) = dst_pixels_a | (dst_pixels_b << 16);
-			dst_ptr += 2;
-			count -= 2;
-		}
-
-		if(count) {
-			uint32_t src_pixel = *src_ptr++;
-
-			// ARM is really good at shift-then-ALU ops.
-			// This block should end up as three shift-ANDs and two shift-ORs.
-			uint32_t tmp_blue  = (src_pixel >>  3) & rb_mask;
-			uint32_t tmp_green = (src_pixel >> 10) & g_mask;
-			uint32_t tmp_red   = (src_pixel >> 19) & rb_mask;
-			uint16_t dst_pixel = (tmp_red << 11) | (tmp_green << 5) | tmp_blue;
-
-			*dst_ptr++ = dst_pixel;
-			count--;
-		}
-
-		src_line += src_stride;
-		dst_line += dst_stride;
+	/* A copy of the above code, in intrinsics form. */
+	while (count >= 16)
+	{
+	    uint8x8x4_t pixel_set_a, pixel_set_b;
+	    uint16x8_t red_a, green_a, blue_a;
+	    uint16x8_t red_b, green_b, blue_b;
+	    uint16x8_t dest_pixels_a, dest_pixels_b;
+
+	    count -= 16;
+	    __pld (src_ptr + src_stride);
+	    pixel_set_a = vld4_u8 ((uint8_t*)(src_ptr));
+	    pixel_set_b = vld4_u8 ((uint8_t*)(src_ptr + 8));
+	    src_ptr += 16;
+
+	    red_a   = vshll_n_u8 (pixel_set_a.val[2], 8);
+	    green_a = vshll_n_u8 (pixel_set_a.val[1], 8);
+	    blue_a  = vshll_n_u8 (pixel_set_a.val[0], 8);
+
+	    red_b   = vshll_n_u8 (pixel_set_b.val[2], 8);
+	    green_b = vshll_n_u8 (pixel_set_b.val[1], 8);
+	    blue_b  = vshll_n_u8 (pixel_set_b.val[0], 8);
+
+	    dest_pixels_a = vsriq_n_u16 (red_a, green_a, 5);
+	    dest_pixels_b = vsriq_n_u16 (red_b, green_b, 5);
+
+	    dest_pixels_a = vsriq_n_u16 (dest_pixels_a, blue_a, 11);
+	    dest_pixels_b = vsriq_n_u16 (dest_pixels_b, blue_b, 11);
+
+	    /* There doesn't seem to be an intrinsic for the
+	     * double-quadword variant
+	     */
+	    vst1q_u16 (dst_ptr, dest_pixels_a);
+	    vst1q_u16 (dst_ptr + 8, dest_pixels_b);
+	    dst_ptr += 16;
+	}
+
+	/* 8-pixel loop */
+	if (count >= 8)
+	{
+	    uint8x8x4_t pixel_set_a;
+	    uint16x8_t red_a, green_a, blue_a;
+	    uint16x8_t dest_pixels_a;
+
+	    __pld (src_ptr + src_stride);
+	    count -= 8;
+	    pixel_set_a = vld4_u8 ((uint8_t*)(src_ptr));
+	    src_ptr += 8;
+
+	    red_a   = vshll_n_u8 (pixel_set_a.val[2], 8);
+	    green_a = vshll_n_u8 (pixel_set_a.val[1], 8);
+	    blue_a  = vshll_n_u8 (pixel_set_a.val[0], 8);
+
+	    dest_pixels_a = vsriq_n_u16 (red_a, green_a, 5);
+	    dest_pixels_a = vsriq_n_u16 (dest_pixels_a, blue_a, 11);
+
+	    vst1q_u16 (dst_ptr, dest_pixels_a);
+	    dst_ptr += 8;
+	}
+
+#endif  /* USE_GCC_INLINE_ASM */
+
+    small_stuff:
+	if (count)
+	    __pld (src_ptr + src_stride);
+
+	while (count >= 2)
+	{
+	    uint32_t src_pixel_a = *src_ptr++;
+	    uint32_t src_pixel_b = *src_ptr++;
+
+	    /* ARM is really good at shift-then-ALU ops.
+	     * This should be a total of six shift-ANDs and five shift-ORs.
+	     */
+	    uint32_t dst_pixels_a;
+	    uint32_t dst_pixels_b;
+
+	    dst_pixels_a  = ((src_pixel_a >>  3) & rb_mask);
+	    dst_pixels_a |= ((src_pixel_a >> 10) &  g_mask) << 5;
+	    dst_pixels_a |= ((src_pixel_a >> 19) & rb_mask) << 11;
+
+	    dst_pixels_b  = ((src_pixel_b >>  3) & rb_mask);
+	    dst_pixels_b |= ((src_pixel_b >> 10) &  g_mask) << 5;
+	    dst_pixels_b |= ((src_pixel_b >> 19) & rb_mask) << 11;
+
+	    /* little-endian mode only */
+	    *((uint32_t*) dst_ptr) = dst_pixels_a | (dst_pixels_b << 16);
+	    dst_ptr += 2;
+	    count -= 2;
+	}
+
+	if (count)
+	{
+	    uint32_t src_pixel = *src_ptr++;
+
+	    /* ARM is really good at shift-then-ALU ops.
+	     * This block should end up as three shift-ANDs
+	     * and two shift-ORs.
+	     */
+	    uint32_t tmp_blue  = (src_pixel >>  3) & rb_mask;
+	    uint32_t tmp_green = (src_pixel >> 10) & g_mask;
+	    uint32_t tmp_red   = (src_pixel >> 19) & rb_mask;
+	    uint16_t dst_pixel = (tmp_red << 11) | (tmp_green << 5) | tmp_blue;
+
+	    *dst_ptr++ = dst_pixel;
+	    count--;
 	}
-}
 
+	src_line += src_stride;
+	dst_line += dst_stride;
+    }
+}
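
The vshll/vsri pairs pack x8r8g8b8 down to r5g6b5 without any mask constants: each channel is widened into the top of a 16-bit lane, then shift-inserted under the bits already placed. A scalar sketch of one lane (the small_stuff path above computes the same value with explicit shift-and-mask, which is the cheaper form when there are no vector registers to fill):

    #include <stdint.h>

    static uint16_t
    pack_565 (uint8_t r, uint8_t g, uint8_t b)
    {
        uint16_t p = (uint16_t)(r << 8);                /* vshll.u8 #8: red in bits 15..8     */

        p = (p & 0xf800) | (uint16_t)((g << 8) >> 5);   /* vsri #5:  keep top 5, insert green */
        p = (p & 0xffe0) | (uint16_t)((b << 8) >> 11);  /* vsri #11: keep top 11, insert blue */

        return p;
    }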
 
 static pixman_bool_t
 pixman_fill_neon (uint32_t *bits,
-		  int stride,
-		  int bpp,
-		  int x,
-		  int y,
-		  int width,
-		  int height,
-		  uint32_t _xor)
+                  int       stride,
+                  int       bpp,
+                  int       x,
+                  int       y,
+                  int       width,
+                  int       height,
+                  uint32_t  _xor)
 {
     uint32_t byte_stride, color;
     char *dst;
@@ -1320,845 +1375,925 @@ pixman_fill_neon (uint32_t *bits,
 
     switch (bpp)
     {
-	case 8:
-	    dst = ((char *) bits) + y * byte_stride + x;
-	    _xor &= 0xff;
-	    color = _xor << 24 | _xor << 16 | _xor << 8 | _xor;
-	    break;
-	case 16:
-	    dst = ((char *) bits) + y * byte_stride + x * 2;
-	    _xor &= 0xffff;
-	    color = _xor << 16 | _xor;
-	    width *= 2;     /* width to bytes */
-	    break;
-	case 32:
-	    dst = ((char *) bits) + y * byte_stride + x * 4;
-	    color = _xor;
-	    width *= 4;     /* width to bytes */
-	    break;
-	default:
-	    return FALSE;
+    case 8:
+	dst = ((char *) bits) + y * byte_stride + x;
+	_xor &= 0xff;
+	color = _xor << 24 | _xor << 16 | _xor << 8 | _xor;
+	break;
+
+    case 16:
+	dst = ((char *) bits) + y * byte_stride + x * 2;
+	_xor &= 0xffff;
+	color = _xor << 16 | _xor;
+	width *= 2;         /* width to bytes */
+	break;
+
+    case 32:
+	dst = ((char *) bits) + y * byte_stride + x * 4;
+	color = _xor;
+	width *= 4;         /* width to bytes */
+	break;
+
+    default:
+	return FALSE;
     }
 
 #ifdef USE_GCC_INLINE_ASM
     if (width < 16)
+    {
 	/* We have a special case for such small widths that don't allow
-	   us to use wide 128-bit stores anyway. We don't waste time
-	   trying to align writes, since there are only very few of them anyway */
+	 * us to use wide 128-bit stores anyway. We don't waste time
+	 * trying to align writes, since there are only very few of them anyway
+	 */
 	asm volatile (
-	"cmp		%[height], #0\n" /* Check if empty fill */
-	"beq		3f\n"
-	"vdup.32	d0, %[color]\n"  /* Fill the color to neon req */
-
-	/* Check if we have a such width that can easily be handled by single
-	   operation for each scanline. This significantly reduces the number
-	   of test/branch instructions for each scanline */
-	"cmp		%[width], #8\n"
-	"beq		4f\n"
-	"cmp		%[width], #4\n"
-	"beq		5f\n"
-	"cmp		%[width], #2\n"
-	"beq		6f\n"
-
-	/* Loop starts here for each scanline */
-	"1:\n"
-	"mov		r4, %[dst]\n"    /* Starting address of the current line */
-	"tst		%[width], #8\n"
-	"beq		2f\n"
-	"vst1.8		{d0}, [r4]!\n"
-	"2:\n"
-	"tst		%[width], #4\n"
-	"beq		2f\n"
-	"str		%[color], [r4], #4\n"
-	"2:\n"
-	"tst		%[width], #2\n"
-	"beq		2f\n"
-	"strh		%[color], [r4], #2\n"
-	"2:\n"
-	"tst		%[width], #1\n"
-	"beq		2f\n"
-	"strb		%[color], [r4], #1\n"
-	"2:\n"
-
-	"subs		%[height], %[height], #1\n"
-	"add		%[dst], %[dst], %[byte_stride]\n"
-	"bne		1b\n"
-	"b		3f\n"
-
-	/* Special fillers for those widths that we can do with single operation */
-	"4:\n"
-	"subs		%[height], %[height], #1\n"
-	"vst1.8		{d0}, [%[dst]]\n"
-	"add		%[dst], %[dst], %[byte_stride]\n"
-	"bne		4b\n"
-	"b		3f\n"
-
-	"5:\n"
-	"subs		%[height], %[height], #1\n"
-	"str		%[color], [%[dst]]\n"
-	"add		%[dst], %[dst], %[byte_stride]\n"
-	"bne		5b\n"
-	"b		3f\n"
-
-	"6:\n"
-	"subs		%[height], %[height], #1\n"
-	"strh		%[color], [%[dst]]\n"
-	"add		%[dst], %[dst], %[byte_stride]\n"
-	"bne		6b\n"
-
-	"3:\n"
-	: /* No output members */
-	: [color] "r" (color), [height] "r" (height), [width] "r" (width),
-	  [dst] "r" (dst) , [byte_stride] "r" (byte_stride)
-	: "memory", "cc", "d0", "r4", "r5");
+	    "cmp		%[height], #0\n" /* Check if empty fill */
+	    "beq		3f\n"
+	    "vdup.32	d0, %[color]\n" /* Fill the color into a NEON reg */
+
+	    /* Check if we have such a width that can easily be handled by a
+	     * single operation for each scanline. This significantly reduces
+	     * the number of test/branch instructions for each scanline
+	     */
+	    "cmp		%[width], #8\n"
+	    "beq		4f\n"
+	    "cmp		%[width], #4\n"
+	    "beq		5f\n"
+	    "cmp		%[width], #2\n"
+	    "beq		6f\n"
+
+	    /* Loop starts here for each scanline */
+	    "1:\n"
+	    "mov		r4, %[dst]\n" /* Starting address of the current line */
+	    "tst		%[width], #8\n"
+	    "beq		2f\n"
+	    "vst1.8		{d0}, [r4]!\n"
+	    "2:\n"
+	    "tst		%[width], #4\n"
+	    "beq		2f\n"
+	    "str		%[color], [r4], #4\n"
+	    "2:\n"
+	    "tst		%[width], #2\n"
+	    "beq		2f\n"
+	    "strh		%[color], [r4], #2\n"
+	    "2:\n"
+	    "tst		%[width], #1\n"
+	    "beq		2f\n"
+	    "strb		%[color], [r4], #1\n"
+	    "2:\n"
+
+	    "subs		%[height], %[height], #1\n"
+	    "add		%[dst], %[dst], %[byte_stride]\n"
+	    "bne		1b\n"
+	    "b		3f\n"
+
+	    /* Special fillers for those widths that we can do with single operation */
+	    "4:\n"
+	    "subs		%[height], %[height], #1\n"
+	    "vst1.8		{d0}, [%[dst]]\n"
+	    "add		%[dst], %[dst], %[byte_stride]\n"
+	    "bne		4b\n"
+	    "b		3f\n"
+
+	    "5:\n"
+	    "subs		%[height], %[height], #1\n"
+	    "str		%[color], [%[dst]]\n"
+	    "add		%[dst], %[dst], %[byte_stride]\n"
+	    "bne		5b\n"
+	    "b		3f\n"
+
+	    "6:\n"
+	    "subs		%[height], %[height], #1\n"
+	    "strh		%[color], [%[dst]]\n"
+	    "add		%[dst], %[dst], %[byte_stride]\n"
+	    "bne		6b\n"
+
+	    "3:\n"
+	    
+	    : /* No output members */
+	    : [color] "r" (color), [height] "r" (height), [width] "r" (width),
+	    [dst] "r" (dst), [byte_stride] "r" (byte_stride)
+	    : "memory", "cc", "d0", "r4", "r5");
+    }
     else
+    {
 	asm volatile (
-	"cmp		%[height], #0\n" /* Check if empty fill */
-	"beq		5f\n"
-	"vdup.32	q0, %[color]\n"  /* Fill the color to neon req */
-
-	/* Loop starts here for each scanline */
-	"1:\n"
-	"mov		r4, %[dst]\n"    /* Starting address of the current line */
-	"mov		r5, %[width]\n"  /* We're going to write this many bytes */
-	"ands		r6, r4, #15\n"   /* Are we at the 128-bit aligned address? */
-	"beq		2f\n"            /* Jump to the best case */
-
-	/* We're not 128-bit aligned: However, we know that we can get to the
-	   next aligned location, since the fill is at least 16 bytes wide */
-	"rsb 		r6, r6, #16\n"   /* We would need to go forward this much */
-	"sub		r5, r5, r6\n"    /* Update bytes left */
-	"tst		r6, #1\n"
-	"beq		6f\n"
-	"vst1.8		{d0[0]}, [r4]!\n"/* Store byte, now we are word aligned */
-	"6:\n"
-	"tst		r6, #2\n"
-	"beq		6f\n"
-	"vst1.16	{d0[0]}, [r4, :16]!\n"/* Store half word, now we are 16-bit aligned */
-	"6:\n"
-	"tst		r6, #4\n"
-	"beq		6f\n"
-	"vst1.32	{d0[0]}, [r4, :32]!\n"/* Store word, now we're 32-bit aligned */
-	"6:\n"
-	"tst		r6, #8\n"
-	"beq		2f\n"
-	"vst1.64	{d0}, [r4, :64]!\n"    /* Store qword now we're 64-bit aligned */
-
-	/* The good case: We're 128-bit aligned for this scanline */
-	"2:\n"
-	"and		r6, r5, #15\n"        /* Number of tailing bytes */
-	"cmp		r5, r6\n"             /* Do we have at least one qword to write? */
-	"beq		6f\n"                 /* No, we just write the tail */
-	"lsr		r5, r5, #4\n"         /* This many full qwords to write */
-
-	/* The main block: Do 128-bit aligned writes */
-	"3:\n"
-	"subs		r5, r5, #1\n"
-	"vst1.64	{d0,d1}, [r4, :128]!\n"
-	"bne		3b\n"
-
-	/* Handle the tailing bytes: Do 64, 32, 16 and 8-bit aligned writes as needed.
-	    We know that we're currently at 128-bit aligned address, so we can just
-	    pick the biggest operations that the remaining write width allows */
-	"6:\n"
-	"cmp		r6, #0\n"
-	"beq		4f\n"
-	"tst		r6, #8\n"
-	"beq		6f\n"
-	"vst1.64	{d0}, [r4, :64]!\n"
-	"6:\n"
-	"tst		r6, #4\n"
-	"beq		6f\n"
-	"vst1.32	{d0[0]}, [r4, :32]!\n"
-	"6:\n"
-	"tst		r6, #2\n"
-	"beq		6f\n"
-	"vst1.16	{d0[0]}, [r4, :16]!\n"
-	"6:\n"
-	"tst		r6, #1\n"
-	"beq		4f\n"
-	"vst1.8		{d0[0]}, [r4]!\n"
-	"4:\n"
-
-	/* Handle the next scanline */
-	"subs		%[height], %[height], #1\n"
-	"add		%[dst], %[dst], %[byte_stride]\n"
-	"bne		1b\n"
-	"5:\n"
-	: /* No output members */
-	: [color] "r" (color), [height] "r" (height), [width] "r" (width),
-	  [dst] "r" (dst) , [byte_stride] "r" (byte_stride)
-	: "memory", "cc", "q0", "d0", "d1", "r4", "r5", "r6");
-
+	    "cmp		%[height], #0\n" /* Check if empty fill */
+	    "beq		5f\n"
+	    "vdup.32	q0, %[color]\n" /* Fill the color into a NEON reg */
+
+	    /* Loop starts here for each scanline */
+	    "1:\n"
+	    "mov		r4, %[dst]\n" /* Starting address of the current line */
+	    "mov		r5, %[width]\n" /* We're going to write this many bytes */
+	    "ands		r6, r4, #15\n" /* Are we at a 128-bit aligned address? */
+	    "beq		2f\n" /* Jump to the best case */
+
+	    /* We're not 128-bit aligned: However, we know that we can get to the
+	       next aligned location, since the fill is at least 16 bytes wide */
+	    "rsb		r6, r6, #16\n" /* We would need to go forward this much */
+	    "sub		r5, r5, r6\n" /* Update bytes left */
+	    "tst		r6, #1\n"
+	    "beq		6f\n"
+	    "vst1.8		{d0[0]}, [r4]!\n" /* Store byte, now we are 16-bit aligned */
+	    "6:\n"
+	    "tst		r6, #2\n"
+	    "beq		6f\n"
+	    "vst1.16	{d0[0]}, [r4, :16]!\n" /* Store halfword, now we are 32-bit aligned */
+	    "6:\n"
+	    "tst		r6, #4\n"
+	    "beq		6f\n"
+	    "vst1.32	{d0[0]}, [r4, :32]!\n" /* Store word, now we're 64-bit aligned */
+	    "6:\n"
+	    "tst		r6, #8\n"
+	    "beq		2f\n"
+	    "vst1.64	{d0}, [r4, :64]!\n" /* Store doubleword, now we're 128-bit aligned */
+
+	    /* The good case: We're 128-bit aligned for this scanline */
+	    "2:\n"
+	    "and		r6, r5, #15\n" /* Number of trailing bytes */
+	    "cmp		r5, r6\n" /* Do we have at least one qword to write? */
+	    "beq		6f\n" /* No, we just write the tail */
+	    "lsr		r5, r5, #4\n" /* This many full qwords to write */
+
+	    /* The main block: Do 128-bit aligned writes */
+	    "3:\n"
+	    "subs		r5, r5, #1\n"
+	    "vst1.64	{d0,d1}, [r4, :128]!\n"
+	    "bne		3b\n"
+
+	    /* Handle the trailing bytes: Do 64, 32, 16 and 8-bit aligned writes as needed.
+	       We know that we're currently at a 128-bit aligned address, so we can just
+	       pick the biggest operations that the remaining write width allows */
+	    "6:\n"
+	    "cmp		r6, #0\n"
+	    "beq		4f\n"
+	    "tst		r6, #8\n"
+	    "beq		6f\n"
+	    "vst1.64	{d0}, [r4, :64]!\n"
+	    "6:\n"
+	    "tst		r6, #4\n"
+	    "beq		6f\n"
+	    "vst1.32	{d0[0]}, [r4, :32]!\n"
+	    "6:\n"
+	    "tst		r6, #2\n"
+	    "beq		6f\n"
+	    "vst1.16	{d0[0]}, [r4, :16]!\n"
+	    "6:\n"
+	    "tst		r6, #1\n"
+	    "beq		4f\n"
+	    "vst1.8		{d0[0]}, [r4]!\n"
+	    "4:\n"
+
+	    /* Handle the next scanline */
+	    "subs		%[height], %[height], #1\n"
+	    "add		%[dst], %[dst], %[byte_stride]\n"
+	    "bne		1b\n"
+	    "5:\n"
+	    : /* No output members */
+	    : [color] "r" (color), [height] "r" (height), [width] "r" (width),
+	    [dst] "r" (dst), [byte_stride] "r" (byte_stride)
+	    : "memory", "cc", "q0", "d0", "d1", "r4", "r5", "r6");
+    }
     return TRUE;
 
 #else
 
-    // TODO: intrinsic version for armcc
+    /* TODO: intrinsic version for armcc */
     return FALSE;
 
 #endif
 }
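
The switch at the top of pixman_fill_neon normalises every depth to the same
byte-oriented store loops: the fill value is replicated across a full 32-bit
word and width is converted to bytes. A sketch of just the replication step
(the helper name is illustrative, not part of the patch):

#include <stdint.h>

/* Replicate an 8- or 16-bpp fill value across a 32-bit word;
 * a 32-bpp value already fills the word.
 */
static uint32_t
replicate_fill_color (uint32_t _xor, int bpp)
{
    switch (bpp)
    {
    case 8:
	_xor &= 0xff;
	return (_xor << 24) | (_xor << 16) | (_xor << 8) | _xor;
    case 16:
	_xor &= 0xffff;
	return (_xor << 16) | _xor;
    default:
	return _xor;
    }
}
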
 
-
-// TODO: is there a more generic way of doing this being introduced?
+/* TODO: is there a more generic way of doing this being introduced? */
 #define NEON_SCANLINE_BUFFER_PIXELS (1024)
 
-static inline void neon_quadword_copy(
-	void* dst,
-	void* src,
-	uint32_t count,       // of quadwords
-	uint32_t trailer_count // of bytes
-)
+static inline void
+neon_quadword_copy (void*    dst,
+		    void*    src,
+		    uint32_t count,         /* of quadwords */
+		    uint32_t trailer_count  /* of bytes */)
 {
-	uint8_t *t_dst = dst, *t_src = src;
+    uint8_t *t_dst = dst, *t_src = src;
 
-	// Uses aligned multi-register loads to maximise read bandwidth
-	// on uncached memory such as framebuffers
-	// The accesses do not have the aligned qualifiers, so that the copy
-	// may convert between aligned-uncached and unaligned-cached memory.
-	// It is assumed that the CPU can infer alignedness from the address.
+    /* Uses aligned multi-register loads to maximise read bandwidth
+     * on uncached memory such as framebuffers.
+     * The accesses do not have the aligned qualifiers, so that the copy
+     * may convert between aligned-uncached and unaligned-cached memory.
+     * It is assumed that the CPU can infer alignedness from the address.
+     */
 
 #ifdef USE_GCC_INLINE_ASM
 
-	asm volatile (
-	"	cmp       %[count], #8						\n"
-	"	blt 1f    @ skip oversized fragments		\n"
-	"0: @ start with eight quadwords at a time		\n"
-	"	sub       %[count], %[count], #8			\n"
-	"	vld1.8    {d16,d17,d18,d19}, [%[src]]!		\n"
-	"	vld1.8    {d20,d21,d22,d23}, [%[src]]!		\n"
-	"	vld1.8    {d24,d25,d26,d27}, [%[src]]!		\n"
-	"	vld1.8    {d28,d29,d30,d31}, [%[src]]!		\n"
-	"	cmp       %[count], #8						\n"
-	"	vst1.8    {d16,d17,d18,d19}, [%[dst]]!		\n"
-	"	vst1.8    {d20,d21,d22,d23}, [%[dst]]!		\n"
-	"	vst1.8    {d24,d25,d26,d27}, [%[dst]]!		\n"
-	"	vst1.8    {d28,d29,d30,d31}, [%[dst]]!		\n"
-	"	bge 0b										\n"
-	"1: @ four quadwords							\n"
-	"	tst       %[count], #4						\n"
-	"	beq 2f    @ skip oversized fragment			\n"
-	"	vld1.8    {d16,d17,d18,d19}, [%[src]]!		\n"
-	"	vld1.8    {d20,d21,d22,d23}, [%[src]]!		\n"
-	"	vst1.8    {d16,d17,d18,d19}, [%[dst]]!		\n"
-	"	vst1.8    {d20,d21,d22,d23}, [%[dst]]!		\n"
-	"2: @ two quadwords								\n"
-	"	tst       %[count], #2						\n"
-	"	beq 3f    @ skip oversized fragment			\n"
-	"	vld1.8    {d16,d17,d18,d19}, [%[src]]!		\n"
-	"	vst1.8    {d16,d17,d18,d19}, [%[dst]]!		\n"
-	"3: @ one quadword								\n"
-	"	tst       %[count], #1						\n"
-	"	beq 4f    @ skip oversized fragment			\n"
-	"	vld1.8    {d16,d17}, [%[src]]!				\n"
-	"	vst1.8    {d16,d17}, [%[dst]]!				\n"
-	"4: @ end										\n"
-
-	// Clobbered input registers marked as input/outputs
+    asm volatile (
+        "	cmp       %[count], #8				\n"
+        "	blt 1f    @ skip oversized fragments		\n"
+        "0: @ start with eight quadwords at a time		\n"
+        "	sub       %[count], %[count], #8		\n"
+        "	vld1.8    {d16,d17,d18,d19}, [%[src]]!		\n"
+        "	vld1.8    {d20,d21,d22,d23}, [%[src]]!		\n"
+        "	vld1.8    {d24,d25,d26,d27}, [%[src]]!		\n"
+        "	vld1.8    {d28,d29,d30,d31}, [%[src]]!		\n"
+        "	cmp       %[count], #8				\n"
+        "	vst1.8    {d16,d17,d18,d19}, [%[dst]]!		\n"
+        "	vst1.8    {d20,d21,d22,d23}, [%[dst]]!		\n"
+        "	vst1.8    {d24,d25,d26,d27}, [%[dst]]!		\n"
+        "	vst1.8    {d28,d29,d30,d31}, [%[dst]]!		\n"
+        "	bge 0b						\n"
+        "1: @ four quadwords					\n"
+        "	tst       %[count], #4				\n"
+        "	beq 2f    @ skip oversized fragment		\n"
+        "	vld1.8    {d16,d17,d18,d19}, [%[src]]!		\n"
+        "	vld1.8    {d20,d21,d22,d23}, [%[src]]!		\n"
+        "	vst1.8    {d16,d17,d18,d19}, [%[dst]]!		\n"
+        "	vst1.8    {d20,d21,d22,d23}, [%[dst]]!		\n"
+        "2: @ two quadwords					\n"
+        "	tst       %[count], #2				\n"
+        "	beq 3f    @ skip oversized fragment		\n"
+        "	vld1.8    {d16,d17,d18,d19}, [%[src]]!		\n"
+        "	vst1.8    {d16,d17,d18,d19}, [%[dst]]!		\n"
+        "3: @ one quadword					\n"
+        "	tst       %[count], #1				\n"
+        "	beq 4f    @ skip oversized fragment		\n"
+        "	vld1.8    {d16,d17}, [%[src]]!			\n"
+        "	vst1.8    {d16,d17}, [%[dst]]!			\n"
+        "4: @ end						\n"
+
+        /* Clobbered input registers marked as input/outputs */
 	: [dst] "+r" (t_dst), [src] "+r" (t_src), [count] "+r" (count)
 
-	// No unclobbered inputs
+	  /* No unclobbered inputs */
 	:
 
-	// Clobbered vector registers
-	// NB: these are the quad aliases of the double registers used in the asm
-	: "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "cc", "memory"
-	);
+        /* Clobbered vector registers */
+        /* NB: these are the quad aliases of the double
+	 * registers used in the asm
+	 */
+	: "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15", "cc", "memory");
 
 #else
 
-	while(count >= 8) {
-		uint8x16x4_t t1 = vld4q_u8(t_src);
-		uint8x16x4_t t2 = vld4q_u8(t_src + sizeof(uint8x16x4_t));
-		t_src += sizeof(uint8x16x4_t) * 2;
-		vst4q_u8(t_dst, t1);
-		vst4q_u8(t_dst + sizeof(uint8x16x4_t), t2);
-		t_dst += sizeof(uint8x16x4_t) * 2;
-		count -= 8;
-	}
+    while (count >= 8)
+    {
+	uint8x16x4_t t1 = vld4q_u8 (t_src);
+	uint8x16x4_t t2 = vld4q_u8 (t_src + sizeof(uint8x16x4_t));
+	
+	t_src += sizeof(uint8x16x4_t) * 2;
+	vst4q_u8 (t_dst, t1);
+	vst4q_u8 (t_dst + sizeof(uint8x16x4_t), t2);
+	t_dst += sizeof(uint8x16x4_t) * 2;
+	count -= 8;
+    }
 
-	if(count & 4) {
-		uint8x16x4_t t1 = vld4q_u8(t_src);
-		t_src += sizeof(uint8x16x4_t);
-		vst4q_u8(t_dst, t1);
-		t_dst += sizeof(uint8x16x4_t);
+    if (count & 4)
+    {
+	uint8x16x4_t t1 = vld4q_u8 (t_src);
+	
+	t_src += sizeof(uint8x16x4_t);
+	vst4q_u8 (t_dst, t1);
+	t_dst += sizeof(uint8x16x4_t);
+    }
+
+    if (count & 2)
+    {
+	uint8x8x4_t t1 = vld4_u8 (t_src);
+	
+	t_src += sizeof(uint8x8x4_t);
+	vst4_u8 (t_dst, t1);
+	t_dst += sizeof(uint8x8x4_t);
+    }
+
+    if (count & 1)
+    {
+	uint8x16_t t1 = vld1q_u8 (t_src);
+	
+	t_src += sizeof(uint8x16_t);
+	vst1q_u8 (t_dst, t1);
+	t_dst += sizeof(uint8x16_t);
+    }
+
+#endif  /* !USE_GCC_INLINE_ASM */
+
+    if (trailer_count)
+    {
+	if (trailer_count & 8)
+	{
+	    uint8x8_t t1 = vld1_u8 (t_src);
+	    
+	    t_src += sizeof(uint8x8_t);
+	    vst1_u8 (t_dst, t1);
+	    t_dst += sizeof(uint8x8_t);
 	}
 
-	if(count & 2) {
-		uint8x8x4_t t1 = vld4_u8(t_src);
-		t_src += sizeof(uint8x8x4_t);
-		vst4_u8(t_dst, t1);
-		t_dst += sizeof(uint8x8x4_t);
+	if (trailer_count & 4)
+	{
+	    *((uint32_t*) t_dst) = *((uint32_t*) t_src);
+	    
+	    t_dst += 4;
+	    t_src += 4;
 	}
 
-	if(count & 1) {
-		uint8x16_t t1 = vld1q_u8(t_src);
-		t_src += sizeof(uint8x16_t);
-		vst1q_u8(t_dst, t1);
-		t_dst += sizeof(uint8x16_t);
+	if (trailer_count & 2)
+	{
+	    *((uint16_t*) t_dst) = *((uint16_t*) t_src);
+	    
+	    t_dst += 2;
+	    t_src += 2;
 	}
 
-#endif  // !USE_GCC_INLINE_ASM
-
-	if(trailer_count) {
-		if(trailer_count & 8) {
-			uint8x8_t t1 = vld1_u8(t_src);
-			t_src += sizeof(uint8x8_t);
-			vst1_u8(t_dst, t1);
-			t_dst += sizeof(uint8x8_t);
-		}
-
-		if(trailer_count & 4) {
-			*((uint32_t*) t_dst) = *((uint32_t*) t_src);
-			t_dst += 4;
-			t_src += 4;
-		}
-
-		if(trailer_count & 2) {
-			*((uint16_t*) t_dst) = *((uint16_t*) t_src);
-			t_dst += 2;
-			t_src += 2;
-		}
-
-		if(trailer_count & 1) {
-			*t_dst++ = *t_src++;
-		}
+	if (trailer_count & 1)
+	{
+	    *t_dst++ = *t_src++;
 	}
+    }
 }
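
neon_quadword_copy takes its length as whole 16-byte quadwords plus a byte
tail. For a 16-bpp scanline of `width` pixels, this is how the call sites
below split it (a hypothetical wrapper, shown only to make the units
explicit):

#include <stdint.h>

static void
copy_565_scanline (uint16_t *dst, uint16_t *src, uint32_t width)
{
    /* 8 pixels per quadword; the tail is in bytes, hence the "* 2" */
    neon_quadword_copy (dst, src, width >> 3, (width & 7) * 2);
}
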
 
-static inline void solid_over_565_8_pix_neon(
-	uint32_t  glyph_colour,
-	uint16_t *dest,
-	uint8_t  *in_mask,
-	uint32_t  dest_stride,  // bytes, not elements
-	uint32_t  mask_stride,
-	uint32_t  count        // 8-pixel groups
-)
+static inline void
+solid_over_565_8_pix_neon (uint32_t  glyph_colour,
+                           uint16_t *dest,
+                           uint8_t * in_mask,
+                           uint32_t  dest_stride,    /* bytes, not elements */
+                           uint32_t  mask_stride,
+                           uint32_t  count           /* 8-pixel groups */)
 {
-	// Inner loop of glyph blitter (solid colour, alpha mask)
+    /* Inner loop of glyph blitter (solid colour, alpha mask) */
 
 #ifdef USE_GCC_INLINE_ASM
 
-	asm volatile (
-	"	vld4.8 {d20[],d21[],d22[],d23[]}, [%[glyph_colour]]  @ splat solid colour components	\n"
-	"0:	@ loop																				\n"
-	"	vld1.16   {d0,d1}, [%[dest]]         @ load first pixels from framebuffer			\n"
-	"	vld1.8    {d17}, [%[in_mask]]         @ load alpha mask of glyph						\n"
-	"	vmull.u8  q9, d17, d23               @ apply glyph colour alpha to mask				\n"
-	"	vshrn.u16 d17, q9, #8                @ reformat it to match original mask			\n"
-	"	vmvn      d18, d17                   @ we need the inverse mask for the background	\n"
-	"	vsli.u16  q3, q0, #5                 @ duplicate framebuffer blue bits				\n"
-	"	vshrn.u16 d2, q0, #8                 @ unpack red from framebuffer pixels			\n"
-	"	vshrn.u16 d4, q0, #3                 @ unpack green									\n"
-	"	vsri.u8   d2, d2, #5                 @ duplicate red bits (extend 5 to 8)			\n"
-	"	vshrn.u16 d6, q3, #2                 @ unpack extended blue (truncate 10 to 8)		\n"
-	"	vsri.u8   d4, d4, #6                 @ duplicate green bits (extend 6 to 8)			\n"
-	"	vmull.u8  q1, d2, d18                @ apply inverse mask to background red...		\n"
-	"	vmull.u8  q2, d4, d18                @ ...green...									\n"
-	"	vmull.u8  q3, d6, d18                @ ...blue										\n"
-	"	subs      %[count], %[count], #1     @ decrement/test loop counter					\n"
-	"	vmlal.u8  q1, d17, d22               @ add masked foreground red...					\n"
-	"	vmlal.u8  q2, d17, d21               @ ...green...									\n"
-	"	vmlal.u8  q3, d17, d20               @ ...blue										\n"
-	"	add %[in_mask], %[in_mask], %[mask_stride] @ advance mask pointer, while we wait		\n"
-	"	vsri.16   q1, q2, #5                 @ pack green behind red						\n"
-	"	vsri.16   q1, q3, #11                @ pack blue into pixels						\n"
-	"	vst1.16   {d2,d3}, [%[dest]]         @ store composited pixels						\n"
-	"	add %[dest], %[dest], %[dest_stride]  @ advance framebuffer pointer					\n"
-	"	bne 0b                               @ next please									\n"
-
-	// Clobbered registers marked as input/outputs
+    asm volatile (
+        "	vld4.8 {d20[],d21[],d22[],d23[]}, [%[glyph_colour]]  @ splat solid colour components	\n"
+        "0:	@ loop																				\n"
+        "	vld1.16   {d0,d1}, [%[dest]]         @ load first pixels from framebuffer			\n"
+        "	vld1.8    {d17}, [%[in_mask]]         @ load alpha mask of glyph						\n"
+        "	vmull.u8  q9, d17, d23               @ apply glyph colour alpha to mask				\n"
+        "	vshrn.u16 d17, q9, #8                @ reformat it to match original mask			\n"
+        "	vmvn      d18, d17                   @ we need the inverse mask for the background	\n"
+        "	vsli.u16  q3, q0, #5                 @ duplicate framebuffer blue bits				\n"
+        "	vshrn.u16 d2, q0, #8                 @ unpack red from framebuffer pixels			\n"
+        "	vshrn.u16 d4, q0, #3                 @ unpack green									\n"
+        "	vsri.u8   d2, d2, #5                 @ duplicate red bits (extend 5 to 8)			\n"
+        "	vshrn.u16 d6, q3, #2                 @ unpack extended blue (truncate 10 to 8)		\n"
+        "	vsri.u8   d4, d4, #6                 @ duplicate green bits (extend 6 to 8)			\n"
+        "	vmull.u8  q1, d2, d18                @ apply inverse mask to background red...		\n"
+        "	vmull.u8  q2, d4, d18                @ ...green...									\n"
+        "	vmull.u8  q3, d6, d18                @ ...blue										\n"
+        "	subs      %[count], %[count], #1     @ decrement/test loop counter					\n"
+        "	vmlal.u8  q1, d17, d22               @ add masked foreground red...					\n"
+        "	vmlal.u8  q2, d17, d21               @ ...green...									\n"
+        "	vmlal.u8  q3, d17, d20               @ ...blue										\n"
+        "	add %[in_mask], %[in_mask], %[mask_stride] @ advance mask pointer, while we wait		\n"
+        "	vsri.16   q1, q2, #5                 @ pack green behind red						\n"
+        "	vsri.16   q1, q3, #11                @ pack blue into pixels						\n"
+        "	vst1.16   {d2,d3}, [%[dest]]         @ store composited pixels						\n"
+        "	add %[dest], %[dest], %[dest_stride]  @ advance framebuffer pointer					\n"
+        "	bne 0b                               @ next please									\n"
+
+	/* Clobbered registers marked as input/outputs */
 	: [dest] "+r" (dest), [in_mask] "+r" (in_mask), [count] "+r" (count)
-
-	// Inputs
+	  
+	  /* Inputs */
 	: [dest_stride] "r" (dest_stride), [mask_stride] "r" (mask_stride), [glyph_colour] "r" (&glyph_colour)
 
-	// Clobbers, including the inputs we modify, and potentially lots of memory
+	  /* Clobbers, including the inputs we modify, and potentially lots of memory */
 	: "q0", "q1", "q2", "q3", "d17", "q9", "q10", "q11", "q12", "cc", "memory"
-	);
+        );
 
 #else
 
-	uint8x8x4_t solid_colour = vld4_dup_u8((uint8_t*) &glyph_colour);
+    uint8x8x4_t solid_colour = vld4_dup_u8 ((uint8_t*) &glyph_colour);
 
-	while(count--)
-	{
-		uint16x8_t  pixels = vld1q_u16(dest);
-		uint8x8_t   mask = vshrn_n_u16(vmull_u8(solid_colour.val[3], vld1_u8(in_mask)), 8);
-		uint8x8_t  mask_image = vmvn_u8(mask);
+    while (count--)
+    {
+	uint16x8_t pixels = vld1q_u16 (dest);
+	uint8x8_t mask = vshrn_n_u16 (vmull_u8 (solid_colour.val[3], vld1_u8 (in_mask)), 8);
+	uint8x8_t mask_image = vmvn_u8 (mask);
 
-		uint8x8_t  t_red   = vshrn_n_u16(pixels, 8);
-		uint8x8_t  t_green = vshrn_n_u16(pixels, 3);
-		uint8x8_t  t_blue  = vshrn_n_u16(vsli_n_u8(pixels, pixels, 5), 2);
+	uint8x8_t t_red   = vshrn_n_u16 (pixels, 8);
+	uint8x8_t t_green = vshrn_n_u16 (pixels, 3);
+	uint8x8_t t_blue  = vshrn_n_u16 (vsliq_n_u16 (pixels, pixels, 5), 2);
 
-		uint16x8_t s_red   = vmull_u8(vsri_n_u8(t_red  , t_red  , 5), mask_image);
-		uint16x8_t s_green = vmull_u8(vsri_n_u8(t_green, t_green, 6), mask_image);
-		uint16x8_t s_blue  = vmull_u8(          t_blue             , mask_image);
+	uint16x8_t s_red   = vmull_u8 (vsri_n_u8 (t_red, t_red, 5), mask_image);
+	uint16x8_t s_green = vmull_u8 (vsri_n_u8 (t_green, t_green, 6), mask_image);
+	uint16x8_t s_blue  = vmull_u8 (t_blue, mask_image);
 
-		s_red   = vmlal(s_red  , mask, solid_colour.val[2]);
-		s_green = vmlal(s_green, mask, solid_colour.val[1]);
-		s_blue  = vmlal(s_blue , mask, solid_colour.val[0]);
+	s_red   = vmlal_u8 (s_red, mask, solid_colour.val[2]);
+	s_green = vmlal_u8 (s_green, mask, solid_colour.val[1]);
+	s_blue  = vmlal_u8 (s_blue, mask, solid_colour.val[0]);
 
-		pixels = vsri_n_u16(s_red, s_green, 5);
-		pixels = vsri_n_u16(pixels, s_blue, 11);
-		vst1q_u16(dest, pixels);
+	pixels = vsri_n_u16 (s_red, s_green, 5);
+	pixels = vsri_n_u16 (pixels, s_blue, 11);
+	vst1q_u16 (dest, pixels);
 
-		dest += dest_stride;
-		mask += mask_stride;
-	}
+	dest += dest_stride / sizeof(*dest); /* dest_stride is in bytes */
+	in_mask += mask_stride;
+    }
 
 #endif
 }
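
Both variants above widen RGB565 back to 8 bits per channel by replicating
the top bits into the newly opened low bits (the vsri/vshrn pairs commented
"duplicate ... bits"); that way 0x1f widens to 0xff and white stays white.
A scalar sketch of that widening:

#include <stdint.h>

static void
expand_565 (uint16_t p, uint8_t *r, uint8_t *g, uint8_t *b)
{
    uint8_t r5 = (p >> 11) & 0x1f;
    uint8_t g6 = (p >>  5) & 0x3f;
    uint8_t b5 =  p        & 0x1f;

    *r = (r5 << 3) | (r5 >> 2);   /* extend 5 bits to 8 */
    *g = (g6 << 2) | (g6 >> 4);   /* extend 6 bits to 8 */
    *b = (b5 << 3) | (b5 >> 2);   /* extend 5 bits to 8 */
}
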
 
 static void
-neon_composite_over_n_8_0565 (
-	pixman_implementation_t * impl,
-	pixman_op_t op,
-	pixman_image_t * src_image,
-	pixman_image_t * mask_image,
-	pixman_image_t * dst_image,
-	int32_t      src_x,
-	int32_t      src_y,
-	int32_t      mask_x,
-	int32_t      mask_y,
-	int32_t      dest_x,
-	int32_t      dest_y,
-	int32_t      width,
-	int32_t      height)
+neon_composite_over_n_8_0565 (pixman_implementation_t * impl,
+                              pixman_op_t               op,
+                              pixman_image_t *          src_image,
+                              pixman_image_t *          mask_image,
+                              pixman_image_t *          dst_image,
+                              int32_t                   src_x,
+                              int32_t                   src_y,
+                              int32_t                   mask_x,
+                              int32_t                   mask_y,
+                              int32_t                   dest_x,
+                              int32_t                   dest_y,
+                              int32_t                   width,
+                              int32_t                   height)
 {
-	uint32_t     src, srca;
-	uint16_t    *dst_line, *aligned_line;
-	uint8_t     *mask_line;
-	uint32_t     dst_stride, mask_stride;
-	uint32_t     kernel_count, copy_count, copy_tail;
-	uint8_t      kernel_offset, copy_offset;
-
-	src = _pixman_image_get_solid(src_image, dst_image->bits.format);
-
-	// bail out if fully transparent or degenerate
-	srca = src >> 24;
-	if(src == 0)
-		return;
-	if(width == 0 || height == 0)
-		return;
-
-	if(width > NEON_SCANLINE_BUFFER_PIXELS) {
-		// split the blit, so we can use a fixed-size scanline buffer
-		// TODO: there must be a more elegant way of doing this.
-		int x;
-		for(x=0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS) {
-			neon_composite_over_n_8_0565(impl, op, src_image, mask_image, dst_image, src_x+x, src_y, mask_x+x, mask_y, dest_x+x, dest_y,
-											  (x+NEON_SCANLINE_BUFFER_PIXELS > width) ? width-x : NEON_SCANLINE_BUFFER_PIXELS, height);
-		}
-		return;
+    uint32_t  src, srca;
+    uint16_t *dst_line, *aligned_line;
+    uint8_t  *mask_line;
+    uint32_t  dst_stride, mask_stride;
+    uint32_t  kernel_count, copy_count, copy_tail;
+    uint8_t   kernel_offset, copy_offset;
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+
+    /* bail out if fully transparent or degenerate */
+    srca = src >> 24;
+    if (src == 0)
+	return;
+
+    if (width == 0 || height == 0)
+	return;
+
+    if (width > NEON_SCANLINE_BUFFER_PIXELS)
+    {
+	/* split the blit, so we can use a fixed-size scanline buffer
+	 * TODO: there must be a more elegant way of doing this.
+	 */
+	int x;
+	for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
+	{
+	    neon_composite_over_n_8_0565 (
+		impl, op,
+		src_image, mask_image, dst_image,
+		src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
+		(x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
 	}
 
-	PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-	PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+	return;
+    }
+    
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
+
+    /* keep within minimum number of aligned quadwords on width
+     * while also keeping the minimum number of columns to process
+     */
+    {
+	unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
+	unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
+	unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
+
+	/* the fast copy should be quadword aligned */
+	copy_offset = dst_line - ((uint16_t*) aligned_left);
+	aligned_line = dst_line - copy_offset;
+	copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
+	copy_tail = 0;
 
-	// keep within minimum number of aligned quadwords on width
-	// while also keeping the minimum number of columns to process
+	if (aligned_right - aligned_left > ceiling_length)
 	{
-		unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
-		unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
-		unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
-
-		// the fast copy should be quadword aligned
-		copy_offset = dst_line - ((uint16_t*) aligned_left);
-		aligned_line = dst_line - copy_offset;
-		copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
-		copy_tail = 0;
-
-		if(aligned_right - aligned_left > ceiling_length) {
-			// unaligned routine is tightest
-			kernel_count = (uint32_t) (ceiling_length >> 4);
-			kernel_offset = copy_offset;
-		} else {
-			// aligned routine is equally tight, so it is safer to align
-			kernel_count = copy_count;
-			kernel_offset = 0;
-		}
-
-		// We should avoid reading beyond scanline ends for safety
-		if(aligned_line < (dst_line - dest_x) ||
-			(aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
-		{
-			// switch to precise read
-			copy_offset = kernel_offset = 0;
-			aligned_line = dst_line;
-			kernel_count = (uint32_t) (ceiling_length >> 4);
-			copy_count = (width * sizeof(*dst_line)) >> 4;
-			copy_tail = (width * sizeof(*dst_line)) & 0xF;
-		}
+	    /* unaligned routine is tightest */
+	    kernel_count = (uint32_t) (ceiling_length >> 4);
+	    kernel_offset = copy_offset;
+	}
+	else
+	{
+	    /* aligned routine is equally tight, so it is safer to align */
+	    kernel_count = copy_count;
+	    kernel_offset = 0;
 	}
 
+	/* We should avoid reading beyond scanline ends for safety */
+	if (aligned_line < (dst_line - dest_x) ||
+	    (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
 	{
-		uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; // deliberately not initialised
-		uint8_t glyph_line[NEON_SCANLINE_BUFFER_PIXELS + 8];
-		int y = height;
-
-		// row-major order
-		// left edge, middle block, right edge
-		for( ; y--; mask_line += mask_stride, aligned_line += dst_stride, dst_line += dst_stride) {
-			// We don't want to overrun the edges of the glyph, so realign the edge data into known buffers
-			neon_quadword_copy(glyph_line + copy_offset, mask_line, width >> 4, width & 0xF);
-
-			// Uncached framebuffer access is really, really slow if we do it piecemeal.
-			// It should be much faster if we grab it all at once.
-			// One scanline should easily fit in L1 cache, so this should not waste RAM bandwidth.
-			neon_quadword_copy(scan_line, aligned_line, copy_count, copy_tail);
-
-			// Apply the actual filter
-			solid_over_565_8_pix_neon(src, scan_line + kernel_offset, glyph_line + kernel_offset, 8 * sizeof(*dst_line), 8, kernel_count);
-
-			// Copy the modified scanline back
-			neon_quadword_copy(dst_line, scan_line + copy_offset, width >> 3, (width & 7) * 2);
-		}
+	    /* switch to precise read */
+	    copy_offset = kernel_offset = 0;
+	    aligned_line = dst_line;
+	    kernel_count = (uint32_t) (ceiling_length >> 4);
+	    copy_count = (width * sizeof(*dst_line)) >> 4;
+	    copy_tail = (width * sizeof(*dst_line)) & 0xF;
 	}
+    }
+
+    {
+	uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8];         /* deliberately not initialised */
+	uint8_t glyph_line[NEON_SCANLINE_BUFFER_PIXELS + 8];
+	int y = height;
+
+	/* row-major order */
+	/* left edge, middle block, right edge */
+	for ( ; y--; mask_line += mask_stride, aligned_line += dst_stride, dst_line += dst_stride)
+	{
+	    /* We don't want to overrun the edges of the glyph,
+	     * so realign the edge data into known buffers
+	     */
+	    neon_quadword_copy (glyph_line + copy_offset, mask_line, width >> 4, width & 0xF);
+
+	    /* Uncached framebuffer access is really, really slow
+	     * if we do it piecemeal. It should be much faster if we
+	     * grab it all at once. One scanline should easily fit in
+	     * L1 cache, so this should not waste RAM bandwidth.
+	     */
+	    neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
+
+	    /* Apply the actual filter */
+	    solid_over_565_8_pix_neon (
+		src, scan_line + kernel_offset,
+		glyph_line + kernel_offset, 8 * sizeof(*dst_line),
+		8, kernel_count);
+
+	    /* Copy the modified scanline back */
+	    neon_quadword_copy (dst_line, scan_line + copy_offset,
+				width >> 3, (width & 7) * 2);
+	}
+    }
 }
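
To make the alignment bookkeeping concrete, take dst_line = 0x1006 and
width = 7 (14 bytes of r5g6b5): aligned_left = 0x1000, aligned_right =
(0x1006 + 14 + 0xF) & ~0xF = 0x1020, so copy_count is 2 quadwords with a
copy_offset of 3 pixels, while ceiling_length = (14 + 0xF) & ~0xF = 16.
The 32-byte aligned span exceeds the 16-byte ceiling, so the unaligned
kernel is tighter: kernel_count drops to 1 and kernel_offset stays at 3.
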
 
 #ifdef USE_GCC_INLINE_ASM
 
-static inline void plain_over_565_8_pix_neon(
-	uint32_t  colour,
-	uint16_t *dest,
-	uint32_t  dest_stride,  // bytes, not elements
-	uint32_t  count        // 8-pixel groups
-)
+static inline void
+plain_over_565_8_pix_neon (uint32_t  colour,
+			   uint16_t *dest,
+			   uint32_t  dest_stride,     /* bytes, not elements */
+			   uint32_t  count            /* 8-pixel groups */)
 {
-	// Inner loop for plain translucent rects (solid colour without alpha mask)
-	asm volatile (
-	"	vld4.8   {d20[],d21[],d22[],d23[]}, [%[colour]]  @ solid colour load/splat \n"
-	"	vmull.u8  q12, d23, d22              @ premultiply alpha red   \n"
-	"	vmull.u8  q13, d23, d21              @ premultiply alpha green \n"
-	"	vmull.u8  q14, d23, d20              @ premultiply alpha blue  \n"
-	"	vmvn      d18, d23                   @ inverse alpha for background \n"
-	"0:	@ loop\n"
-	"	vld1.16   {d0,d1}, [%[dest]]         @ load first pixels from framebuffer	\n"
-	"	vshrn.u16 d2, q0, #8                 @ unpack red from framebuffer pixels	\n"
-	"	vshrn.u16 d4, q0, #3                 @ unpack green				\n"
-	"	vsli.u16  q3, q0, #5                 @ duplicate framebuffer blue bits		\n"
-	"	vsri.u8   d2, d2, #5                 @ duplicate red bits (extend 5 to 8)	\n"
-	"	vsri.u8   d4, d4, #6                 @ duplicate green bits (extend 6 to 8)	\n"
-	"	vshrn.u16 d6, q3, #2                 @ unpack extended blue (truncate 10 to 8)	\n"
-	"	vmov      q0, q12                    @ retrieve foreground red   \n"
-	"	vmlal.u8  q0, d2, d18                @ blend red - my kingdom for a four-operand MLA \n"
-	"	vmov      q1, q13                    @ retrieve foreground green \n"
-	"	vmlal.u8  q1, d4, d18                @ blend green               \n"
-	"	vmov      q2, q14                    @ retrieve foreground blue  \n"
-	"	vmlal.u8  q2, d6, d18                @ blend blue                \n"
-	"	subs      %[count], %[count], #1     @ decrement/test loop counter		\n"
-	"	vsri.16   q0, q1, #5                 @ pack green behind red			\n"
-	"	vsri.16   q0, q2, #11                @ pack blue into pixels			\n"
-	"	vst1.16   {d0,d1}, [%[dest]]         @ store composited pixels			\n"
-	"	add %[dest], %[dest], %[dest_stride]  @ advance framebuffer pointer		\n"
-	"	bne 0b                               @ next please				\n"
-
-	// Clobbered registers marked as input/outputs
+    /* Inner loop for plain translucent rects
+     * (solid colour without alpha mask)
+     */
+    asm volatile (
+        "	vld4.8   {d20[],d21[],d22[],d23[]}, [%[colour]]  @ solid colour load/splat \n"
+        "	vmull.u8  q12, d23, d22              @ premultiply alpha red   \n"
+        "	vmull.u8  q13, d23, d21              @ premultiply alpha green \n"
+        "	vmull.u8  q14, d23, d20              @ premultiply alpha blue  \n"
+        "	vmvn      d18, d23                   @ inverse alpha for background \n"
+        "0:	@ loop\n"
+        "	vld1.16   {d0,d1}, [%[dest]]         @ load first pixels from framebuffer	\n"
+        "	vshrn.u16 d2, q0, #8                 @ unpack red from framebuffer pixels	\n"
+        "	vshrn.u16 d4, q0, #3                 @ unpack green				\n"
+        "	vsli.u16  q3, q0, #5                 @ duplicate framebuffer blue bits		\n"
+        "	vsri.u8   d2, d2, #5                 @ duplicate red bits (extend 5 to 8)	\n"
+        "	vsri.u8   d4, d4, #6                 @ duplicate green bits (extend 6 to 8)	\n"
+        "	vshrn.u16 d6, q3, #2                 @ unpack extended blue (truncate 10 to 8)	\n"
+        "	vmov      q0, q12                    @ retrieve foreground red   \n"
+        "	vmlal.u8  q0, d2, d18                @ blend red - my kingdom for a four-operand MLA \n"
+        "	vmov      q1, q13                    @ retrieve foreground green \n"
+        "	vmlal.u8  q1, d4, d18                @ blend green               \n"
+        "	vmov      q2, q14                    @ retrieve foreground blue  \n"
+        "	vmlal.u8  q2, d6, d18                @ blend blue                \n"
+        "	subs      %[count], %[count], #1     @ decrement/test loop counter		\n"
+        "	vsri.16   q0, q1, #5                 @ pack green behind red			\n"
+        "	vsri.16   q0, q2, #11                @ pack blue into pixels			\n"
+        "	vst1.16   {d0,d1}, [%[dest]]         @ store composited pixels			\n"
+        "	add %[dest], %[dest], %[dest_stride]  @ advance framebuffer pointer		\n"
+        "	bne 0b                               @ next please				\n"
+
+        /* Clobbered registers marked as input/outputs */
 	: [dest] "+r" (dest), [count] "+r" (count)
 
-	// Inputs
+	  /* Inputs */
 	: [dest_stride] "r" (dest_stride), [colour] "r" (&colour)
 
-	// Clobbers, including the inputs we modify, and potentially lots of memory
-	: "q0", "q1", "q2", "q3", "q9", "q10", "q11", "q12", "q13", "q14", "cc", "memory"
-	);
+	  /* Clobbers, including the inputs we modify, and
+	   * potentially lots of memory
+	   */
+	: "q0", "q1", "q2", "q3", "q9",
+	  "q10", "q11", "q12", "q13", "q14",
+	  "cc", "memory"
+        );
 }
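
With a constant source colour, the per-channel alpha products are hoisted out
of the loop entirely (q12-q14 in the asm above). A scalar model of that
set-up, mirroring what the set-up instructions compute (names are
illustrative):

#include <stdint.h>

/* Loop-invariant terms: alpha times each colour channel
 * (vmull.u8 q12..q14) and the inverse alpha (vmvn d18).
 */
static void
hoist_constant_colour (uint32_t colour,
                       uint32_t *pre_r, uint32_t *pre_g, uint32_t *pre_b,
                       uint32_t *inv_a)
{
    uint32_t a = colour >> 24;

    *pre_r = a * ((colour >> 16) & 0xff);
    *pre_g = a * ((colour >>  8) & 0xff);
    *pre_b = a * ( colour        & 0xff);
    *inv_a = 255 - a;
}

Each channel in the inner loop is then a single vmlal.u8 against the hoisted
product, which is what keeps all three channels in flight at once.
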
 
 static void
-neon_composite_over_n_0565 (
-	pixman_implementation_t * impl,
-	pixman_op_t op,
-	pixman_image_t * src_image,
-	pixman_image_t * mask_image,
-	pixman_image_t * dst_image,
-	int32_t      src_x,
-	int32_t      src_y,
-	int32_t      mask_x,
-	int32_t      mask_y,
-	int32_t      dest_x,
-	int32_t      dest_y,
-	int32_t      width,
-	int32_t      height)
+neon_composite_over_n_0565 (pixman_implementation_t * impl,
+                            pixman_op_t               op,
+                            pixman_image_t *          src_image,
+                            pixman_image_t *          mask_image,
+                            pixman_image_t *          dst_image,
+                            int32_t                   src_x,
+                            int32_t                   src_y,
+                            int32_t                   mask_x,
+                            int32_t                   mask_y,
+                            int32_t                   dest_x,
+                            int32_t                   dest_y,
+                            int32_t                   width,
+                            int32_t                   height)
 {
-	uint32_t     src, srca;
-	uint16_t    *dst_line, *aligned_line;
-	uint32_t     dst_stride;
-	uint32_t     kernel_count, copy_count, copy_tail;
-	uint8_t      kernel_offset, copy_offset;
-
-	src = _pixman_image_get_solid(src_image, dst_image->bits.format);
-
-	// bail out if fully transparent
-	srca = src >> 24;
-	if(src == 0)
-		return;
-	if(width == 0 || height == 0)
-		return;
-
-	if(width > NEON_SCANLINE_BUFFER_PIXELS) {
-		// split the blit, so we can use a fixed-size scanline buffer
-		// TODO: there must be a more elegant way of doing this.
-		int x;
-		for(x=0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS) {
-			neon_composite_over_n_0565(impl, op, src_image, mask_image, dst_image, src_x+x, src_y, mask_x+x, mask_y, dest_x+x, dest_y,
-										(x+NEON_SCANLINE_BUFFER_PIXELS > width) ? width-x : NEON_SCANLINE_BUFFER_PIXELS, height);
-		}
-		return;
-	}
+    uint32_t  src, srca;
+    uint16_t *dst_line, *aligned_line;
+    uint32_t  dst_stride;
+    uint32_t  kernel_count, copy_count, copy_tail;
+    uint8_t   kernel_offset, copy_offset;
 
-	PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+
+    /* bail out if fully transparent */
+    srca = src >> 24;
+    if (src == 0)
+	return;
+    
+    if (width == 0 || height == 0)
+	return;
 
-	// keep within minimum number of aligned quadwords on width
-	// while also keeping the minimum number of columns to process
+    if (width > NEON_SCANLINE_BUFFER_PIXELS)
+    {
+	/* split the blit, so we can use a fixed-size scanline buffer
+	 * TODO: there must be a more elegant way of doing this.
+	 */
+	int x;
+	
+	for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
 	{
-		unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
-		unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
-		unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
-
-		// the fast copy should be quadword aligned
-		copy_offset = dst_line - ((uint16_t*) aligned_left);
-		aligned_line = dst_line - copy_offset;
-		copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
-		copy_tail = 0;
-
-		if(aligned_right - aligned_left > ceiling_length) {
-			// unaligned routine is tightest
-			kernel_count = (uint32_t) (ceiling_length >> 4);
-			kernel_offset = copy_offset;
-		} else {
-			// aligned routine is equally tight, so it is safer to align
-			kernel_count = copy_count;
-			kernel_offset = 0;
-		}
-
-		// We should avoid reading beyond scanline ends for safety
-		if(aligned_line < (dst_line - dest_x) ||
-			(aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
-		{
-			// switch to precise read
-			copy_offset = kernel_offset = 0;
-			aligned_line = dst_line;
-			kernel_count = (uint32_t) (ceiling_length >> 4);
-			copy_count = (width * sizeof(*dst_line)) >> 4;
-			copy_tail = (width * sizeof(*dst_line)) & 0xF;
-		}
+	    neon_composite_over_n_0565 (
+		impl, op,
+		src_image, mask_image, dst_image,
+		src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
+		(x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
 	}
+	return;
+    }
 
-	{
-		uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; // deliberately not initialised
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
 
-		// row-major order
-		// left edge, middle block, right edge
-		for( ; height--; aligned_line += dst_stride, dst_line += dst_stride) {
+    /* keep within minimum number of aligned quadwords on width
+     * while also keeping the minimum number of columns to process
+     */
+    {
+	unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
+	unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
+	unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
 
-			// Uncached framebuffer access is really, really slow if we do it piecemeal.
-			// It should be much faster if we grab it all at once.
-			// One scanline should easily fit in L1 cache, so this should not waste RAM bandwidth.
-			neon_quadword_copy(scan_line, aligned_line, copy_count, copy_tail);
+	/* the fast copy should be quadword aligned */
+	copy_offset = dst_line - ((uint16_t*) aligned_left);
+	aligned_line = dst_line - copy_offset;
+	copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
+	copy_tail = 0;
 
-			// Apply the actual filter
-			plain_over_565_8_pix_neon(src, scan_line + kernel_offset, 8 * sizeof(*dst_line), kernel_count);
+	if (aligned_right - aligned_left > ceiling_length)
+	{
+	    /* unaligned routine is tightest */
+	    kernel_count = (uint32_t) (ceiling_length >> 4);
+	    kernel_offset = copy_offset;
+	}
+	else
+	{
+	    /* aligned routine is equally tight, so it is safer to align */
+	    kernel_count = copy_count;
+	    kernel_offset = 0;
+	}
 
-			// Copy the modified scanline back
-			neon_quadword_copy(dst_line, scan_line + copy_offset, width >> 3, (width & 7) * 2);
-		}
+	/* We should avoid reading beyond scanline ends for safety */
+	if (aligned_line < (dst_line - dest_x) ||
+	    (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
+	{
+	    /* switch to precise read */
+	    copy_offset = kernel_offset = 0;
+	    aligned_line = dst_line;
+	    kernel_count = (uint32_t) (ceiling_length >> 4);
+	    copy_count = (width * sizeof(*dst_line)) >> 4;
+	    copy_tail = (width * sizeof(*dst_line)) & 0xF;
 	}
+    }
+
+    {
+	uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8];  /* deliberately not initialised */
+
+	/* row-major order */
+	/* left edge, middle block, right edge */
+	for ( ; height--; aligned_line += dst_stride, dst_line += dst_stride)
+	{
+	    /* Uncached framebuffer access is really, really slow if we do it piecemeal.
+	     * It should be much faster if we grab it all at once.
+	     * One scanline should easily fit in L1 cache, so this should
+	     * not waste RAM bandwidth.
+	     */
+	    neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
+
+	    /* Apply the actual filter */
+	    plain_over_565_8_pix_neon (
+		src, scan_line + kernel_offset, 8 * sizeof(*dst_line), kernel_count);
+
+	    /* Copy the modified scanline back */
+	    neon_quadword_copy (
+		dst_line, scan_line + copy_offset, width >> 3, (width & 7) * 2);
+	}
+    }
 }
 
-static inline void ARGB8_over_565_8_pix_neon(
-	uint32_t *src,
-	uint16_t *dest,
-	uint32_t  src_stride,  // bytes, not elements
-	uint32_t  count        // 8-pixel groups
-)
+static inline void
+ARGB8_over_565_8_pix_neon (uint32_t *src,
+                           uint16_t *dest,
+                           uint32_t  src_stride,     /* bytes, not elements */
+                           uint32_t  count           /* 8-pixel groups */)
 {
-	asm volatile (
-	"0:	@ loop\n"
-	"	pld   [%[src], %[src_stride]]         @ preload from next scanline	\n"
-	"	vld1.16   {d0,d1}, [%[dest]]         @ load pixels from framebuffer	\n"
-	"	vld4.8   {d20,d21,d22,d23},[%[src]]! @ load source image pixels		\n"
-	"	vsli.u16  q3, q0, #5                 @ duplicate framebuffer blue bits		\n"
-	"	vshrn.u16 d2, q0, #8                 @ unpack red from framebuffer pixels	\n"
-	"	vshrn.u16 d4, q0, #3                 @ unpack green				\n"
-	"	vmvn      d18, d23                   @ we need the inverse alpha for the background	\n"
-	"	vsri.u8   d2, d2, #5                 @ duplicate red bits (extend 5 to 8)	\n"
-	"	vshrn.u16 d6, q3, #2                 @ unpack extended blue (truncate 10 to 8)	\n"
-	"	vsri.u8   d4, d4, #6                 @ duplicate green bits (extend 6 to 8)	\n"
-	"	vmull.u8  q1, d2, d18                @ apply inverse alpha to background red...	\n"
-	"	vmull.u8  q2, d4, d18                @ ...green...				\n"
-	"	vmull.u8  q3, d6, d18                @ ...blue					\n"
-	"	subs      %[count], %[count], #1     @ decrement/test loop counter		\n"
-	"	vmlal.u8  q1, d23, d22               @ add blended foreground red...		\n"
-	"	vmlal.u8  q2, d23, d21               @ ...green...				\n"
-	"	vmlal.u8  q3, d23, d20               @ ...blue					\n"
-	"	vsri.16   q1, q2, #5                 @ pack green behind red			\n"
-	"	vsri.16   q1, q3, #11                @ pack blue into pixels			\n"
-	"	vst1.16   {d2,d3}, [%[dest]]!        @ store composited pixels			\n"
-	"	bne 0b                               @ next please				\n"
-
-	// Clobbered registers marked as input/outputs
+    asm volatile (
+        "0:	@ loop\n"
+        "	pld   [%[src], %[src_stride]]         @ preload from next scanline	\n"
+        "	vld1.16   {d0,d1}, [%[dest]]         @ load pixels from framebuffer	\n"
+        "	vld4.8   {d20,d21,d22,d23},[%[src]]! @ load source image pixels		\n"
+        "	vsli.u16  q3, q0, #5                 @ duplicate framebuffer blue bits		\n"
+        "	vshrn.u16 d2, q0, #8                 @ unpack red from framebuffer pixels	\n"
+        "	vshrn.u16 d4, q0, #3                 @ unpack green				\n"
+        "	vmvn      d18, d23                   @ we need the inverse alpha for the background	\n"
+        "	vsri.u8   d2, d2, #5                 @ duplicate red bits (extend 5 to 8)	\n"
+        "	vshrn.u16 d6, q3, #2                 @ unpack extended blue (truncate 10 to 8)	\n"
+        "	vsri.u8   d4, d4, #6                 @ duplicate green bits (extend 6 to 8)	\n"
+        "	vmull.u8  q1, d2, d18                @ apply inverse alpha to background red...	\n"
+        "	vmull.u8  q2, d4, d18                @ ...green...				\n"
+        "	vmull.u8  q3, d6, d18                @ ...blue					\n"
+        "	subs      %[count], %[count], #1     @ decrement/test loop counter		\n"
+        "	vmlal.u8  q1, d23, d22               @ add blended foreground red...		\n"
+        "	vmlal.u8  q2, d23, d21               @ ...green...				\n"
+        "	vmlal.u8  q3, d23, d20               @ ...blue					\n"
+        "	vsri.16   q1, q2, #5                 @ pack green behind red			\n"
+        "	vsri.16   q1, q3, #11                @ pack blue into pixels			\n"
+        "	vst1.16   {d2,d3}, [%[dest]]!        @ store composited pixels			\n"
+        "	bne 0b                               @ next please				\n"
+
+        /* Clobbered registers marked as input/outputs */
 	: [dest] "+r" (dest), [src] "+r" (src), [count] "+r" (count)
 
-	// Inputs
+	  /* Inputs */
 	: [src_stride] "r" (src_stride)
 
-	// Clobbers, including the inputs we modify, and potentially lots of memory
+	  /* Clobbers, including the inputs we modify, and potentially lots of memory */
 	: "q0", "q1", "q2", "q3", "d17", "d18", "q10", "q11", "cc", "memory"
-	);
+        );
 }
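
A scalar model of one pixel of the blend above, written the way the
instructions compute it: the background is widened by bit replication, the
16-bit products bg * ~alpha + src_channel * alpha are formed, and the
vsri.16 #5 / #11 packing amounts to keeping the top 5/6/5 bits of each
product. This mirrors the asm, not a normative definition of OVER:

#include <stdint.h>

static uint16_t
over_8888_0565_scalar (uint32_t src, uint16_t bg)
{
    uint32_t a  = src >> 24;
    uint32_t na = 255 - a;                        /* vmvn d18, d23 */

    /* widen the 565 background to 888 by bit replication */
    uint32_t r = (bg >> 11) & 0x1f; r = (r << 3) | (r >> 2);
    uint32_t g = (bg >>  5) & 0x3f; g = (g << 2) | (g >> 4);
    uint32_t b =  bg        & 0x1f; b = (b << 3) | (b >> 2);

    /* 16-bit blend products, as vmull.u8/vmlal.u8 form them */
    r = r * na + ((src >> 16) & 0xff) * a;
    g = g * na + ((src >>  8) & 0xff) * a;
    b = b * na + ( src        & 0xff) * a;

    /* take the top 5/6/5 bits of each product */
    return (uint16_t) (((r >> 11) << 11) | ((g >> 10) << 5) | (b >> 11));
}
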
 
 static void
-neon_composite_over_8888_0565 (
-	pixman_implementation_t * impl,
-	pixman_op_t op,
-	pixman_image_t * src_image,
-	pixman_image_t * mask_image,
-	pixman_image_t * dst_image,
-	int32_t      src_x,
-	int32_t      src_y,
-	int32_t      mask_x,
-	int32_t      mask_y,
-	int32_t      dest_x,
-	int32_t      dest_y,
-	int32_t      width,
-	int32_t      height)
+neon_composite_over_8888_0565 (pixman_implementation_t * impl,
+                               pixman_op_t               op,
+                               pixman_image_t *          src_image,
+                               pixman_image_t *          mask_image,
+                               pixman_image_t *          dst_image,
+                               int32_t                   src_x,
+                               int32_t                   src_y,
+                               int32_t                   mask_x,
+                               int32_t                   mask_y,
+                               int32_t                   dest_x,
+                               int32_t                   dest_y,
+                               int32_t                   width,
+                               int32_t                   height)
 {
-	uint32_t    *src_line;
-	uint16_t    *dst_line, *aligned_line;
-	uint32_t     dst_stride, src_stride;
-	uint32_t     kernel_count, copy_count, copy_tail;
-	uint8_t      kernel_offset, copy_offset;
-
-	// we assume mask is opaque
-	// so the only alpha to deal with is embedded in src
-
-	if(width > NEON_SCANLINE_BUFFER_PIXELS) {
-		// split the blit, so we can use a fixed-size scanline buffer
-		int x;
-		for(x=0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS) {
-			neon_composite_over_8888_0565(impl, op, src_image, mask_image, dst_image, src_x+x, src_y, mask_x+x, mask_y, dest_x+x, dest_y,
-										  (x+NEON_SCANLINE_BUFFER_PIXELS > width) ? width-x : NEON_SCANLINE_BUFFER_PIXELS, height);
-		}
-		return;
+    uint32_t *src_line;
+    uint16_t *dst_line, *aligned_line;
+    uint32_t  dst_stride, src_stride;
+    uint32_t  kernel_count, copy_count, copy_tail;
+    uint8_t   kernel_offset, copy_offset;
+
+    /* We assume the mask is opaque,
+     * so the only alpha to deal with is embedded in src
+     */
+    if (width > NEON_SCANLINE_BUFFER_PIXELS)
+    {
+	/* split the blit, so we can use a fixed-size scanline buffer */
+	int x;
+	for (x = 0; x < width; x += NEON_SCANLINE_BUFFER_PIXELS)
+	{
+	    neon_composite_over_8888_0565 (
+		impl, op,
+		src_image, mask_image, dst_image,
+		src_x + x, src_y, mask_x + x, mask_y, dest_x + x, dest_y,
+		(x + NEON_SCANLINE_BUFFER_PIXELS > width) ? width - x : NEON_SCANLINE_BUFFER_PIXELS, height);
 	}
+	return;
+    }
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+    /* keep within minimum number of aligned quadwords on width
+     * while also keeping the minimum number of columns to process
+     */
+    {
+	unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
+	unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
+	unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
 
-	PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
-	PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+	/* the fast copy should be quadword aligned */
+	copy_offset = dst_line - ((uint16_t*) aligned_left);
+	aligned_line = dst_line - copy_offset;
+	copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
+	copy_tail = 0;
 
-	// keep within minimum number of aligned quadwords on width
-	// while also keeping the minimum number of columns to process
+	if (aligned_right - aligned_left > ceiling_length)
+	{
+	    /* unaligned routine is tightest */
+	    kernel_count = (uint32_t) (ceiling_length >> 4);
+	    kernel_offset = copy_offset;
+	}
+	else
 	{
-		unsigned long aligned_left = (unsigned long)(dst_line) & ~0xF;
-		unsigned long aligned_right = (((unsigned long)(dst_line + width)) + 0xF) & ~0xF;
-		unsigned long ceiling_length = (((unsigned long) width) * sizeof(*dst_line) + 0xF) & ~0xF;
-
-		// the fast copy should be quadword aligned
-		copy_offset = dst_line - ((uint16_t*) aligned_left);
-		aligned_line = dst_line - copy_offset;
-		copy_count = (uint32_t) ((aligned_right - aligned_left) >> 4);
-		copy_tail = 0;
-
-		if(aligned_right - aligned_left > ceiling_length) {
-			// unaligned routine is tightest
-			kernel_count = (uint32_t) (ceiling_length >> 4);
-			kernel_offset = copy_offset;
-		} else {
-			// aligned routine is equally tight, so it is safer to align
-			kernel_count = copy_count;
-			kernel_offset = 0;
-		}
-
-		// We should avoid reading beyond scanline ends for safety
-		if(aligned_line < (dst_line - dest_x) ||
-			(aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
-		{
-			// switch to precise read
-			copy_offset = kernel_offset = 0;
-			aligned_line = dst_line;
-			kernel_count = (uint32_t) (ceiling_length >> 4);
-			copy_count = (width * sizeof(*dst_line)) >> 4;
-			copy_tail = (width * sizeof(*dst_line)) & 0xF;
-		}
+	    /* aligned routine is equally tight, so it is safer to align */
+	    kernel_count = copy_count;
+	    kernel_offset = 0;
 	}
 
-	/* Preload the first input scanline */
+	/* We should avoid reading beyond scanline ends for safety */
+	if (aligned_line < (dst_line - dest_x) ||
+	    (aligned_line + (copy_count * 16 / sizeof(*dst_line))) > ((dst_line - dest_x) + dst_image->bits.width))
 	{
-		uint8_t *src_ptr = (uint8_t*) src_line;
-		uint32_t count = (width + 15) / 16;
+	    /* switch to precise read */
+	    copy_offset = kernel_offset = 0;
+	    aligned_line = dst_line;
+	    kernel_count = (uint32_t) (ceiling_length >> 4);
+	    copy_count = (width * sizeof(*dst_line)) >> 4;
+	    copy_tail = (width * sizeof(*dst_line)) & 0xF;
+	}
+    }
+
+    /* Preload the first input scanline */
+    {
+	uint8_t *src_ptr = (uint8_t*) src_line;
+	uint32_t count = (width + 15) / 16;
 
 #ifdef USE_GCC_INLINE_ASM
-		asm volatile (
-		"0: @ loop						\n"
-		"	subs    %[count], %[count], #1			\n"
-		"	pld     [%[src]]				\n"
-		"	add     %[src], %[src], #64			\n"
-		"	bgt 0b						\n"
-
-		// Clobbered input registers marked as input/outputs
-		: [src] "+r" (src_ptr), [count] "+r" (count)
-		: // no unclobbered inputs
-		: "cc"
-		);
+	asm volatile (
+	    "0: @ loop						\n"
+	    "	subs    %[count], %[count], #1			\n"
+	    "	pld     [%[src]]				\n"
+	    "	add     %[src], %[src], #64			\n"
+	    "	bgt 0b						\n"
+
+	    /* Clobbered input registers marked as input/outputs */
+	    : [src] "+r" (src_ptr), [count] "+r" (count)
+	    :     /* no unclobbered inputs */
+	    : "cc"
+	    );
 #else
-		do {
-			__pld(src_ptr);
-			src_ptr += 64;
-		} while(--count);
-#endif
+	do
+	{
+	    __pld (src_ptr);
+	    src_ptr += 64;
 	}
+	while (--count);
+#endif
+    }
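
[Note on the preload arithmetic above: each pld step advances 64 bytes, i.e. one
cache line holding sixteen 32-bit source pixels, so (width + 15) / 16 steps cover
the whole scanline. A standalone check of that claim, with made-up numbers:

    #include <assert.h>

    int
    main (void)
    {
        int width = 100;               /* a8r8g8b8 pixels in one scanline */
        int bytes = width * 4;         /* 400 bytes of source data */
        int count = (width + 15) / 16; /* 7 pld steps of 64 bytes each */

        assert (count * 64 >= bytes);  /* the whole scanline is prefetched */
        return 0;
    }
]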
 
+    {
+	uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; /* deliberately not initialised */
+
+	/* row-major order */
+	/* left edge, middle block, right edge */
+	for ( ; height--; src_line += src_stride, aligned_line += dst_stride)
 	{
-		uint16_t scan_line[NEON_SCANLINE_BUFFER_PIXELS + 8]; // deliberately not initialised
-
-		// row-major order
-		// left edge, middle block, right edge
-		for( ; height--; src_line += src_stride, aligned_line += dst_stride) {
-			// Uncached framebuffer access is really, really slow if we do it piecemeal.
-			// It should be much faster if we grab it all at once.
-			// One scanline should easily fit in L1 cache, so this should not waste RAM bandwidth.
-			neon_quadword_copy(scan_line, aligned_line, copy_count, copy_tail);
-
-			// Apply the actual filter
-			ARGB8_over_565_8_pix_neon(src_line, scan_line + kernel_offset, src_stride * sizeof(*src_line), kernel_count);
-
-			// Copy the modified scanline back
-			neon_quadword_copy(dst_line, scan_line + copy_offset, width >> 3, (width & 7) * 2);
-		}
+	    /* Uncached framebuffer access is really, really slow if we do
+	     * it piecemeal. It should be much faster if we grab it all at
+	     * once. One scanline should easily fit in L1 cache, so this
+	     * should not waste RAM bandwidth.
+	     */
+	    neon_quadword_copy (scan_line, aligned_line, copy_count, copy_tail);
+
+	    /* Apply the actual filter */
+	    ARGB8_over_565_8_pix_neon (
+		src_line, scan_line + kernel_offset,
+		src_stride * sizeof(*src_line), kernel_count);
+
+	    /* Copy the modified scanline back */
+	    neon_quadword_copy (dst_line,
+				scan_line + copy_offset,
+				width >> 3, (width & 7) * 2);
 	}
+    }
 }
 
-#endif  // USE_GCC_INLINE_ASM
+#endif  /* USE_GCC_INLINE_ASM */
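
[Note: the function above never composites straight into the destination. Each
scanline is copied into an on-stack buffer, the kernel runs there, and the result
is written back, so the (possibly uncached) framebuffer only sees large quadword
bursts. A minimal sketch of that read-modify-write pattern, with hypothetical
names and plain memcpy standing in for neon_quadword_copy:

    #include <stdint.h>
    #include <string.h>

    #define BUFFER_PIXELS 1024

    /* stands in for the real per-pixel NEON kernel */
    static void
    process_row (uint16_t *pixels, int width)
    {
        while (width--)
            pixels[width] ^= 0xffff;
    }

    /* assumes width <= BUFFER_PIXELS */
    static void
    composite_rows (uint16_t *fb, int fb_stride, int width, int height)
    {
        uint16_t scan_line[BUFFER_PIXELS];

        while (height--)
        {
            memcpy (scan_line, fb, width * sizeof (uint16_t)); /* burst read */
            process_row (scan_line, width);                    /* fast local work */
            memcpy (fb, scan_line, width * sizeof (uint16_t)); /* burst write */

            fb += fb_stride;
        }
    }
]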
 
-static const pixman_fast_path_t arm_neon_fast_path_array[] = 
+static const pixman_fast_path_t arm_neon_fast_path_array[] =
 {
-    { PIXMAN_OP_ADD,  PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8,       neon_composite_add_8888_8_8,        0 },
-    { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_null,     PIXMAN_a8,       neon_composite_add_8000_8000,       0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   neon_composite_over_n_8_0565,     0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   neon_composite_over_n_8_0565,     0 },
-    { PIXMAN_OP_SRC,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_24_16,              0 },
-    { PIXMAN_OP_SRC,  PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_24_16,              0 },
-    { PIXMAN_OP_SRC,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_24_16,              0 },
-    { PIXMAN_OP_SRC,  PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_24_16,              0 },
+    { PIXMAN_OP_ADD,  PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8,       neon_composite_add_8888_8_8,     0 },
+    { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_null,     PIXMAN_a8,       neon_composite_add_8000_8000,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   neon_composite_over_n_8_0565,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   neon_composite_over_n_8_0565,    0 },
+    { PIXMAN_OP_SRC,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_24_16,        0 },
+    { PIXMAN_OP_SRC,  PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_24_16,        0 },
+    { PIXMAN_OP_SRC,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_24_16,        0 },
+    { PIXMAN_OP_SRC,  PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_24_16,        0 },
 #ifdef USE_GCC_INLINE_ASM
-    { PIXMAN_OP_SRC,  PIXMAN_r5g6b5,   PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_16_16,              0 },
-    { PIXMAN_OP_SRC,  PIXMAN_b5g6r5,   PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_16_16,              0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_n_0565,           0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_over_n_0565,           0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_8888_0565,         0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_over_8888_0565,         0 },
+    { PIXMAN_OP_SRC,  PIXMAN_r5g6b5,   PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_16_16,        0 },
+    { PIXMAN_OP_SRC,  PIXMAN_b5g6r5,   PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_src_16_16,        0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_n_0565,      0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_over_n_0565,      0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_over_8888_0565,   0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_b5g6r5,   neon_composite_over_8888_0565,   0 },
 #endif
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, neon_composite_over_8888_8888,          0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_over_8888_8888,          0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, neon_composite_over_8888_8888,          0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_over_8888_8888,          0 },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888,        NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888,        NEED_SOLID_MASK },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888,     0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, neon_composite_over_n_8_8888,     0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888,     0 },
-    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888,     0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, neon_composite_over_8888_8888,   0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_over_8888_8888,   0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_a8b8g8r8, neon_composite_over_8888_8888,   0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_over_8888_8888,   0 },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_8888_n_8888, NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_a8r8g8b8, PIXMAN_a8,       PIXMAN_x8r8g8b8, neon_composite_over_8888_n_8888, NEED_SOLID_MASK },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8r8g8b8, neon_composite_over_n_8_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8b8g8r8, neon_composite_over_n_8_8888,    0 },
+    { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_x8b8g8r8, neon_composite_over_n_8_8888,    0 },
     { PIXMAN_OP_NONE },
 };
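
[Note: each row of this table keys one specialized routine on an (operator,
source format, mask format, destination format) tuple, terminated by the
PIXMAN_OP_NONE sentinel. A rough sketch of how such a sentinel-terminated table
can be scanned, as an illustration only, not the actual _pixman_run_fast_path
logic:

    #include <stddef.h>

    typedef struct
    {
        int   op, src_format, mask_format, dest_format;
        void (*func) (void);
        int   flags;
    } fast_path_t;

    static const fast_path_t *
    lookup_fast_path (const fast_path_t *table,
                      int op, int src, int mask, int dest)
    {
        const fast_path_t *p;

        /* a { PIXMAN_OP_NONE } entry zero-initializes func, ending the scan */
        for (p = table; p->func != NULL; ++p)
        {
            if (p->op == op && p->src_format == src &&
                p->mask_format == mask && p->dest_format == dest)
                return p;
        }

        return NULL; /* no hit; the caller falls back to the delegate */
    }
]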
 
@@ -2166,126 +2301,138 @@ const pixman_fast_path_t *const arm_neon_fast_paths = arm_neon_fast_path_array;
 
 static void
 arm_neon_composite (pixman_implementation_t *imp,
-		pixman_op_t     op,
-		pixman_image_t *src,
-		pixman_image_t *mask,
-		pixman_image_t *dest,
-		int32_t         src_x,
-		int32_t         src_y,
-		int32_t         mask_x,
-		int32_t         mask_y,
-		int32_t         dest_x,
-		int32_t         dest_y,
-		int32_t        width,
-		int32_t        height)
+                    pixman_op_t              op,
+                    pixman_image_t *         src,
+                    pixman_image_t *         mask,
+                    pixman_image_t *         dest,
+                    int32_t                  src_x,
+                    int32_t                  src_y,
+                    int32_t                  mask_x,
+                    int32_t                  mask_y,
+                    int32_t                  dest_x,
+                    int32_t                  dest_y,
+                    int32_t                  width,
+                    int32_t                  height)
 {
-	if (_pixman_run_fast_path (arm_neon_fast_paths, imp,
-			       op, src, mask, dest,
-			       src_x, src_y,
-			       mask_x, mask_y,
-			       dest_x, dest_y,
-			       width, height))
-	{
-		return;
-	}
+    if (_pixman_run_fast_path (arm_neon_fast_paths, imp,
+                               op, src, mask, dest,
+                               src_x, src_y,
+                               mask_x, mask_y,
+                               dest_x, dest_y,
+                               width, height))
+    {
+	return;
+    }
 
-	_pixman_implementation_composite (imp->delegate, op,
-				      src, mask, dest,
-				      src_x, src_y,
-				      mask_x, mask_y,
-				      dest_x, dest_y,
-				      width, height);
+    _pixman_implementation_composite (imp->delegate, op,
+                                      src, mask, dest,
+                                      src_x, src_y,
+                                      mask_x, mask_y,
+                                      dest_x, dest_y,
+                                      width, height);
 }
 
 static pixman_bool_t
-pixman_blt_neon (
-	void *src_bits,
-	void *dst_bits,
-	int src_stride,
-	int dst_stride,
-	int src_bpp,
-	int dst_bpp,
-	int src_x, int src_y,
-	int dst_x, int dst_y,
-	int width, int height)
+pixman_blt_neon (void *src_bits,
+                 void *dst_bits,
+                 int   src_stride,
+                 int   dst_stride,
+                 int   src_bpp,
+                 int   dst_bpp,
+                 int   src_x,
+                 int   src_y,
+                 int   dst_x,
+                 int   dst_y,
+                 int   width,
+                 int   height)
 {
-	if(!width || !height)
-		return TRUE;
+    if (!width || !height)
+	return TRUE;
 
-	// accelerate only straight copies involving complete bytes
-	if(src_bpp != dst_bpp || (src_bpp & 7))
-		return FALSE;
+    /* accelerate only straight copies involving complete bytes */
+    if (src_bpp != dst_bpp || (src_bpp & 7))
+	return FALSE;
 
+    {
+	uint32_t bytes_per_pixel = src_bpp >> 3;
+	uint32_t byte_width = width * bytes_per_pixel;
+	/* parameter is in words for some reason */
+	int32_t src_stride_bytes = src_stride * 4;
+	int32_t dst_stride_bytes = dst_stride * 4;
+	uint8_t *src_bytes = ((uint8_t*) src_bits) +
+	    src_y * src_stride_bytes + src_x * bytes_per_pixel;
+	uint8_t *dst_bytes = ((uint8_t*) dst_bits) +
+	    dst_y * dst_stride_bytes + dst_x * bytes_per_pixel;
+	uint32_t quadword_count = byte_width / 16;
+	uint32_t offset         = byte_width % 16;
+
+	while (height--)
 	{
-		uint32_t bytes_per_pixel = src_bpp >> 3;
-		uint32_t byte_width = width * bytes_per_pixel;
-		int32_t src_stride_bytes = src_stride * 4; // parameter is in words for some reason
-		int32_t dst_stride_bytes = dst_stride * 4;
-		uint8_t *src_bytes = ((uint8_t*) src_bits) + src_y * src_stride_bytes + src_x * bytes_per_pixel;
-		uint8_t *dst_bytes = ((uint8_t*) dst_bits) + dst_y * dst_stride_bytes + dst_x * bytes_per_pixel;
-		uint32_t quadword_count = byte_width / 16;
-		uint32_t offset         = byte_width % 16;
-
-		while(height--) {
-			neon_quadword_copy(dst_bytes, src_bytes, quadword_count, offset);
-			src_bytes += src_stride_bytes;
-			dst_bytes += dst_stride_bytes;
-		}
+	    neon_quadword_copy (dst_bytes, src_bytes, quadword_count, offset);
+	    src_bytes += src_stride_bytes;
+	    dst_bytes += dst_stride_bytes;
 	}
+    }
 
-	return TRUE;
+    return TRUE;
 }
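
[Note: as the comment in pixman_blt_neon says, pixman strides are counted in
32-bit words, so the first step is always a conversion to bytes before any pixel
addressing. A tiny worked sketch of that addressing (hypothetical helper, not
pixman API):

    #include <stdint.h>

    static uint8_t *
    pixel_address (void *bits, int stride_words, int bpp, int x, int y)
    {
        int stride_bytes    = stride_words * 4; /* words -> bytes */
        int bytes_per_pixel = bpp >> 3;

        return (uint8_t *) bits + y * stride_bytes + x * bytes_per_pixel;
    }

For a 640x480 r5g6b5 surface (bpp = 16, stride_words = 320, i.e. 1280 bytes per
row), pixel (10, 2) ends up at byte offset 2 * 1280 + 10 * 2 = 2580.]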
 
 static pixman_bool_t
 arm_neon_blt (pixman_implementation_t *imp,
-	  uint32_t *src_bits,
-	  uint32_t *dst_bits,
-	  int src_stride,
-	  int dst_stride,
-	  int src_bpp,
-	  int dst_bpp,
-	  int src_x, int src_y,
-	  int dst_x, int dst_y,
-	  int width, int height)
+              uint32_t *               src_bits,
+              uint32_t *               dst_bits,
+              int                      src_stride,
+              int                      dst_stride,
+              int                      src_bpp,
+              int                      dst_bpp,
+              int                      src_x,
+              int                      src_y,
+              int                      dst_x,
+              int                      dst_y,
+              int                      width,
+              int                      height)
 {
-	if (pixman_blt_neon (
-			src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
-			src_x, src_y, dst_x, dst_y, width, height))
-		return TRUE;
-
-	return _pixman_implementation_blt (
-			imp->delegate,
-			src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
-			src_x, src_y, dst_x, dst_y, width, height);
+    if (pixman_blt_neon (
+            src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+            src_x, src_y, dst_x, dst_y, width, height))
+    {
+	return TRUE;
+    }
+
+    return _pixman_implementation_blt (
+               imp->delegate,
+               src_bits, dst_bits, src_stride, dst_stride, src_bpp, dst_bpp,
+               src_x, src_y, dst_x, dst_y, width, height);
 }
 
 static pixman_bool_t
 arm_neon_fill (pixman_implementation_t *imp,
-	   uint32_t *bits,
-	   int stride,
-	   int bpp,
-	   int x,
-	   int y,
-	   int width,
-	   int height,
-	   uint32_t xor)
+               uint32_t *               bits,
+               int                      stride,
+               int                      bpp,
+               int                      x,
+               int                      y,
+               int                      width,
+               int                      height,
+               uint32_t xor)
 {
-	if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
-		return TRUE;
+    if (pixman_fill_neon (bits, stride, bpp, x, y, width, height, xor))
+	return TRUE;
 
-	return _pixman_implementation_fill (
-			imp->delegate, bits, stride, bpp, x, y, width, height, xor);
+    return _pixman_implementation_fill (
+	imp->delegate, bits, stride, bpp, x, y, width, height, xor);
 }
 
 pixman_implementation_t *
 _pixman_implementation_create_arm_neon (void)
 {
-	pixman_implementation_t *simd = _pixman_implementation_create_arm_simd();
-	pixman_implementation_t *imp  = _pixman_implementation_create (simd);
+    pixman_implementation_t *simd = _pixman_implementation_create_arm_simd ();
+    pixman_implementation_t *imp = _pixman_implementation_create (simd);
 
-	imp->composite = arm_neon_composite;
-	imp->blt = arm_neon_blt;
-	imp->fill = arm_neon_fill;
+    imp->composite = arm_neon_composite;
+    imp->blt = arm_neon_blt;
+    imp->fill = arm_neon_fill;
 
-	return imp;
+    return imp;
 }
+
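[Note: the constructor above layers the NEON code over the plain ARM SIMD
implementation; composite, blt and fill try a NEON fast path first and otherwise
hand the call to imp->delegate, which may in turn delegate further. A simplified
sketch of that chain-of-responsibility idea (in pixman each entry point calls its
own delegate, rather than an external walker like this):

    #include <stddef.h>

    typedef struct impl impl_t;

    struct impl
    {
        impl_t *delegate;
        int   (*fill) (impl_t *imp, int value); /* returns 1 when handled */
    };

    static int
    dispatch_fill (impl_t *imp, int value)
    {
        /* walk down the chain until some implementation accepts the call */
        for (; imp != NULL; imp = imp->delegate)
        {
            if (imp->fill && imp->fill (imp, value))
                return 1;
        }

        return 0; /* nobody could handle it */
    }
]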
commit 0af8ef742c6e1aa150b591bc7cdacb8d2293f7af
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Jul 12 18:04:21 2009 -0400

    Reindent and reformat pixman-access.c.

diff --git a/pixman/pixman-access.c b/pixman/pixman-access.c
index 4a4df07..d21c587 100644
--- a/pixman/pixman-access.c
+++ b/pixman/pixman-access.c
@@ -35,96 +35,132 @@
 #include "pixman-private.h"
 #include "pixman-accessor.h"
 
-#define CONVERT_RGB24_TO_Y15(s)       (((((s) >> 16) & 0xff) * 153 + \
-                                  (((s) >>  8) & 0xff) * 301 +		\
-                                  (((s)      ) & 0xff) * 58) >> 2)
-#define CONVERT_RGB24_TO_RGB15(s) ((((s) >> 3) & 0x001f) |  \
-			    (((s) >> 6) & 0x03e0) |  \
-			    (((s) >> 9) & 0x7c00))
-#define RGB16_TO_ENTRY(mif,rgb15) ((mif)->ent[rgb15])
-#define RGB24_TO_ENTRY(mif,rgb24) RGB16_TO_ENTRY(mif,CONVERT_RGB24_TO_RGB15(rgb24))
+#define CONVERT_RGB24_TO_Y15(s)						\
+    (((((s) >> 16) & 0xff) * 153 +					\
+      (((s) >>  8) & 0xff) * 301 +					\
+      (((s)      ) & 0xff) * 58) >> 2)
 
-#define RGB24_TO_ENTRY_Y(mif,rgb24) ((mif)->ent[CONVERT_RGB24_TO_Y15(rgb24)])
+#define CONVERT_RGB24_TO_RGB15(s)                                       \
+    ((((s) >> 3) & 0x001f) |                                            \
+     (((s) >> 6) & 0x03e0) |                                            \
+     (((s) >> 9) & 0x7c00))
+
+#define RGB16_TO_ENTRY(mif,rgb15)					\
+    ((mif)->ent[rgb15])
+
+#define RGB24_TO_ENTRY(mif,rgb24)					\
+    RGB16_TO_ENTRY (mif,CONVERT_RGB24_TO_RGB15 (rgb24))
+
+#define RGB24_TO_ENTRY_Y(mif,rgb24)					\
+    ((mif)->ent[CONVERT_RGB24_TO_Y15 (rgb24)])
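
[Note: the Y15 weights are a fixed-point approximation of BT.601 luma: 153/512 ~
0.299 for red, 301/512 ~ 0.587 for green, 58/512 ~ 0.113 for blue. Since the
weights sum to 512 and the result is shifted right by 2, the largest possible
value is (255 * 512) >> 2 = 32640, which fits the 15-bit index space of the ent[]
palette. A standalone check of that arithmetic:

    #include <assert.h>

    int
    main (void)
    {
        assert (153 + 301 + 58 == 512);
        /* all-white RGB maps to 32640, safely below 2^15 = 32768 */
        assert (((255 * 153 + 255 * 301 + 255 * 58) >> 2) == 32640);
        return 0;
    }
]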
 
 /*
  * YV12 setup and access macros
  */
 
-#define YV12_SETUP(image)						\
-    bits_image_t *__bits_image = (bits_image_t *)image;			\
-    uint32_t *bits = __bits_image->bits;					\
-    int stride = __bits_image->rowstride;					\
-    int offset0 = stride < 0 ?						\
-	((-stride) >> 1) * ((__bits_image->height - 1) >> 1) - stride :	\
-	stride * __bits_image->height;					\
-    int offset1 = stride < 0 ?						\
-	offset0 + ((-stride) >> 1) * ((__bits_image->height) >> 1) :	\
+#define YV12_SETUP(image)                                               \
+    bits_image_t *__bits_image = (bits_image_t *)image;                 \
+    uint32_t *bits = __bits_image->bits;                                \
+    int stride = __bits_image->rowstride;                               \
+    int offset0 = stride < 0 ?                                          \
+	((-stride) >> 1) * ((__bits_image->height - 1) >> 1) - stride : \
+	stride * __bits_image->height;                                  \
+    int offset1 = stride < 0 ?                                          \
+	offset0 + ((-stride) >> 1) * ((__bits_image->height) >> 1) :    \
 	offset0 + (offset0 >> 2)
+
 /* Note no trailing semicolon on the above macro; if it's there, then
  * the typical usage of YV12_SETUP(pict); will have an extra trailing ;
  * that some compilers will interpret as a statement -- and then any further
  * variable declarations will cause an error.
  */
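
[Note: a contrived illustration of the gotcha that comment describes, using a
hypothetical FOO_SETUP macro. The macro must end mid-declaration so the caller's
own semicolon terminates it, because a doubled ';' would create an empty
statement, and C89 forbids declarations after statements:

    /* deliberately no trailing ';' -- the call site supplies it */
    #define FOO_SETUP(n) int foo_a = (n); int foo_b = (n) * 2

    static int
    use_foo (int n)
    {
        FOO_SETUP (n);             /* ';' terminates the second declaration */
        int foo_c = foo_a + foo_b; /* still in the declaration block: OK */

        /* had the macro ended in ';', the call would expand to an empty
         * statement and a C89 compiler would reject the declaration above */
        return foo_c;
    }
]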
 
-#define YV12_Y(line)		\
+#define YV12_Y(line)                                                    \
     ((uint8_t *) ((bits) + (stride) * (line)))
 
-#define YV12_U(line)	      \
-    ((uint8_t *) ((bits) + offset1 + \
-		((stride) >> 1) * ((line) >> 1)))
+#define YV12_U(line)                                                    \
+    ((uint8_t *) ((bits) + offset1 +                                    \
+                  ((stride) >> 1) * ((line) >> 1)))
 
-#define YV12_V(line)	      \
-    ((uint8_t *) ((bits) + offset0 + \
-		((stride) >> 1) * ((line) >> 1)))
+#define YV12_V(line)                                                    \
+    ((uint8_t *) ((bits) + offset0 +                                    \
+                  ((stride) >> 1) * ((line) >> 1)))
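
[Note: for the usual positive-stride case these offsets just encode the YV12
plane layout: a full-resolution Y plane, then a quarter-size V plane, then a
quarter-size U plane (V precedes U, which is why YV12_V uses offset0 and YV12_U
uses offset1). A worked example under assumed dimensions, 320x240 with 8-bit Y
rows padded to 80 uint32_t words:

    #include <assert.h>

    int
    main (void)
    {
        int stride = 80;  /* Y rowstride in uint32_t words = 320 bytes */
        int height = 240;

        int offset0 = stride * height;          /* first word of the V plane */
        int offset1 = offset0 + (offset0 >> 2); /* first word of the U plane */

        /* each chroma plane is (stride / 2) words by (height / 2) rows */
        assert ((offset0 >> 2) == (stride >> 1) * (height >> 1));
        assert (offset0 == 19200 && offset1 == 24000);
        return 0;
    }
]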
 
 /*********************************** Fetch ************************************/
 
 static void
-fetch_scanline_a8r8g8b8 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a8r8g8b8 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
-    MEMCPY_WRAPPED(image,
-                   buffer, (const uint32_t *)bits + x,
-		   width*sizeof(uint32_t));
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+    
+    MEMCPY_WRAPPED (image,
+                    buffer, (const uint32_t *)bits + x,
+                    width * sizeof(uint32_t));
 }
 
 static void
-fetch_scanline_x8r8g8b8 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_x8r8g8b8 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint32_t *pixel = (const uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
-    while (pixel < end) {
-	*buffer++ = READ(image, pixel++) | 0xff000000;
-    }
+    
+    while (pixel < end)
+	*buffer++ = READ (image, pixel++) | 0xff000000;
 }
 
 static void
-fetch_scanline_a8b8g8r8 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a8b8g8r8 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint32_t *pixel = (uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t p = READ(image, pixel++);
-	*buffer++ = (p & 0xff00ff00) |
-	            ((p >> 16) & 0xff) |
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	
+	*buffer++ = (p & 0xff00ff00)	|
+	    ((p >> 16) & 0xff)		|
 	    ((p & 0xff) << 16);
     }
 }
 
 static void
-fetch_scanline_x8b8g8r8 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_x8b8g8r8 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint32_t *pixel = (uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t p = READ(image, pixel++);
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	
 	*buffer++ = 0xff000000 |
 	    (p & 0x0000ff00) |
 	    ((p >> 16) & 0xff) |
@@ -133,665 +169,926 @@ fetch_scanline_x8b8g8r8 (pixman_image_t *image, int x, int y, int width, uint32_
 }
 
 static void
-fetch_scanline_b8g8r8a8 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_b8g8r8a8 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint32_t *pixel = (uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t p = READ(image, pixel++);
-	*buffer++ = ((p & 0xff000000) >> 24) |
-	    ((p & 0x00ff0000) >> 8) |
-	    ((p & 0x0000ff00) << 8) |
-	    ((p & 0x000000ff) << 24);
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	
+	*buffer++ = (((p & 0xff000000) >> 24)	|
+	             ((p & 0x00ff0000) >> 8)	|
+	             ((p & 0x0000ff00) << 8)	|
+	             ((p & 0x000000ff) << 24));
     }
 }
 
 static void
-fetch_scanline_b8g8r8x8 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_b8g8r8x8 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint32_t *pixel = (uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t p = READ(image, pixel++);
-	*buffer++ = 0xff000000 |
-	    ((p & 0xff000000) >> 24) |
-	    ((p & 0x00ff0000) >> 8) |
-	    ((p & 0x0000ff00) << 8);
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	
+	*buffer++ = (0xff000000 |
+	             ((p & 0xff000000) >> 24)	|
+	             ((p & 0x00ff0000) >> 8)	|
+	             ((p & 0x0000ff00) << 8));
     }
 }
 
 /* Expects a uint64_t buffer */
 static void
-fetch_scanline_a2r10g10b10 (pixman_image_t *image, int x, int y, int width, uint32_t *b,
-		     const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a2r10g10b10 (pixman_image_t *image,
+                            int             x,
+                            int             y,
+                            int             width,
+                            uint32_t *      b,
+                            const uint32_t *mask,
+                            uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint32_t *pixel = bits + x;
     const uint32_t *end = pixel + width;
     uint64_t *buffer = (uint64_t *)b;
     
-    while (pixel < end) {
-        uint32_t p = READ(image, pixel++);
-        uint64_t a = p >> 30;
-        uint64_t r = (p >> 20) & 0x3ff;
-        uint64_t g = (p >> 10) & 0x3ff;
-        uint64_t b = p & 0x3ff;
-
-        r = r << 6 | r >> 4;
-        g = g << 6 | g >> 4;
-        b = b << 6 | b >> 4;
-
-        a <<= 62;
-        a |= a >> 2;
-        a |= a >> 4;
-        a |= a >> 8;
-
-        *buffer++ = a << 48 | r << 32 | g << 16 | b;
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint64_t a = p >> 30;
+	uint64_t r = (p >> 20) & 0x3ff;
+	uint64_t g = (p >> 10) & 0x3ff;
+	uint64_t b = p & 0x3ff;
+	
+	r = r << 6 | r >> 4;
+	g = g << 6 | g >> 4;
+	b = b << 6 | b >> 4;
+	
+	a <<= 62;
+	a |= a >> 2;
+	a |= a >> 4;
+	a |= a >> 8;
+	
+	*buffer++ = a << 48 | r << 32 | g << 16 | b;
     }
 }
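
[Note: the channel widening here uses bit replication: (v << 6) | (v >> 4) copies
the top four bits of a 10-bit value into the freshly opened low bits, mapping 0
to 0 and 0x3ff to 0xffff with an almost perfectly linear ramp; the cascade of ORs
does the same for the 2-bit alpha. One observation on the constants, offered
tentatively rather than as part of the commit: after "a <<= 62" the replicated
alpha already sits in bits 63:48, so the final "a << 48" pushes it off the top of
the 64-bit word. Starting from "a <<= 14" instead, so that the replicated value
fills bits 15:0 before the "<< 48", would give the presumably intended result in
this fetcher and its a2b10g10r10 twin below. A standalone check of the
replication arithmetic under that assumption:

    #include <assert.h>
    #include <stdint.h>

    int
    main (void)
    {
        uint64_t v = 0x3ff; /* maximum 10-bit channel */
        uint64_t a = 0x3;   /* maximum 2-bit alpha */

        assert (((v << 6) | (v >> 4)) == 0xffff);

        a <<= 14;           /* place it at bits 15:14 ... */
        a |= a >> 2;
        a |= a >> 4;
        a |= a >> 8;        /* ... and replicate down to bit 0 */
        assert ((a << 48) == 0xffff000000000000ULL);
        return 0;
    }
]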
 
 /* Expects a uint64_t buffer */
 static void
-fetch_scanline_x2r10g10b10 (pixman_image_t *image, int x, int y, int width, uint32_t *b,
-		     const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_x2r10g10b10 (pixman_image_t *image,
+                            int             x,
+                            int             y,
+                            int             width,
+                            uint32_t *      b,
+                            const uint32_t *mask,
+                            uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint32_t *pixel = (uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
     uint64_t *buffer = (uint64_t *)b;
-
-    while (pixel < end) {
-        uint32_t p = READ(image, pixel++);
-        uint64_t r = (p >> 20) & 0x3ff;
-        uint64_t g = (p >> 10) & 0x3ff;
-        uint64_t b = p & 0x3ff;
-
-        r = r << 6 | r >> 4;
-        g = g << 6 | g >> 4;
-        b = b << 6 | b >> 4;
-
-        *buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b;
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint64_t r = (p >> 20) & 0x3ff;
+	uint64_t g = (p >> 10) & 0x3ff;
+	uint64_t b = p & 0x3ff;
+	
+	r = r << 6 | r >> 4;
+	g = g << 6 | g >> 4;
+	b = b << 6 | b >> 4;
+	
+	*buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b;
     }
 }
 
-
 /* Expects a uint64_t buffer */
 static void
-fetch_scanline_a2b10g10r10 (pixman_image_t *image, int x, int y, int width, uint32_t *b,
-			    const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a2b10g10r10 (pixman_image_t *image,
+                            int             x,
+                            int             y,
+                            int             width,
+                            uint32_t *      b,
+                            const uint32_t *mask,
+                            uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint32_t *pixel = bits + x;
     const uint32_t *end = pixel + width;
     uint64_t *buffer = (uint64_t *)b;
-
-    while (pixel < end) {
-        uint32_t p = READ(image, pixel++);
-        uint64_t a = p >> 30;
-        uint64_t b = (p >> 20) & 0x3ff;
-        uint64_t g = (p >> 10) & 0x3ff;
-        uint64_t r = p & 0x3ff;
-
-        r = r << 6 | r >> 4;
-        g = g << 6 | g >> 4;
-        b = b << 6 | b >> 4;
-
-        a <<= 62;
-        a |= a >> 2;
-        a |= a >> 4;
-        a |= a >> 8;
-
-        *buffer++ = a << 48 | r << 32 | g << 16 | b;
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint64_t a = p >> 30;
+	uint64_t b = (p >> 20) & 0x3ff;
+	uint64_t g = (p >> 10) & 0x3ff;
+	uint64_t r = p & 0x3ff;
+	
+	r = r << 6 | r >> 4;
+	g = g << 6 | g >> 4;
+	b = b << 6 | b >> 4;
+	
+	a <<= 62;
+	a |= a >> 2;
+	a |= a >> 4;
+	a |= a >> 8;
+	
+	*buffer++ = a << 48 | r << 32 | g << 16 | b;
     }
 }
 
 /* Expects a uint64_t buffer */
 static void
-fetch_scanline_x2b10g10r10 (pixman_image_t *image, int x, int y, int width, uint32_t *b,
-		     const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_x2b10g10r10 (pixman_image_t *image,
+                            int             x,
+                            int             y,
+                            int             width,
+                            uint32_t *      b,
+                            const uint32_t *mask,
+                            uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint32_t *pixel = (uint32_t *)bits + x;
     const uint32_t *end = pixel + width;
     uint64_t *buffer = (uint64_t *)b;
-
-    while (pixel < end) {
-        uint32_t p = READ(image, pixel++);
-        uint64_t b = (p >> 20) & 0x3ff;
-        uint64_t g = (p >> 10) & 0x3ff;
-        uint64_t r = p & 0x3ff;
-
-        r = r << 6 | r >> 4;
-        g = g << 6 | g >> 4;
-        b = b << 6 | b >> 4;
-
-        *buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b;
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint64_t b = (p >> 20) & 0x3ff;
+	uint64_t g = (p >> 10) & 0x3ff;
+	uint64_t r = p & 0x3ff;
+	
+	r = r << 6 | r >> 4;
+	g = g << 6 | g >> 4;
+	b = b << 6 | b >> 4;
+	
+	*buffer++ = 0xffffULL << 48 | r << 32 | g << 16 | b;
     }
 }
 
 static void
-fetch_scanline_r8g8b8 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_r8g8b8 (pixman_image_t *image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       uint32_t *      buffer,
+                       const uint32_t *mask,
+                       uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
-    const uint8_t *end = pixel + 3*width;
-    while (pixel < end) {
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + 3 * x;
+    const uint8_t *end = pixel + 3 * width;
+    
+    while (pixel < end)
+    {
 	uint32_t b = 0xff000000;
+
 #ifdef WORDS_BIGENDIAN
-	b |= (READ(image, pixel++) << 16);
-	b |= (READ(image, pixel++) << 8);
-	b |= (READ(image, pixel++));
+	b |= (READ (image, pixel++) << 16);
+	b |= (READ (image, pixel++) << 8);
+	b |= (READ (image, pixel++));
 #else
-	b |= (READ(image, pixel++));
-	b |= (READ(image, pixel++) << 8);
-	b |= (READ(image, pixel++) << 16);
+	b |= (READ (image, pixel++));
+	b |= (READ (image, pixel++) << 8);
+	b |= (READ (image, pixel++) << 16);
 #endif
+
 	*buffer++ = b;
     }
 }
 
 static void
-fetch_scanline_b8g8r8 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_b8g8r8 (pixman_image_t *image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       uint32_t *      buffer,
+                       const uint32_t *mask,
+                       uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
-    const uint8_t *pixel = (const uint8_t *)bits + 3*x;
-    const uint8_t *end = pixel + 3*width;
-    while (pixel < end) {
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
+    const uint8_t *pixel = (const uint8_t *)bits + 3 * x;
+    const uint8_t *end = pixel + 3 * width;
+    
+    while (pixel < end)
+    {
 	uint32_t b = 0xff000000;
 #ifdef WORDS_BIGENDIAN
-	b |= (READ(image, pixel++));
-	b |= (READ(image, pixel++) << 8);
-	b |= (READ(image, pixel++) << 16);
+	b |= (READ (image, pixel++));
+	b |= (READ (image, pixel++) << 8);
+	b |= (READ (image, pixel++) << 16);
 #else
-	b |= (READ(image, pixel++) << 16);
-	b |= (READ(image, pixel++) << 8);
-	b |= (READ(image, pixel++));
+	b |= (READ (image, pixel++) << 16);
+	b |= (READ (image, pixel++) << 8);
+	b |= (READ (image, pixel++));
 #endif
 	*buffer++ = b;
     }
 }
 
 static void
-fetch_scanline_r5g6b5 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_r5g6b5 (pixman_image_t *image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       uint32_t *      buffer,
+                       const uint32_t *mask,
+                       uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t p = READ(image, pixel++);
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
 	uint32_t r = (((p) << 3) & 0xf8) |
 	    (((p) << 5) & 0xfc00) |
 	    (((p) << 8) & 0xf80000);
+	
 	r |= (r >> 5) & 0x70007;
 	r |= (r >> 6) & 0x300;
+
 	*buffer++ = 0xff000000 | r;
     }
 }
 
 static void
-fetch_scanline_b5g6r5 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_b5g6r5 (pixman_image_t *image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       uint32_t *      buffer,
+                       const uint32_t *mask,
+                       uint32_t        mask_bits)
 {
-    uint32_t  r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t r, g, b;
+	
 	b = ((p & 0xf800) | ((p & 0xe000) >> 5)) >> 8;
 	g = ((p & 0x07e0) | ((p & 0x0600) >> 6)) << 5;
 	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
+
 	*buffer++ = 0xff000000 | r | g | b;
     }
 }
 
 static void
-fetch_scanline_a1r5g5b5 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a1r5g5b5 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t r, g, b, a;
+	
 	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
 	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
 	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
 	b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
+
 	*buffer++ = a | r | g | b;
     }
 }
 
 static void
-fetch_scanline_x1r5g5b5 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_x1r5g5b5 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t  r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t r, g, b;
+	
 	r = ((p & 0x7c00) | ((p & 0x7000) >> 5)) << 9;
 	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
 	b = ((p & 0x001c) | ((p & 0x001f) << 5)) >> 2;
+
 	*buffer++ = 0xff000000 | r | g | b;
     }
 }
 
 static void
-fetch_scanline_a1b5g5r5 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a1b5g5r5 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    uint32_t r, g, b, a;
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	
 	a = (uint32_t) ((uint8_t) (0 - ((p & 0x8000) >> 15))) << 24;
 	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
 	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
 	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
+
 	*buffer++ = a | r | g | b;
     }
 }
 
 static void
-fetch_scanline_x1b5g5r5 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_x1b5g5r5 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t  r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t r, g, b;
+	
 	b = ((p & 0x7c00) | ((p & 0x7000) >> 5)) >> 7;
 	g = ((p & 0x03e0) | ((p & 0x0380) >> 5)) << 6;
 	r = ((p & 0x001c) | ((p & 0x001f) << 5)) << 14;
+
 	*buffer++ = 0xff000000 | r | g | b;
     }
 }
 
 static void
-fetch_scanline_a4r4g4b4 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a4r4g4b4 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t r, g, b, a;
+	
 	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
 	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
 	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
 	b = ((p & 0x000f) | ((p & 0x000f) << 4));
+
 	*buffer++ = a | r | g | b;
     }
 }
 
 static void
-fetch_scanline_x4r4g4b4 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_x4r4g4b4 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t  r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t r, g, b;
+	
 	r = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) << 12;
 	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
 	b = ((p & 0x000f) | ((p & 0x000f) << 4));
+
 	*buffer++ = 0xff000000 | r | g | b;
     }
 }
 
 static void
-fetch_scanline_a4b4g4r4 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a4b4g4r4 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t  r,g,b, a;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t r, g, b, a;
+	
 	a = ((p & 0xf000) | ((p & 0xf000) >> 4)) << 16;
 	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
 	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
 	r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
+
 	*buffer++ = a | r | g | b;
     }
 }
 
 static void
-fetch_scanline_x4b4g4r4 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_x4b4g4r4 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t  r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint16_t *pixel = (const uint16_t *)bits + x;
     const uint16_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t r, g, b;
+	
 	b = ((p & 0x0f00) | ((p & 0x0f00) >> 4)) >> 4;
 	g = ((p & 0x00f0) | ((p & 0x00f0) >> 4)) << 8;
 	r = ((p & 0x000f) | ((p & 0x000f) << 4)) << 16;
+
 	*buffer++ = 0xff000000 | r | g | b;
     }
 }
 
 static void
-fetch_scanline_a8 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-	    const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a8 (pixman_image_t *image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   uint32_t *      buffer,
+                   const uint32_t *mask,
+                   uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	*buffer++ = READ(image, pixel++) << 24;
-    }
+    
+    while (pixel < end)
+	*buffer++ = READ (image, pixel++) << 24;
 }
 
 static void
-fetch_scanline_r3g3b2 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_r3g3b2 (pixman_image_t *image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       uint32_t *      buffer,
+                       const uint32_t *mask,
+                       uint32_t        mask_bits)
 {
-    uint32_t  r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t r, g, b;
+	
 	r = ((p & 0xe0) | ((p & 0xe0) >> 3) | ((p & 0xc0) >> 6)) << 16;
 	g = ((p & 0x1c) | ((p & 0x18) >> 3) | ((p & 0x1c) << 3)) << 8;
 	b = (((p & 0x03)     ) |
 	     ((p & 0x03) << 2) |
 	     ((p & 0x03) << 4) |
 	     ((p & 0x03) << 6));
+
 	*buffer++ = 0xff000000 | r | g | b;
     }
 }
 
 static void
-fetch_scanline_b2g3r3 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_b2g3r3 (pixman_image_t *image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       uint32_t *      buffer,
+                       const uint32_t *mask,
+                       uint32_t        mask_bits)
 {
-    uint32_t  r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t r, g, b;
+	
 	b = (((p & 0xc0)     ) |
 	     ((p & 0xc0) >> 2) |
 	     ((p & 0xc0) >> 4) |
 	     ((p & 0xc0) >> 6));
+
 	g = ((p & 0x38) | ((p & 0x38) >> 3) | ((p & 0x30) << 2)) << 8;
+
 	r = (((p & 0x07)     ) |
 	     ((p & 0x07) << 3) |
 	     ((p & 0x06) << 6)) << 16;
+
 	*buffer++ = 0xff000000 | r | g | b;
     }
 }
 
 static void
-fetch_scanline_a2r2g2b2 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a2r2g2b2 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t   a,r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t a, r, g, b;
+	
 	a = ((p & 0xc0) * 0x55) << 18;
 	r = ((p & 0x30) * 0x55) << 12;
 	g = ((p & 0x0c) * 0x55) << 6;
 	b = ((p & 0x03) * 0x55);
-	*buffer++ = a|r|g|b;
+
+	*buffer++ = a | r | g | b;
     }
 }
 
 static void
-fetch_scanline_a2b2g2r2 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a2b2g2r2 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t   a,r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
-
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+	uint32_t a, r, g, b;
+	
 	a = ((p & 0xc0) * 0x55) << 18;
 	b = ((p & 0x30) * 0x55) >> 6;
 	g = ((p & 0x0c) * 0x55) << 6;
 	r = ((p & 0x03) * 0x55) << 16;
-	*buffer++ = a|r|g|b;
+
+	*buffer++ = a | r | g | b;
     }
 }
 
 static void
-fetch_scanline_c8 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-	    const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_c8 (pixman_image_t *image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   uint32_t *      buffer,
+                   const uint32_t *mask,
+                   uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const pixman_indexed_t * indexed = image->bits.indexed;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint32_t  p = READ(image, pixel++);
+    
+    while (pixel < end)
+    {
+	uint32_t p = READ (image, pixel++);
+
 	*buffer++ = indexed->rgba[p];
     }
 }
 
 static void
-fetch_scanline_x4a4 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-	      const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_x4a4 (pixman_image_t *image,
+                     int             x,
+                     int             y,
+                     int             width,
+                     uint32_t *      buffer,
+                     const uint32_t *mask,
+                     uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const uint8_t *pixel = (const uint8_t *)bits + x;
     const uint8_t *end = pixel + width;
-    while (pixel < end) {
-	uint8_t p = READ(image, pixel++) & 0xf;
+    
+    while (pixel < end)
+    {
+	uint8_t p = READ (image, pixel++) & 0xf;
+
 	*buffer++ = (p | (p << 4)) << 24;
     }
 }
 
-#define FETCH_8(img,l,o)    (READ(img, (uint8_t *)(l) + ((o) >> 2)))
+#define FETCH_8(img,l,o)    (READ (img, (uint8_t *)(l) + ((o) >> 2)))
 #ifdef WORDS_BIGENDIAN
-#define FETCH_4(img,l,o)    ((o) & 2 ? FETCH_8(img,l,o) & 0xf : FETCH_8(img,l,o) >> 4)
+#define FETCH_4(img,l,o)    ((o) & 2 ? FETCH_8 (img,l,o) & 0xf : FETCH_8 (img,l,o) >> 4)
 #else
-#define FETCH_4(img,l,o)    ((o) & 2 ? FETCH_8(img,l,o) >> 4 : FETCH_8(img,l,o) & 0xf)
+#define FETCH_4(img,l,o)    ((o) & 2 ? FETCH_8 (img,l,o) >> 4 : FETCH_8 (img,l,o) & 0xf)
 #endif
 
 static void
-fetch_scanline_a4 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-	    const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a4 (pixman_image_t *image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   uint32_t *      buffer,
+                   const uint32_t *mask,
+                   uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = FETCH_4(image, bits, i + x);
-
+    
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t p = FETCH_4 (image, bits, i + x);
+	
 	p |= p << 4;
+
 	*buffer++ = p << 24;
     }
 }
 
 static void
-fetch_scanline_r1g2b1 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_r1g2b1 (pixman_image_t *image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       uint32_t *      buffer,
+                       const uint32_t *mask,
+                       uint32_t        mask_bits)
 {
-    uint32_t  r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = FETCH_4(image, bits, i + x);
-
+    
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t p = FETCH_4 (image, bits, i + x);
+	uint32_t r, g, b;
+	
 	r = ((p & 0x8) * 0xff) << 13;
 	g = ((p & 0x6) * 0x55) << 7;
 	b = ((p & 0x1) * 0xff);
-	*buffer++ = 0xff000000|r|g|b;
+	
+	*buffer++ = 0xff000000 | r | g | b;
     }
 }
 
 static void
-fetch_scanline_b1g2r1 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_b1g2r1 (pixman_image_t *image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       uint32_t *      buffer,
+                       const uint32_t *mask,
+                       uint32_t        mask_bits)
 {
-    uint32_t  r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = FETCH_4(image, bits, i + x);
-
+    
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t p = FETCH_4 (image, bits, i + x);
+	uint32_t r, g, b;
+	
 	b = ((p & 0x8) * 0xff) >> 3;
 	g = ((p & 0x6) * 0x55) << 7;
 	r = ((p & 0x1) * 0xff) << 16;
-	*buffer++ = 0xff000000|r|g|b;
+	
+	*buffer++ = 0xff000000 | r | g | b;
     }
 }
 
 static void
-fetch_scanline_a1r1g1b1 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a1r1g1b1 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t  a,r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    uint32_t a, r, g, b;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = FETCH_4(image, bits, i + x);
-
+    
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t p = FETCH_4 (image, bits, i + x);
+	
 	a = ((p & 0x8) * 0xff) << 21;
 	r = ((p & 0x4) * 0xff) << 14;
 	g = ((p & 0x2) * 0xff) << 7;
 	b = ((p & 0x1) * 0xff);
-	*buffer++ = a|r|g|b;
+	
+	*buffer++ = a | r | g | b;
     }
 }
 
 static void
-fetch_scanline_a1b1g1r1 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		  const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a1b1g1r1 (pixman_image_t *image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         uint32_t *      buffer,
+                         const uint32_t *mask,
+                         uint32_t        mask_bits)
 {
-    uint32_t  a,r,g,b;
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = FETCH_4(image, bits, i + x);
-
+    
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t p = FETCH_4 (image, bits, i + x);
+	uint32_t a, r, g, b;
+	
 	a = ((p & 0x8) * 0xff) << 21;
 	r = ((p & 0x4) * 0xff) >> 3;
 	g = ((p & 0x2) * 0xff) << 7;
 	b = ((p & 0x1) * 0xff) << 16;
-	*buffer++ = a|r|g|b;
+	
+	*buffer++ = a | r | g | b;
     }
 }
 
 static void
-fetch_scanline_c4 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-	    const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_c4 (pixman_image_t *image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   uint32_t *      buffer,
+                   const uint32_t *mask,
+                   uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const pixman_indexed_t * indexed = image->bits.indexed;
     int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = FETCH_4 (image, bits, i + x);
-
+    
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t p = FETCH_4 (image, bits, i + x);
+	
 	*buffer++ = indexed->rgba[p];
     }
 }
 
-
 static void
-fetch_scanline_a1 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-	    const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_a1 (pixman_image_t *image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   uint32_t *      buffer,
+                   const uint32_t *mask,
+                   uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t  p = READ(image, bits + ((i + x) >> 5));
-	uint32_t  a;
+    
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t p = READ (image, bits + ((i + x) >> 5));
+	uint32_t a;
+	
 #ifdef WORDS_BIGENDIAN
-	a = p >> (0x1f - ((i+x) & 0x1f));
+	a = p >> (0x1f - ((i + x) & 0x1f));
 #else
-	a = p >> ((i+x) & 0x1f);
+	a = p >> ((i + x) & 0x1f);
 #endif
 	a = a & 1;
 	a |= a << 1;
 	a |= a << 2;
 	a |= a << 4;
+	
 	*buffer++ = a << 24;
     }
 }
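
The shift-or cascade above widens a single alpha bit into a full 8-bit
channel without branching: each step doubles the number of copies of the
bit. The same trick in isolation, as an illustrative sketch (not part of
this patch):

    /* Replicate one bit into an 8-bit channel: 0 -> 0x00, 1 -> 0xff */
    static inline uint32_t
    expand_bit_to_8 (uint32_t bit)
    {
        uint32_t a = bit & 1;

        a |= a << 1;    /* 1 copy   -> 2 copies */
        a |= a << 2;    /* 2 copies -> 4 copies */
        a |= a << 4;    /* 4 copies -> 8 copies */

        return a;       /* 0x00 or 0xff */
    }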
 
 static void
-fetch_scanline_g1 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-	    const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_g1 (pixman_image_t *image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   uint32_t *      buffer,
+                   const uint32_t *mask,
+                   uint32_t        mask_bits)
 {
-    const uint32_t *bits = image->bits.bits + y*image->bits.rowstride;
+    const uint32_t *bits = image->bits.bits + y * image->bits.rowstride;
     const pixman_indexed_t * indexed = image->bits.indexed;
     int i;
-    for (i = 0; i < width; ++i) {
-	uint32_t p = READ(image, bits + ((i+x) >> 5));
+    
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t p = READ (image, bits + ((i + x) >> 5));
 	uint32_t a;
+	
 #ifdef WORDS_BIGENDIAN
-	a = p >> (0x1f - ((i+x) & 0x1f));
+	a = p >> (0x1f - ((i + x) & 0x1f));
 #else
-	a = p >> ((i+x) & 0x1f);
+	a = p >> ((i + x) & 0x1f);
 #endif
 	a = a & 1;
+	
 	*buffer++ = indexed->rgba[a];
     }
 }
 
 static void
-fetch_scanline_yuy2 (pixman_image_t *image, int x, int line, int width, uint32_t *buffer,
-	      const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_yuy2 (pixman_image_t *image,
+                     int             x,
+                     int             line,
+                     int             width,
+                     uint32_t *      buffer,
+                     const uint32_t *mask,
+                     uint32_t        mask_bits)
 {
-    int16_t y, u, v;
-    int32_t r, g, b;
-    int   i;
-
     const uint32_t *bits = image->bits.bits + image->bits.rowstride * line;
-
+    int i;
+    
     for (i = 0; i < width; i++)
     {
+	int16_t y, u, v;
+	int32_t r, g, b;
+	
 	y = ((uint8_t *) bits)[(x + i) << 1] - 16;
-	u = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 1] - 128;
-	v = ((uint8_t *) bits)[(((x + i) << 1) & -4) + 3] - 128;
-
+	u = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 1] - 128;
+	v = ((uint8_t *) bits)[(((x + i) << 1) & - 4) + 3] - 128;
+	
 	/* R = 1.164(Y - 16) + 1.596(V - 128) */
 	r = 0x012b27 * y + 0x019a2e * v;
 	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
 	/* B = 1.164(Y - 16) + 2.018(U - 128) */
 	b = 0x012b27 * y + 0x0206a2 * u;
-
+	
 	*buffer++ = 0xff000000 |
 	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
@@ -800,30 +1097,36 @@ fetch_scanline_yuy2 (pixman_image_t *image, int x, int line, int width, uint32_t
 }
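
The hex constants in the conversion are fixed-point (16.16) versions of
the BT.601 coefficients quoted in the comments, pre-scaled so that the
integer part of each channel lands in bits 16..23; the nested
conditionals then clamp to [0, 255] before the masks place each byte.
For example, Y = 128 with U = V = 128 (mid gray) gives y = 112 and
r = 0x012b27 * 112 = 0x82e110, so the red byte is 0x82 (130), matching
1.164 * 112 = 130.4. A branchy equivalent of the clamp, as a sketch
(the helper name is illustrative):

    /* Clamp one fixed-point channel (integer part in bits 16..23)
     * to 0..255; equivalent to the nested ?: chains above. */
    static inline uint32_t
    clamp_channel (int32_t v)
    {
        if (v < 0)
            return 0;
        if (v >= 0x1000000)
            return 0xff;
        return (v >> 16) & 0xff;
    }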
 
 static void
-fetch_scanline_yv12 (pixman_image_t *image, int x, int line, int width, uint32_t *buffer,
-	      const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_yv12 (pixman_image_t *image,
+                     int             x,
+                     int             line,
+                     int             width,
+                     uint32_t *      buffer,
+                     const uint32_t *mask,
+                     uint32_t        mask_bits)
 {
-    YV12_SETUP(image);
+    YV12_SETUP (image);
     uint8_t *y_line = YV12_Y (line);
     uint8_t *u_line = YV12_U (line);
     uint8_t *v_line = YV12_V (line);
-    int16_t y, u, v;
-    int32_t r, g, b;
-    int   i;
-
+    int i;
+    
     for (i = 0; i < width; i++)
     {
+	int16_t y, u, v;
+	int32_t r, g, b;
+	
 	y = y_line[x + i] - 16;
 	u = u_line[(x + i) >> 1] - 128;
 	v = v_line[(x + i) >> 1] - 128;
-
+	
 	/* R = 1.164(Y - 16) + 1.596(V - 128) */
 	r = 0x012b27 * y + 0x019a2e * v;
 	/* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 	g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
 	/* B = 1.164(Y - 16) + 2.018(U - 128) */
 	b = 0x012b27 * y + 0x0206a2 * u;
-
+	
 	*buffer++ = 0xff000000 |
 	    (r >= 0 ? r < 0x1000000 ? r         & 0xff0000 : 0xff0000 : 0) |
 	    (g >= 0 ? g < 0x1000000 ? (g >> 8)  & 0x00ff00 : 0x00ff00 : 0) |
@@ -835,38 +1138,40 @@ fetch_scanline_yv12 (pixman_image_t *image, int x, int line, int width, uint32_t
 
 /* Despite the type, expects a uint64_t buffer */
 static void
-fetch_pixels_a2r10g10b10_64 (bits_image_t *pict, uint32_t *b, int n_pixels)
+fetch_pixels_a2r10g10b10_64 (bits_image_t *pict,
+                             uint32_t *    b,
+                             int           n_pixels)
 {
     int i;
     uint64_t *buffer = (uint64_t *)b;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = ((uint32_t *)buffer)[2 * i];
 	int line = ((uint32_t *)buffer)[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t p = READ(pict, bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t p = READ (pict, bits + offset);
 	    uint64_t a = p >> 30;
 	    uint64_t r = (p >> 20) & 0x3ff;
 	    uint64_t g = (p >> 10) & 0x3ff;
 	    uint64_t b = p & 0x3ff;
-
+	    
 	    r = r << 6 | r >> 4;
 	    g = g << 6 | g >> 4;
 	    b = b << 6 | b >> 4;
-
+	    
 	    a <<= 62;
 	    a |= a >> 2;
 	    a |= a >> 4;
 	    a |= a >> 8;
-
+	    
 	    buffer[i] = a << 48 | r << 32 | g << 16 | b;
 	}
     }
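
The fetch_pixels_* family reuses its buffer argument for both input and
output: on entry, slots 2i and 2i+1 (as uint32_t) hold the x offset and
line of pixel i, with 0xffffffff marking a coordinate outside the image,
for which 0 is stored; the result is then written over slot i, which
never clobbers a pair that has not been read yet. The 64-bit variants
also widen each 10-bit channel with r = r << 6 | r >> 4, replicating the
top bits so that 0x000 and 0x3ff map exactly to 0x0000 and 0xffff. That
widening in isolation, as a sketch:

    /* Widen a 10-bit channel to 16 bits by bit replication (sketch). */
    static inline uint16_t
    expand_10_to_16 (uint16_t c)
    {
        return (c << 6) | (c >> 4);    /* 0x000 -> 0x0000, 0x3ff -> 0xffff */
    }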
@@ -874,32 +1179,34 @@ fetch_pixels_a2r10g10b10_64 (bits_image_t *pict, uint32_t *b, int n_pixels)
 
 /* Despite the type, this function expects a uint64_t buffer */
 static void
-fetch_pixels_x2r10g10b10_64 (bits_image_t *pict, uint32_t *b, int n_pixels)
+fetch_pixels_x2r10g10b10_64 (bits_image_t *pict,
+                             uint32_t *    b,
+                             int           n_pixels)
 {
     uint64_t *buffer = (uint64_t *)b;
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = ((uint32_t *)buffer)[2 * i];
 	int line = ((uint32_t *)buffer)[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t p = READ(pict, bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t p = READ (pict, bits + offset);
 	    uint64_t r = (p >> 20) & 0x3ff;
 	    uint64_t g = (p >> 10) & 0x3ff;
 	    uint64_t b = p & 0x3ff;
-
+	    
 	    r = r << 6 | r >> 4;
 	    g = g << 6 | g >> 4;
 	    b = b << 6 | b >> 4;
-
+	    
 	    buffer[i] = 0xffffULL << 48 | r << 32 | g << 16 | b;
 	}
     }
@@ -907,24 +1214,26 @@ fetch_pixels_x2r10g10b10_64 (bits_image_t *pict, uint32_t *b, int n_pixels)
 
 /* Despite the type, expects a uint64_t buffer */
 static void
-fetch_pixels_a2b10g10r10_64 (bits_image_t *pict, uint32_t *b, int n_pixels)
+fetch_pixels_a2b10g10r10_64 (bits_image_t *pict,
+                             uint32_t *    b,
+                             int           n_pixels)
 {
-    int i;
     uint64_t *buffer = (uint64_t *)b;
-
+    int i;
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = ((uint32_t *)buffer)[2 * i];
 	int line = ((uint32_t *)buffer)[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t p = READ(pict, bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t p = READ (pict, bits + offset);
 	    uint64_t a = p >> 30;
 	    uint64_t b = (p >> 20) & 0x3ff;
 	    uint64_t g = (p >> 10) & 0x3ff;
@@ -938,7 +1247,7 @@ fetch_pixels_a2b10g10r10_64 (bits_image_t *pict, uint32_t *b, int n_pixels)
 	    a |= a >> 2;
 	    a |= a >> 4;
 	    a |= a >> 8;
-
+	    
 	    buffer[i] = a << 48 | r << 32 | g << 16 | b;
 	}
     }
@@ -946,24 +1255,26 @@ fetch_pixels_a2b10g10r10_64 (bits_image_t *pict, uint32_t *b, int n_pixels)
 
 /* Despite the type, this function expects a uint64_t buffer */
 static void
-fetch_pixels_x2b10g10r10_64 (bits_image_t *pict, uint32_t *b, int n_pixels)
+fetch_pixels_x2b10g10r10_64 (bits_image_t *pict,
+                             uint32_t *    b,
+                             int           n_pixels)
 {
     uint64_t *buffer = (uint64_t *)b;
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = ((uint32_t *)buffer)[2 * i];
 	int line = ((uint32_t *)buffer)[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t p = READ(pict, bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t p = READ (pict, bits + offset);
 	    uint64_t b = (p >> 20) & 0x3ff;
 	    uint64_t g = (p >> 10) & 0x3ff;
 	    uint64_t r = p & 0x3ff;
@@ -971,14 +1282,16 @@ fetch_pixels_x2b10g10r10_64 (bits_image_t *pict, uint32_t *b, int n_pixels)
 	    r = r << 6 | r >> 4;
 	    g = g << 6 | g >> 4;
 	    b = b << 6 | b >> 4;
-
+	    
 	    buffer[i] = 0xffffULL << 48 | r << 32 | g << 16 | b;
 	}
     }
 }
 
 static void
-fetch_pixels_a8r8g8b8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a8r8g8b8 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
     
@@ -993,14 +1306,16 @@ fetch_pixels_a8r8g8b8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    buffer[i] = READ(pict, (uint32_t *)bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    buffer[i] = READ (pict, (uint32_t *)bits + offset);
 	}
     }
 }
 
 static void
-fetch_pixels_x8r8g8b8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_x8r8g8b8 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
     
@@ -1015,14 +1330,16 @@ fetch_pixels_x8r8g8b8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    buffer[i] = READ(pict, (uint32_t *)bits + offset) | 0xff000000;
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    buffer[i] = READ (pict, (uint32_t *)bits + offset) | 0xff000000;
 	}
     }
 }
 
 static void
-fetch_pixels_a8b8g8r8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a8b8g8r8 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
     
@@ -1037,19 +1354,21 @@ fetch_pixels_a8b8g8r8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint32_t *)bits + offset);
 	    
 	    buffer[i] = ((pixel & 0xff000000) |
-			 ((pixel >> 16) & 0xff) |
-			 (pixel & 0x0000ff00) |
-			 ((pixel & 0xff) << 16));
+	                 ((pixel >> 16) & 0xff) |
+	                 (pixel & 0x0000ff00) |
+	                 ((pixel & 0xff) << 16));
 	}
     }
 }
 
 static void
-fetch_pixels_x8b8g8r8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_x8b8g8r8 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
     
@@ -1064,49 +1383,53 @@ fetch_pixels_x8b8g8r8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint32_t *)bits + offset);
 	    
-	    buffer[i] = ((0xff000000) |
-			 ((pixel >> 16) & 0xff) |
-			 (pixel & 0x0000ff00) |
-			 ((pixel & 0xff) << 16));
+	    buffer[i] = (0xff000000) |
+		((pixel >> 16) & 0xff) |
+		(pixel & 0x0000ff00) |
+		((pixel & 0xff) << 16);
 	}
     }
 }
 
 static void
-fetch_pixels_b8g8r8a8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_b8g8r8a8 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint32_t *)bits + offset);
 	    
 	    buffer[i] = ((pixel & 0xff000000) >> 24 |
-			 (pixel & 0x00ff0000) >> 8 |
-			 (pixel & 0x0000ff00) << 8 |
-			 (pixel & 0x000000ff) << 24);
+	                 (pixel & 0x00ff0000) >> 8 |
+	                 (pixel & 0x0000ff00) << 8 |
+	                 (pixel & 0x000000ff) << 24);
 	}
     }
 }
 
 static void
-fetch_pixels_b8g8r8x8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_b8g8r8x8 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
@@ -1118,385 +1441,422 @@ fetch_pixels_b8g8r8x8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint32_t *)bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint32_t *)bits + offset);
 	    
 	    buffer[i] = ((0xff000000) |
-			 (pixel & 0xff000000) >> 24 |
-			 (pixel & 0x00ff0000) >> 8 |
-			 (pixel & 0x0000ff00) << 8);
+	                 (pixel & 0xff000000) >> 24 |
+	                 (pixel & 0x00ff0000) >> 8 |
+	                 (pixel & 0x0000ff00) << 8);
 	}
     }
 }
 
 static void
-fetch_pixels_r8g8b8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_r8g8b8 (bits_image_t *pict,
+                     uint32_t *    buffer,
+                     int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint8_t   *pixel = ((uint8_t *) bits) + (offset * 3);
+	    
 #ifdef WORDS_BIGENDIAN
 	    buffer[i] = (0xff000000 |
-			 (READ(pict, pixel + 0) << 16) |
-			 (READ(pict, pixel + 1) << 8) |
-			 (READ(pict, pixel + 2)));
+	                 (READ (pict, pixel + 0) << 16) |
+	                 (READ (pict, pixel + 1) << 8) |
+	                 (READ (pict, pixel + 2)));
 #else
 	    buffer[i] = (0xff000000 |
-			 (READ(pict, pixel + 2) << 16) |
-			 (READ(pict, pixel + 1) << 8) |
-			 (READ(pict, pixel + 0)));
+	                 (READ (pict, pixel + 2) << 16) |
+	                 (READ (pict, pixel + 1) << 8) |
+	                 (READ (pict, pixel + 0)));
 #endif
 	}
     }
 }
 
 static void
-fetch_pixels_b8g8r8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_b8g8r8 (bits_image_t *pict,
+                     uint32_t *    buffer,
+                     int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint8_t   *pixel = ((uint8_t *) bits) + (offset*3);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint8_t   *pixel = ((uint8_t *) bits) + (offset * 3);
 #ifdef WORDS_BIGENDIAN
 	    buffer[i] = (0xff000000 |
-			 (READ(pict, pixel + 2) << 16) |
-			 (READ(pict, pixel + 1) << 8) |
-			 (READ(pict, pixel + 0)));
+	                 (READ (pict, pixel + 2) << 16) |
+	                 (READ (pict, pixel + 1) << 8) |
+	                 (READ (pict, pixel + 0)));
 #else
 	    buffer[i] = (0xff000000 |
-			 (READ(pict, pixel + 0) << 16) |
-			 (READ(pict, pixel + 1) << 8) |
-			 (READ(pict, pixel + 2)));
+	                 (READ (pict, pixel + 0) << 16) |
+	                 (READ (pict, pixel + 1) << 8) |
+	                 (READ (pict, pixel + 2)));
 #endif
 	}
     }
 }
 
 static void
-fetch_pixels_r5g6b5 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_r5g6b5 (bits_image_t *pict,
+                     uint32_t *    buffer,
+                     int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint16_t *) bits + offset);
+	    uint32_t r, g, b;
 	    
 	    r = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) << 8;
 	    g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
 	    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
+
 	    buffer[i] = (0xff000000 | r | g | b);
 	}
     }
 }
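
The masked expressions above widen the 5- and 6-bit fields to 8 bits in
place: each field keeps its top bits and ORs a copy of them into the
positions just below, so full-scale stays full-scale (0x1f -> 0xff,
0x3f -> 0xff). The same computation with the fields extracted first, as
an illustrative sketch:

    /* Expand an r5g6b5 pixel to a8r8g8b8 by bit replication (sketch). */
    static inline uint32_t
    expand_r5g6b5 (uint16_t p)
    {
        uint32_t r5 = (p >> 11) & 0x1f;
        uint32_t g6 = (p >> 5) & 0x3f;
        uint32_t b5 = p & 0x1f;

        uint32_t r = (r5 << 3) | (r5 >> 2);    /* 5 -> 8 bits */
        uint32_t g = (g6 << 2) | (g6 >> 4);    /* 6 -> 8 bits */
        uint32_t b = (b5 << 3) | (b5 >> 2);

        return 0xff000000 | (r << 16) | (g << 8) | b;
    }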
 
 static void
-fetch_pixels_b5g6r5 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_b5g6r5 (bits_image_t *pict,
+                     uint32_t *    buffer,
+                     int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+	    uint32_t r, g, b;
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint16_t *) bits + offset);
 	    
 	    b = ((pixel & 0xf800) | ((pixel & 0xe000) >> 5)) >> 8;
 	    g = ((pixel & 0x07e0) | ((pixel & 0x0600) >> 6)) << 5;
 	    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
+	    
 	    buffer[i] = (0xff000000 | r | g | b);
 	}
     }
 }
 
 static void
-fetch_pixels_a1r5g5b5 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a1r5g5b5 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  a,r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint16_t *) bits + offset);
+	    uint32_t a, r, g, b;
 	    
 	    a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
 	    r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
 	    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
 	    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
+	    
 	    buffer[i] = (a | r | g | b);
 	}
     }
 }
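
The alpha term relies on unsigned wraparound: 0 - bit is 0 when the top
bit is clear and 0xffffffff when it is set, and the uint8_t cast keeps
just 0x00 or 0xff before the shift into the alpha byte. Isolated as a
sketch:

    /* 1-bit alpha to 8 bits: (0 - 1) wraps, the cast keeps 0xff (sketch). */
    static inline uint32_t
    a1_bit_to_a8 (uint32_t pixel)
    {
        return (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
    }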
 
 static void
-fetch_pixels_x1r5g5b5 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_x1r5g5b5 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint16_t *) bits + offset);
+	    uint32_t r, g, b;
 	    
 	    r = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) << 9;
 	    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
 	    b = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) >> 2;
+	    
 	    buffer[i] = (0xff000000 | r | g | b);
 	}
     }
 }
 
 static void
-fetch_pixels_a1b5g5r5 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a1b5g5r5 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  a,r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint16_t *) bits + offset);
+	    uint32_t a, r, g, b;
 	    
 	    a = (uint32_t) ((uint8_t) (0 - ((pixel & 0x8000) >> 15))) << 24;
 	    b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
 	    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
 	    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
+	    
 	    buffer[i] = (a | r | g | b);
 	}
     }
 }
 
 static void
-fetch_pixels_x1b5g5r5 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_x1b5g5r5 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint16_t *) bits + offset);
+	    uint32_t r, g, b;
 	    
 	    b = ((pixel & 0x7c00) | ((pixel & 0x7000) >> 5)) >> 7;
 	    g = ((pixel & 0x03e0) | ((pixel & 0x0380) >> 5)) << 6;
 	    r = ((pixel & 0x001c) | ((pixel & 0x001f) << 5)) << 14;
+	    
 	    buffer[i] = (0xff000000 | r | g | b);
 	}
     }
 }
 
 static void
-fetch_pixels_a4r4g4b4 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a4r4g4b4 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  a,r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint16_t *) bits + offset);
+	    uint32_t a, r, g, b;
 	    
 	    a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
 	    r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
 	    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
 	    b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4));
+	    
 	    buffer[i] = (a | r | g | b);
 	}
     }
 }
 
 static void
-fetch_pixels_x4r4g4b4 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_x4r4g4b4 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint16_t *) bits + offset);
+	    uint32_t r, g, b;
 	    
 	    r = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) << 12;
 	    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
 	    b = ((pixel & 0x000f) | ((pixel & 0x000f) << 4));
+	    
 	    buffer[i] = (0xff000000 | r | g | b);
 	}
     }
 }
 
 static void
-fetch_pixels_a4b4g4r4 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a4b4g4r4 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  a,r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint16_t *) bits + offset);
+	    uint32_t a, r, g, b;
 	    
 	    a = ((pixel & 0xf000) | ((pixel & 0xf000) >> 4)) << 16;
 	    b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
 	    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
 	    r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16;
+	    
 	    buffer[i] = (a | r | g | b);
 	}
     }
 }
 
 static void
-fetch_pixels_x4b4g4r4 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_x4b4g4r4 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, (uint16_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint16_t *) bits + offset);
+	    uint32_t r, g, b;
 	    
 	    b = ((pixel & 0x0f00) | ((pixel & 0x0f00) >> 4)) >> 4;
 	    g = ((pixel & 0x00f0) | ((pixel & 0x00f0) >> 4)) << 8;
 	    r = ((pixel & 0x000f) | ((pixel & 0x000f) << 4)) << 16;
+	    
 	    buffer[i] = (0xff000000 | r | g | b);
 	}
     }
 }
 
 static void
-fetch_pixels_a8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a8 (bits_image_t *pict,
+                 uint32_t *    buffer,
+                 int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint8_t *) bits + offset);
 	    
 	    buffer[i] = pixel << 24;
 	}
@@ -1504,169 +1864,196 @@ fetch_pixels_a8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 }
 
 static void
-fetch_pixels_r3g3b2 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_r3g3b2 (bits_image_t *pict,
+                     uint32_t *    buffer,
+                     int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint8_t *) bits + offset);
+	    uint32_t r, g, b;
+	    
+	    r = ((pixel & 0xe0) |
+	         ((pixel & 0xe0) >> 3) |
+	         ((pixel & 0xc0) >> 6)) << 16;
+	    
+	    g = ((pixel & 0x1c) |
+	         ((pixel & 0x18) >> 3) |
+	         ((pixel & 0x1c) << 3)) << 8;
 	    
-	    r = ((pixel & 0xe0) | ((pixel & 0xe0) >> 3) | ((pixel & 0xc0) >> 6)) << 16;
-	    g = ((pixel & 0x1c) | ((pixel & 0x18) >> 3) | ((pixel & 0x1c) << 3)) << 8;
 	    b = (((pixel & 0x03)     ) |
-		 ((pixel & 0x03) << 2) |
-		 ((pixel & 0x03) << 4) |
-		 ((pixel & 0x03) << 6));
+	         ((pixel & 0x03) << 2) |
+	         ((pixel & 0x03) << 4) |
+	         ((pixel & 0x03) << 6));
+	    
 	    buffer[i] = (0xff000000 | r | g | b);
 	}
     }
 }
 
 static void
-fetch_pixels_b2g3r3 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_b2g3r3 (bits_image_t *pict,
+                     uint32_t *    buffer,
+                     int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
-	    
-	    b = (((pixel & 0xc0)     ) |
-		 ((pixel & 0xc0) >> 2) |
-		 ((pixel & 0xc0) >> 4) |
-		 ((pixel & 0xc0) >> 6));
-	    g = ((pixel & 0x38) | ((pixel & 0x38) >> 3) | ((pixel & 0x30) << 2)) << 8;
-	    r = (((pixel & 0x07)     ) |
-		 ((pixel & 0x07) << 3) |
-		 ((pixel & 0x06) << 6)) << 16;
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint8_t *) bits + offset);
+	    uint32_t r, g, b;
+	    
+	    b = ((pixel & 0xc0)         |
+	         ((pixel & 0xc0) >> 2)  |
+	         ((pixel & 0xc0) >> 4)  |
+	         ((pixel & 0xc0) >> 6));
+	    
+	    g = ((pixel & 0x38)         |
+	         ((pixel & 0x38) >> 3)  |
+	         ((pixel & 0x30) << 2)) << 8;
+	    
+	    r = ((pixel & 0x07)         |
+	         ((pixel & 0x07) << 3)  |
+	         ((pixel & 0x06) << 6)) << 16;
+	    
 	    buffer[i] = (0xff000000 | r | g | b);
 	}
     }
 }
 
 static void
-fetch_pixels_a2r2g2b2 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a2r2g2b2 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t   a,r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint8_t *) bits + offset);
+	    uint32_t a, r, g, b;
 	    
 	    a = ((pixel & 0xc0) * 0x55) << 18;
 	    r = ((pixel & 0x30) * 0x55) << 12;
 	    g = ((pixel & 0x0c) * 0x55) << 6;
 	    b = ((pixel & 0x03) * 0x55);
-	    buffer[i] = a|r|g|b;
+	    
+	    buffer[i] = a | r | g | b;
 	}
     }
 }
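
Multiplying a 2-bit field by 0x55 (binary 01010101) replicates it four
times, mapping 0, 1, 2, 3 to 0x00, 0x55, 0xaa, 0xff; the surrounding
shifts only move the result into the right byte. For instance the alpha
term yields 0xff000000 when both bits are set: 0xc0 * 0x55 = 0x3fc0 and
0x3fc0 << 18 = 0xff000000. In isolation:

    /* Replicate a 2-bit value into an 8-bit channel (sketch). */
    static inline uint32_t
    expand_2_to_8 (uint32_t v)
    {
        return (v & 0x3) * 0x55;    /* 0 -> 0x00 ... 3 -> 0xff */
    }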
 
 static void
-fetch_pixels_a2b2g2r2 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a2b2g2r2 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t   a,r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint8_t *) bits + offset);
+	    uint32_t a, r, g, b;
 	    
 	    a = ((pixel & 0xc0) * 0x55) << 18;
 	    b = ((pixel & 0x30) * 0x55) >> 6;
 	    g = ((pixel & 0x0c) * 0x55) << 6;
 	    r = ((pixel & 0x03) * 0x55) << 16;
-	    buffer[i] = a|r|g|b;
+	    
+	    buffer[i] = a | r | g | b;
 	}
     }
 }
 
 static void
-fetch_pixels_c8 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_c8 (bits_image_t *pict,
+                 uint32_t *    buffer,
+                 int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint8_t *) bits + offset);
 	    const pixman_indexed_t * indexed = pict->indexed;
+	    
 	    buffer[i] = indexed->rgba[pixel];
 	}
     }
 }
 
 static void
-fetch_pixels_x4a4 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_x4a4 (bits_image_t *pict,
+                   uint32_t *    buffer,
+                   int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t   pixel = READ(pict, (uint8_t *) bits + offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, (uint8_t *) bits + offset);
 	    
 	    buffer[i] = ((pixel & 0xf) | ((pixel & 0xf) << 4)) << 24;
 	}
@@ -1674,23 +2061,25 @@ fetch_pixels_x4a4 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 }
 
 static void
-fetch_pixels_a4 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a4 (bits_image_t *pict,
+                 uint32_t *    buffer,
+                 int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = FETCH_4 (pict, bits, offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = FETCH_4 (pict, bits, offset);
 	    
 	    pixel |= pixel << 4;
 	    buffer[i] = pixel << 24;
@@ -1699,10 +2088,12 @@ fetch_pixels_a4 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 }
 
 static void
-fetch_pixels_r1g2b1 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_r1g2b1 (bits_image_t *pict,
+                     uint32_t *    buffer,
+                     int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
@@ -1714,122 +2105,134 @@ fetch_pixels_r1g2b1 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 	}
 	else
 	{
-	    uint32_t  r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = FETCH_4 (pict, bits, offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = FETCH_4 (pict, bits, offset);
+	    uint32_t r, g, b;
 	    
 	    r = ((pixel & 0x8) * 0xff) << 13;
 	    g = ((pixel & 0x6) * 0x55) << 7;
 	    b = ((pixel & 0x1) * 0xff);
-	    buffer[i] = 0xff000000|r|g|b;
+	    
+	    buffer[i] = 0xff000000 | r | g | b;
 	}
     }
 }
 
 static void
-fetch_pixels_b1g2r1 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_b1g2r1 (bits_image_t *pict,
+                     uint32_t *    buffer,
+                     int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = FETCH_4 (pict, bits, offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = FETCH_4 (pict, bits, offset);
+	    uint32_t r, g, b;
 	    
 	    b = ((pixel & 0x8) * 0xff) >> 3;
 	    g = ((pixel & 0x6) * 0x55) << 7;
 	    r = ((pixel & 0x1) * 0xff) << 16;
-	    buffer[i] = 0xff000000|r|g|b;
+	    
+	    buffer[i] = 0xff000000 | r | g | b;
 	}
     }
 }
 
 static void
-fetch_pixels_a1r1g1b1 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a1r1g1b1 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  a,r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = FETCH_4 (pict, bits, offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = FETCH_4 (pict, bits, offset);
+	    uint32_t a, r, g, b;
 	    
 	    a = ((pixel & 0x8) * 0xff) << 21;
 	    r = ((pixel & 0x4) * 0xff) << 14;
 	    g = ((pixel & 0x2) * 0xff) << 7;
 	    b = ((pixel & 0x1) * 0xff);
-	    buffer[i] = a|r|g|b;
+	    
+	    buffer[i] = a | r | g | b;
 	}
     }
 }
 
 static void
-fetch_pixels_a1b1g1r1 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a1b1g1r1 (bits_image_t *pict,
+                       uint32_t *    buffer,
+                       int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t  a,r,g,b;
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = FETCH_4 (pict, bits, offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = FETCH_4 (pict, bits, offset);
+	    uint32_t a, r, g, b;
 	    
 	    a = ((pixel & 0x8) * 0xff) << 21;
 	    r = ((pixel & 0x4) * 0xff) >> 3;
 	    g = ((pixel & 0x2) * 0xff) << 7;
 	    b = ((pixel & 0x1) * 0xff) << 16;
-	    buffer[i] = a|r|g|b;
+	    
+	    buffer[i] = a | r | g | b;
 	}
     }
 }
 
 static void
-fetch_pixels_c4 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_c4 (bits_image_t *pict,
+                 uint32_t *    buffer,
+                 int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = FETCH_4 (pict, bits, offset);
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = FETCH_4 (pict, bits, offset);
 	    const pixman_indexed_t * indexed = pict->indexed;
 	    
 	    buffer[i] = indexed->rgba[pixel];
@@ -1837,26 +2240,28 @@ fetch_pixels_c4 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
     }
 }
 
-
 static void
-fetch_pixels_a1 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_a1 (bits_image_t *pict,
+                 uint32_t *    buffer,
+                 int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t  pixel = READ(pict, bits + (offset >> 5));
-	    uint32_t  a;
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, bits + (offset >> 5));
+	    uint32_t a;
+	    
 #ifdef WORDS_BIGENDIAN
 	    a = pixel >> (0x1f - (offset & 0x1f));
 #else
@@ -1866,71 +2271,80 @@ fetch_pixels_a1 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 	    a |= a << 1;
 	    a |= a << 2;
 	    a |= a << 4;
+	    
 	    buffer[i] = a << 24;
 	}
     }
 }
 
 static void
-fetch_pixels_g1 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_g1 (bits_image_t *pict,
+                 uint32_t *    buffer,
+                 int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    uint32_t *bits = pict->bits + line*pict->rowstride;
-	    uint32_t pixel = READ(pict, bits + (offset >> 5));
+	    uint32_t *bits = pict->bits + line * pict->rowstride;
+	    uint32_t pixel = READ (pict, bits + (offset >> 5));
 	    const pixman_indexed_t * indexed = pict->indexed;
 	    uint32_t a;
+	    
 #ifdef WORDS_BIGENDIAN
 	    a = pixel >> (0x1f - (offset & 0x1f));
 #else
 	    a = pixel >> (offset & 0x1f);
 #endif
 	    a = a & 1;
+	    
 	    buffer[i] = indexed->rgba[a];
 	}
     }
 }
 
 static void
-fetch_pixels_yuy2 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_yuy2 (bits_image_t *pict,
+                   uint32_t *    buffer,
+                   int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
+	    const uint32_t *bits = pict->bits + pict->rowstride * line;
+	    
 	    int16_t y, u, v;
 	    int32_t r, g, b;
 	    
-	    const uint32_t *bits = pict->bits + pict->rowstride * line;
-	    
 	    y = ((uint8_t *) bits)[offset << 1] - 16;
-	    u = ((uint8_t *) bits)[((offset << 1) & -4) + 1] - 128;
-	    v = ((uint8_t *) bits)[((offset << 1) & -4) + 3] - 128;
+	    u = ((uint8_t *) bits)[((offset << 1) & - 4) + 1] - 128;
+	    v = ((uint8_t *) bits)[((offset << 1) & - 4) + 3] - 128;
 	    
 	    /* R = 1.164(Y - 16) + 1.596(V - 128) */
 	    r = 0x012b27 * y + 0x019a2e * v;
+	    
 	    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 	    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+	    
 	    /* B = 1.164(Y - 16) + 2.018(U - 128) */
 	    b = 0x012b27 * y + 0x0206a2 * u;
 	    
@@ -1943,22 +2357,24 @@ fetch_pixels_yuy2 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 }
 
 static void
-fetch_pixels_yv12 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_yv12 (bits_image_t *pict,
+                   uint32_t *    buffer,
+                   int           n_pixels)
 {
     int i;
-
+    
     for (i = 0; i < n_pixels; ++i)
     {
 	int offset = buffer[2 * i];
 	int line = buffer[2 * i + 1];
-
+	
 	if (offset == 0xffffffff || line == 0xffffffff)
 	{
 	    buffer[i] = 0;
 	}
 	else
 	{
-	    YV12_SETUP(pict);
+	    YV12_SETUP (pict);
 	    int16_t y = YV12_Y (line)[offset] - 16;
 	    int16_t u = YV12_U (line)[offset >> 1] - 128;
 	    int16_t v = YV12_V (line)[offset >> 1] - 128;
@@ -1966,8 +2382,10 @@ fetch_pixels_yv12 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 	    
 	    /* R = 1.164(Y - 16) + 1.596(V - 128) */
 	    r = 0x012b27 * y + 0x019a2e * v;
+	    
 	    /* G = 1.164(Y - 16) - 0.813(V - 128) - 0.391(U - 128) */
 	    g = 0x012b27 * y - 0x00d0f2 * v - 0x00647e * u;
+	    
 	    /* B = 1.164(Y - 16) + 2.018(U - 128) */
 	    b = 0x012b27 * y + 0x0206a2 * u;
 	    
@@ -1981,459 +2399,588 @@ fetch_pixels_yv12 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 
 /*********************************** Store ************************************/
 
-#define SPLIT_A(v)	uint32_t	a = ((v) >> 24), r = ((v) >> 16) & 0xff, g = ((v) >> 8) & 0xff, b = (v) & 0xff
-#define SPLIT(v)	uint32_t	r = ((v) >> 16) & 0xff, g = ((v) >> 8) & 0xff, b = (v) & 0xff
+#define SPLIT_A(v)              \
+    uint32_t a = ((v) >> 24),   \
+	r = ((v) >> 16) & 0xff, \
+	g = ((v) >> 8) & 0xff,  \
+	b = (v) & 0xff
+
+#define SPLIT(v)                     \
+    uint32_t r = ((v) >> 16) & 0xff, \
+	g = ((v) >> 8) & 0xff,       \
+	b = (v) & 0xff
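
Both macros expand to a declaration list, so they can only appear at the
start of a block, where C89 allows declarations. A hypothetical caller,
for illustration only:

    static uint32_t
    example_split (uint32_t v)
    {
        SPLIT_A (v);    /* declares a, r, g, b from the packed value */

        /* v = 0x80ff40c0 gives a = 0x80, r = 0xff, g = 0x40, b = 0xc0 */
        return (a << 24) | (r << 16) | (g << 8) | b;    /* repacks v */
    }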
 
 static void
-store_scanline_a2r10g10b10 (bits_image_t *image, int x, int y, int width, const uint32_t *v)
+store_scanline_a2r10g10b10 (bits_image_t *  image,
+                            int             x,
+                            int             y,
+                            int             width,
+                            const uint32_t *v)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint32_t *pixel = bits + x;
     uint64_t *values = (uint64_t *)v;
     int i;
-
-    for (i = 0; i < width; ++i) {
-        WRITE(image, pixel++,
-            ((values[i] >> 32) & 0xc0000000) | // A
-	    ((values[i] >> 18) & 0x3ff00000) | // R
-	    ((values[i] >> 12) & 0xffc00) |    // G
-	    ((values[i] >> 6) & 0x3ff));       // B
+    
+    for (i = 0; i < width; ++i)
+    {
+	WRITE (image, pixel++,
+	       ((values[i] >> 32) & 0xc0000000) | // A
+	       ((values[i] >> 18) & 0x3ff00000) | // R
+	       ((values[i] >> 12) & 0xffc00) | // G
+	       ((values[i] >> 6) & 0x3ff));    // B
     }
 }
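
On the store side, v points at the 64-bit a16r16g16b16 pixels produced
by the matching 64-bit fetchers, and packing is plain truncation: the
shifts keep the top 2 bits of alpha (bits 62..63 of the value land in
bits 30..31) and the top 10 bits of each color channel. Written out as
a sketch:

    /* Truncate a16r16g16b16 down to a2r10g10b10 (sketch). */
    static inline uint32_t
    pack_2_10_10_10 (uint64_t v)
    {
        return ((v >> 32) & 0xc0000000) |    /* top  2 of a -> bits 30..31 */
               ((v >> 18) & 0x3ff00000) |    /* top 10 of r -> bits 20..29 */
               ((v >> 12) & 0x000ffc00) |    /* top 10 of g -> bits 10..19 */
               ((v >>  6) & 0x000003ff);     /* top 10 of b -> bits  0..9  */
    }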
 
 static void
-store_scanline_x2r10g10b10 (bits_image_t *image, int x, int y, int width, const uint32_t *v)
+store_scanline_x2r10g10b10 (bits_image_t *  image,
+                            int             x,
+                            int             y,
+                            int             width,
+                            const uint32_t *v)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint64_t *values = (uint64_t *)v;
     uint32_t *pixel = bits + x;
     int i;
-
-    for (i = 0; i < width; ++i) {
-        WRITE(image, pixel++,
-	    ((values[i] >> 18) & 0x3ff00000) | // R
-	    ((values[i] >> 12) & 0xffc00) |    // G
-	    ((values[i] >> 6) & 0x3ff));       // B
+    
+    for (i = 0; i < width; ++i)
+    {
+	WRITE (image, pixel++,
+	       ((values[i] >> 18) & 0x3ff00000) | // R
+	       ((values[i] >> 12) & 0xffc00) | // G
+	       ((values[i] >> 6) & 0x3ff));    // B
     }
 }
 
 static void
-store_scanline_a2b10g10r10 (bits_image_t *image, int x, int y, int width, const uint32_t *v)
+store_scanline_a2b10g10r10 (bits_image_t *  image,
+                            int             x,
+                            int             y,
+                            int             width,
+                            const uint32_t *v)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint32_t *pixel = bits + x;
     uint64_t *values = (uint64_t *)v;
     int i;
     
-    for (i = 0; i < width; ++i) {
-        WRITE(image, pixel++,
-            ((values[i] >> 32) & 0xc0000000) | // A
-            ((values[i] >> 38) & 0x3ff) |      // R
-            ((values[i] >> 12) & 0xffc00) |    // G
-            ((values[i] << 14) & 0x3ff00000)); // B
+    for (i = 0; i < width; ++i)
+    {
+	WRITE (image, pixel++,
+	       ((values[i] >> 32) & 0xc0000000) | // A
+	       ((values[i] >> 38) & 0x3ff) |   // R
+	       ((values[i] >> 12) & 0xffc00) | // G
+	       ((values[i] << 14) & 0x3ff00000)); // B
     }
 }
 
 static void
-store_scanline_x2b10g10r10 (bits_image_t *image, int x, int y, int width, const uint32_t *v)
+store_scanline_x2b10g10r10 (bits_image_t *  image,
+                            int             x,
+                            int             y,
+                            int             width,
+                            const uint32_t *v)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint64_t *values = (uint64_t *)v;
     uint32_t *pixel = bits + x;
     int i;
     
-    for (i = 0; i < width; ++i) {
-        WRITE(image, pixel++,
-            ((values[i] >> 38) & 0x3ff) |      // R
-            ((values[i] >> 12) & 0xffc00) |    // G
-            ((values[i] << 14) & 0x3ff00000)); // B
+    for (i = 0; i < width; ++i)
+    {
+	WRITE (image, pixel++,
+	       ((values[i] >> 38) & 0x3ff) |   // R
+	       ((values[i] >> 12) & 0xffc00) | // G
+	       ((values[i] << 14) & 0x3ff00000)); // B
     }
 }
 
 static void
-store_scanline_a8r8g8b8 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_a8r8g8b8 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     
-    MEMCPY_WRAPPED(image, ((uint32_t *)bits) + x, values, width*sizeof(uint32_t));
+    MEMCPY_WRAPPED (image, ((uint32_t *)bits) + x, values,
+                    width * sizeof(uint32_t));
 }
 
 static void
-store_scanline_x8r8g8b8 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_x8r8g8b8 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint32_t *pixel = (uint32_t *)bits + x;
     int i;
     
     for (i = 0; i < width; ++i)
-	WRITE(image, pixel++, values[i] & 0xffffff);
+	WRITE (image, pixel++, values[i] & 0xffffff);
 }
 
 static void
-store_scanline_a8b8g8r8 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_a8b8g8r8 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint32_t *pixel = (uint32_t *)bits + x;
     int i;
     
     for (i = 0; i < width; ++i)
-	WRITE(image, pixel++, (values[i] & 0xff00ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
+    {
+	WRITE (image, pixel++,
+	       (values[i] & 0xff00ff00)         |
+	       ((values[i] >> 16) & 0xff)       |
+	       ((values[i] & 0xff) << 16));
+    }
 }
 
 static void
-store_scanline_x8b8g8r8 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_x8b8g8r8 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint32_t *pixel = (uint32_t *)bits + x;
     int i;
     
     for (i = 0; i < width; ++i)
-	WRITE(image, pixel++, (values[i] & 0x0000ff00) | ((values[i] >> 16) & 0xff) | ((values[i] & 0xff) << 16));
+    {
+	WRITE (image, pixel++,
+	       (values[i] & 0x0000ff00)         |
+	       ((values[i] >> 16) & 0xff)       |
+	       ((values[i] & 0xff) << 16));
+    }
 }
 
 static void
-store_scanline_b8g8r8a8 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_b8g8r8a8 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint32_t *pixel = (uint32_t *)bits + x;
     int i;
-
+    
     for (i = 0; i < width; ++i)
-	WRITE(image, pixel++,
-	    ((values[i] >> 24) & 0x000000ff) |
-	    ((values[i] >>  8) & 0x0000ff00) |
-	    ((values[i] <<  8) & 0x00ff0000) |
-	    ((values[i] << 24) & 0xff000000));
+    {
+	WRITE (image, pixel++,
+	       ((values[i] >> 24) & 0x000000ff) |
+	       ((values[i] >>  8) & 0x0000ff00) |
+	       ((values[i] <<  8) & 0x00ff0000) |
+	       ((values[i] << 24) & 0xff000000));
+    }
 }
 
 static void
-store_scanline_b8g8r8x8 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_b8g8r8x8 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint32_t *pixel = (uint32_t *)bits + x;
     int i;
-
+    
     for (i = 0; i < width; ++i)
-	WRITE(image, pixel++,
-	    ((values[i] >>  8) & 0x0000ff00) |
-	    ((values[i] <<  8) & 0x00ff0000) |
-	    ((values[i] << 24) & 0xff000000));
+    {
+	WRITE (image, pixel++,
+	       ((values[i] >>  8) & 0x0000ff00) |
+	       ((values[i] <<  8) & 0x00ff0000) |
+	       ((values[i] << 24) & 0xff000000));
+    }
 }
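
All of the store procs in this family take the canonical a8r8g8b8 scanline in values and repack it into the image's own channel order; b8g8r8a8 above amounts to a full 32-bit byte swap. A minimal standalone sketch of that swap, outside the WRITE accessor wrapper (the helper name is illustrative, not from pixman):

    /* Repack one a8r8g8b8 pixel as b8g8r8a8: a full byte swap. */
    static uint32_t
    argb_to_bgra (uint32_t p)
    {
        return ((p >> 24) & 0x000000ff) |   /* A -> bits 0-7   */
               ((p >>  8) & 0x0000ff00) |   /* R -> bits 8-15  */
               ((p <<  8) & 0x00ff0000) |   /* G -> bits 16-23 */
               ((p << 24) & 0xff000000);    /* B -> bits 24-31 */
    }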
 
 static void
-store_scanline_r8g8b8 (bits_image_t *image,
-		int x, int y, int width,
-		const uint32_t *values)
+store_scanline_r8g8b8 (bits_image_t *  image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
-    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
+    uint8_t *pixel = ((uint8_t *) bits) + 3 * x;
     int i;
-
+    
     for (i = 0; i < width; ++i)
     {
 	uint32_t val = values[i];
+	
 #ifdef WORDS_BIGENDIAN
-	WRITE(image, pixel++, (val & 0x00ff0000) >> 16);
-	WRITE(image, pixel++, (val & 0x0000ff00) >>  8);
-	WRITE(image, pixel++, (val & 0x000000ff) >>  0);
+	WRITE (image, pixel++, (val & 0x00ff0000) >> 16);
+	WRITE (image, pixel++, (val & 0x0000ff00) >>  8);
+	WRITE (image, pixel++, (val & 0x000000ff) >>  0);
 #else
-	WRITE(image, pixel++, (val & 0x000000ff) >>  0);
-	WRITE(image, pixel++, (val & 0x0000ff00) >>  8);
-	WRITE(image, pixel++, (val & 0x00ff0000) >> 16);
+	WRITE (image, pixel++, (val & 0x000000ff) >>  0);
+	WRITE (image, pixel++, (val & 0x0000ff00) >>  8);
+	WRITE (image, pixel++, (val & 0x00ff0000) >> 16);
 #endif
     }
 }
 
 static void
-store_scanline_b8g8r8 (bits_image_t *image,
-		int x, int y, int width,
-		const uint32_t *values)
+store_scanline_b8g8r8 (bits_image_t *  image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
-    uint8_t *pixel = ((uint8_t *) bits) + 3*x;
+    uint8_t *pixel = ((uint8_t *) bits) + 3 * x;
     int i;
-
+    
     for (i = 0; i < width; ++i)
     {
 	uint32_t val = values[i];
+	
 #ifdef WORDS_BIGENDIAN
-	WRITE(image, pixel++, (val & 0x000000ff) >>  0);
-	WRITE(image, pixel++, (val & 0x0000ff00) >>  8);
-	WRITE(image, pixel++, (val & 0x00ff0000) >> 16);
+	WRITE (image, pixel++, (val & 0x000000ff) >>  0);
+	WRITE (image, pixel++, (val & 0x0000ff00) >>  8);
+	WRITE (image, pixel++, (val & 0x00ff0000) >> 16);
 #else
-	WRITE(image, pixel++, (val & 0x00ff0000) >> 16);
-	WRITE(image, pixel++, (val & 0x0000ff00) >>  8);
-	WRITE(image, pixel++, (val & 0x000000ff) >>  0);
+	WRITE (image, pixel++, (val & 0x00ff0000) >> 16);
+	WRITE (image, pixel++, (val & 0x0000ff00) >>  8);
+	WRITE (image, pixel++, (val & 0x000000ff) >>  0);
 #endif
     }
 }
 
 static void
-store_scanline_r5g6b5 (bits_image_t *image,
-		int x, int y, int width,
-		const uint32_t *values)
+store_scanline_r5g6b5 (bits_image_t *  image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint16_t *pixel = ((uint16_t *) bits) + x;
     int i;
-
-    for (i = 0; i < width; ++i) {
+    
+    for (i = 0; i < width; ++i)
+    {
 	uint32_t s = values[i];
-	WRITE(image, pixel++, ((s >> 3) & 0x001f) |
-	      ((s >> 5) & 0x07e0) |
-	      ((s >> 8) & 0xf800));
+	
+	WRITE (image, pixel++,
+	       ((s >> 3) & 0x001f) |
+	       ((s >> 5) & 0x07e0) |
+	       ((s >> 8) & 0xf800));
     }
 }
 
 static void
-store_scanline_b5g6r5 (bits_image_t *image,
-		int x, int y, int width,
-		const uint32_t *values)
+store_scanline_b5g6r5 (bits_image_t *  image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     int i;
-
-    for (i = 0; i < width; ++i) {
-	SPLIT(values[i]);
-	WRITE(image, pixel++, ((b << 8) & 0xf800) |
-	      ((g << 3) & 0x07e0) |
-	      ((r >> 3)         ));
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT (values[i]);
+	
+	WRITE (image, pixel++,
+	       ((b << 8) & 0xf800) |
+	       ((g << 3) & 0x07e0) |
+	       ((r >> 3)         ));
     }
 }
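
The 16bpp stores keep only the top bits of each 8-bit channel. SPLIT presumably expands to local declarations of r, g and b extracted from the 32-bit value (its definition sits outside this hunk), after which the r5g6b5 packing is just three shift-and-mask terms:

    /* Pack one a8r8g8b8 pixel into r5g6b5; alpha is simply dropped. */
    static uint16_t
    argb_to_r5g6b5 (uint32_t s)
    {
        return ((s >> 8) & 0xf800) |    /* top 5 bits of R -> bits 11-15 */
               ((s >> 5) & 0x07e0) |    /* top 6 bits of G -> bits 5-10  */
               ((s >> 3) & 0x001f);     /* top 5 bits of B -> bits 0-4   */
    }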
 
 static void
-store_scanline_a1r5g5b5 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_a1r5g5b5 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     int i;
-
-    for (i = 0; i < width; ++i) {
-	SPLIT_A(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0x8000) |
-	      ((r << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((b >> 3)         ));
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT_A (values[i]);
+	
+	WRITE (image, pixel++,
+	       ((a << 8) & 0x8000) |
+	       ((r << 7) & 0x7c00) |
+	       ((g << 2) & 0x03e0) |
+	       ((b >> 3)         ));
     }
 }
 
 static void
-store_scanline_x1r5g5b5 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_x1r5g5b5 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     int i;
-
-    for (i = 0; i < width; ++i) {
-	SPLIT(values[i]);
-	WRITE(image, pixel++, ((r << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((b >> 3)         ));
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT (values[i]);
+	
+	WRITE (image, pixel++,
+	       ((r << 7) & 0x7c00) |
+	       ((g << 2) & 0x03e0) |
+	       ((b >> 3)         ));
     }
 }
 
 static void
-store_scanline_a1b5g5r5 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_a1b5g5r5 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     int i;
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT_A (values[i]);
 
-    for (i = 0; i < width; ++i) {
-	SPLIT_A(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0x8000) |
-	      ((b << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((r >> 3)         ));
+	WRITE (image, pixel++,
+	       ((a << 8) & 0x8000) |
+	       ((b << 7) & 0x7c00) |
+	       ((g << 2) & 0x03e0) |
+	       ((r >> 3)         ));
     }
 }
 
 static void
-store_scanline_x1b5g5r5 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_x1b5g5r5 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     int i;
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT (values[i]);
 
-    for (i = 0; i < width; ++i) {
-	SPLIT(values[i]);
-	WRITE(image, pixel++, ((b << 7) & 0x7c00) |
-	      ((g << 2) & 0x03e0) |
-	      ((r >> 3)         ));
+	WRITE (image, pixel++, ((b << 7) & 0x7c00) |
+	       ((g << 2) & 0x03e0) |
+	       ((r >> 3)         ));
     }
 }
 
 static void
-store_scanline_a4r4g4b4 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_a4r4g4b4 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     int i;
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT_A (values[i]);
 
-    for (i = 0; i < width; ++i) {
-	SPLIT_A(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0xf000) |
-	      ((r << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((b >> 4)         ));
+	WRITE (image, pixel++,
+	       ((a << 8) & 0xf000) |
+	       ((r << 4) & 0x0f00) |
+	       ((g     ) & 0x00f0) |
+	       ((b >> 4)         ));
     }
 }
 
 static void
-store_scanline_x4r4g4b4 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_x4r4g4b4 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     int i;
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT (values[i]);
 
-    for (i = 0; i < width; ++i) {
-	SPLIT(values[i]);
-	WRITE(image, pixel++, ((r << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((b >> 4)         ));
+	WRITE (image, pixel++,
+	       ((r << 4) & 0x0f00) |
+	       ((g     ) & 0x00f0) |
+	       ((b >> 4)         ));
     }
 }
 
 static void
-store_scanline_a4b4g4r4 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_a4b4g4r4 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     int i;
-
-    for (i = 0; i < width; ++i) {
-	SPLIT_A(values[i]);
-	WRITE(image, pixel++, ((a << 8) & 0xf000) |
-	      ((b << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((r >> 4)         ));
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT_A (values[i]);
+	WRITE (image, pixel++, ((a << 8) & 0xf000) |
+	       ((b << 4) & 0x0f00) |
+	       ((g     ) & 0x00f0) |
+	       ((r >> 4)         ));
     }
 }
 
 static void
-store_scanline_x4b4g4r4 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_x4b4g4r4 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint16_t  *pixel = ((uint16_t *) bits) + x;
     int i;
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT (values[i]);
 
-    for (i = 0; i < width; ++i) {
-	SPLIT(values[i]);
-	WRITE(image, pixel++, ((b << 4) & 0x0f00) |
-	      ((g     ) & 0x00f0) |
-	      ((r >> 4)         ));
+	WRITE (image, pixel++,
+	       ((b << 4) & 0x0f00) |
+	       ((g     ) & 0x00f0) |
+	       ((r >> 4)         ));
     }
 }
 
 static void
-store_scanline_a8 (bits_image_t *image,
-	    int x, int y, int width,
-	    const uint32_t *values)
+store_scanline_a8 (bits_image_t *  image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     int i;
-
-    for (i = 0; i < width; ++i) {
-	WRITE(image, pixel++, values[i] >> 24);
+    
+    for (i = 0; i < width; ++i)
+    {
+	WRITE (image, pixel++, values[i] >> 24);
     }
 }
 
 static void
-store_scanline_r3g3b2 (bits_image_t *image,
-		int x, int y, int width,
-		const uint32_t *values)
+store_scanline_r3g3b2 (bits_image_t *  image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     int i;
-
-    for (i = 0; i < width; ++i) {
-	SPLIT(values[i]);
-	WRITE(image, pixel++,
-	      ((r     ) & 0xe0) |
-	      ((g >> 3) & 0x1c) |
-	      ((b >> 6)       ));
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT (values[i]);
+	
+	WRITE (image, pixel++,
+	       ((r     ) & 0xe0) |
+	       ((g >> 3) & 0x1c) |
+	       ((b >> 6)       ));
     }
 }
 
 static void
-store_scanline_b2g3r3 (bits_image_t *image,
-		int x, int y, int width,
-		const uint32_t *values)
+store_scanline_b2g3r3 (bits_image_t *  image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     int i;
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT (values[i]);
 
-    for (i = 0; i < width; ++i) {
-	SPLIT(values[i]);
-	WRITE(image, pixel++,
-	      ((b     ) & 0xc0) |
-	      ((g >> 2) & 0x38) |
-	      ((r >> 5)       ));
+	WRITE (image, pixel++,
+	       ((b     ) & 0xc0) |
+	       ((g >> 2) & 0x38) |
+	       ((r >> 5)       ));
     }
 }
 
 static void
-store_scanline_a2r2g2b2 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_a2r2g2b2 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     int i;
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT_A (values[i]);
 
-    for (i = 0; i < width; ++i) {
-	SPLIT_A(values[i]);
-	WRITE(image, pixel++, ((a     ) & 0xc0) |
-	      ((r >> 2) & 0x30) |
-	      ((g >> 4) & 0x0c) |
-	      ((b >> 6)       ));
+	WRITE (image, pixel++,
+	       ((a     ) & 0xc0) |
+	       ((r >> 2) & 0x30) |
+	       ((g >> 4) & 0x0c) |
+	       ((b >> 6)       ));
     }
 }
 
 static void
-store_scanline_a2b2g2r2 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_a2b2g2r2 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     int i;
+    
+    for (i = 0; i < width; ++i)
+    {
+	SPLIT_A (values[i]);
 
-    for (i = 0; i < width; ++i) {
-	SPLIT_A(values[i]);
-	*(pixel++) =  ((a     ) & 0xc0) |
+	*(pixel++) =
+	    ((a     ) & 0xc0) |
 	    ((b >> 2) & 0x30) |
 	    ((g >> 4) & 0x0c) |
 	    ((r >> 6)       );
@@ -2441,177 +2988,205 @@ store_scanline_a2b2g2r2 (bits_image_t *image,
 }
 
 static void
-store_scanline_c8 (bits_image_t *image,
-	    int x, int y, int width,
-	    const uint32_t *values)
+store_scanline_c8 (bits_image_t *  image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint8_t *pixel = ((uint8_t *) bits) + x;
     const pixman_indexed_t *indexed = image->indexed;
     int i;
     
-    for (i = 0; i < width; ++i) {
-	WRITE(image, pixel++, RGB24_TO_ENTRY(indexed,values[i]));
-    }
+    for (i = 0; i < width; ++i)
+	WRITE (image, pixel++, RGB24_TO_ENTRY (indexed,values[i]));
 }
 
 static void
-store_scanline_x4a4 (bits_image_t *image,
-	      int x, int y, int width,
-	      const uint32_t *values)
+store_scanline_x4a4 (bits_image_t *  image,
+                     int             x,
+                     int             y,
+                     int             width,
+                     const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     uint8_t   *pixel = ((uint8_t *) bits) + x;
     int i;
-
-    for (i = 0; i < width; ++i) {
-	WRITE(image, pixel++, values[i] >> 28);
-    }
+    
+    for (i = 0; i < width; ++i)
+	WRITE (image, pixel++, values[i] >> 28);
 }
 
-#define STORE_8(img,l,o,v)  (WRITE(img, (uint8_t *)(l) + ((o) >> 3), (v)))
+#define STORE_8(img,l,o,v)  (WRITE (img, (uint8_t *)(l) + ((o) >> 3), (v)))
 #ifdef WORDS_BIGENDIAN
-#define STORE_4(img,l,o,v)  STORE_8(img,l,o,((o) & 4 ?				\
-				   (FETCH_8(img,l,o) & 0xf0) | (v) :		\
-				   (FETCH_8(img,l,o) & 0x0f) | ((v) << 4)))
+#define STORE_4(img,l,o,v)					   \
+    STORE_8 (img,l,o,((o) & 4 ?					    \
+                      (FETCH_8 (img,l,o) & 0xf0) | (v) :            \
+                      (FETCH_8 (img,l,o) & 0x0f) | ((v) << 4)))
 #else
-#define STORE_4(img,l,o,v)  STORE_8(img,l,o,((o) & 4 ?			       \
-				   (FETCH_8(img,l,o) & 0x0f) | ((v) << 4) : \
-				   (FETCH_8(img,l,o) & 0xf0) | (v)))
+#define STORE_4(img,l,o,v)				       \
+    STORE_8 (img,l,o,((o) & 4 ?					\
+                      (FETCH_8 (img,l,o) & 0x0f) | ((v) << 4) : \
+                      (FETCH_8 (img,l,o) & 0xf0) | (v)))
 #endif
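
Sub-byte pixels cannot be addressed directly, so STORE_4 reads the containing byte, merges the new nibble in, and writes the byte back; which nibble a pixel occupies depends on endianness. The read-modify-write idea, sketched with the offset taken in pixels rather than the macro's bit-oriented offset, and without the img/WRITE indirection:

    /* Store a 4-bit value v at pixel index px of a 4bpp scanline,
     * little-endian nibble order (even pixels in the low nibble).
     */
    static void
    store_4bpp (uint8_t *line, int px, uint8_t v)
    {
        uint8_t *byte = line + (px >> 1);           /* two pixels per byte */

        if (px & 1)
            *byte = (*byte & 0x0f) | (v << 4);      /* odd pixel: high nibble */
        else
            *byte = (*byte & 0xf0) | v;             /* even pixel: low nibble */
    }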
 
 static void
-store_scanline_a4 (bits_image_t *image,
-	    int x, int y, int width,
-	    const uint32_t *values)
+store_scanline_a4 (bits_image_t *  image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     int i;
     
-    for (i = 0; i < width; ++i) {
-	STORE_4(image, bits, i + x, values[i]>>28);
-    }
+    for (i = 0; i < width; ++i)
+	STORE_4 (image, bits, i + x, values[i] >> 28);
 }
 
 static void
-store_scanline_r1g2b1 (bits_image_t *image,
-		int x, int y, int width,
-		const uint32_t *values)
+store_scanline_r1g2b1 (bits_image_t *  image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     int i;
     
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-
-	SPLIT(values[i]);
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t pixel;
+	
+	SPLIT (values[i]);
 	pixel = (((r >> 4) & 0x8) |
-		 ((g >> 5) & 0x6) |
-		 ((b >> 7)      ));
-	STORE_4(image, bits, i + x, pixel);
+	         ((g >> 5) & 0x6) |
+	         ((b >> 7)      ));
+	STORE_4 (image, bits, i + x, pixel);
     }
 }
 
 static void
-store_scanline_b1g2r1 (bits_image_t *image,
-		int x, int y, int width,
-		const uint32_t *values)
+store_scanline_b1g2r1 (bits_image_t *  image,
+                       int             x,
+                       int             y,
+                       int             width,
+                       const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     int i;
     
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-
-	SPLIT(values[i]);
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t pixel;
+	
+	SPLIT (values[i]);
 	pixel = (((b >> 4) & 0x8) |
-		 ((g >> 5) & 0x6) |
-		 ((r >> 7)      ));
-	STORE_4(image, bits, i + x, pixel);
+	         ((g >> 5) & 0x6) |
+	         ((r >> 7)      ));
+	STORE_4 (image, bits, i + x, pixel);
     }
 }
 
 static void
-store_scanline_a1r1g1b1 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_a1r1g1b1 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     int i;
     
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-	SPLIT_A(values[i]);
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t pixel;
+	
+	SPLIT_A (values[i]);
 	pixel = (((a >> 4) & 0x8) |
-		 ((r >> 5) & 0x4) |
-		 ((g >> 6) & 0x2) |
-		 ((b >> 7)      ));
-	STORE_4(image, bits, i + x, pixel);
+	         ((r >> 5) & 0x4) |
+	         ((g >> 6) & 0x2) |
+	         ((b >> 7)      ));
+	STORE_4 (image, bits, i + x, pixel);
     }
 }
 
 static void
-store_scanline_a1b1g1r1 (bits_image_t *image,
-		  int x, int y, int width,
-		  const uint32_t *values)
+store_scanline_a1b1g1r1 (bits_image_t *  image,
+                         int             x,
+                         int             y,
+                         int             width,
+                         const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     int i;
     
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-	SPLIT_A(values[i]);
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t pixel;
+	
+	SPLIT_A (values[i]);
 	pixel = (((a >> 4) & 0x8) |
-		 ((b >> 5) & 0x4) |
-		 ((g >> 6) & 0x2) |
-		 ((r >> 7)      ));
-	STORE_4(image, bits, i + x, pixel);
+	         ((b >> 5) & 0x4) |
+	         ((g >> 6) & 0x2) |
+	         ((r >> 7)      ));
+	STORE_4 (image, bits, i + x, pixel);
     }
 }
 
 static void
-store_scanline_c4 (bits_image_t *image,
-	    int x, int y, int width,
-	    const uint32_t *values)
+store_scanline_c4 (bits_image_t *  image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     const pixman_indexed_t *indexed = image->indexed;
     int i;
     
-    for (i = 0; i < width; ++i) {
-	uint32_t  pixel;
-
-	pixel = RGB24_TO_ENTRY(indexed, values[i]);
-	STORE_4(image, bits, i + x, pixel);
+    for (i = 0; i < width; ++i)
+    {
+	uint32_t pixel;
+	
+	pixel = RGB24_TO_ENTRY (indexed, values[i]);
+	STORE_4 (image, bits, i + x, pixel);
     }
 }
 
 static void
-store_scanline_a1 (bits_image_t *image,
-	    int x, int y, int width,
-	    const uint32_t *values)
+store_scanline_a1 (bits_image_t *  image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     int i;
     
     for (i = 0; i < width; ++i)
     {
-	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
+	uint32_t  *pixel = ((uint32_t *) bits) + ((i + x) >> 5);
 	uint32_t mask, v;
+
 #ifdef WORDS_BIGENDIAN
-	mask = 1 << (0x1f - ((i+x) & 0x1f));
+	mask = 1 << (0x1f - ((i + x) & 0x1f));
 #else
-	mask = 1 << ((i+x) & 0x1f);
+	mask = 1 << ((i + x) & 0x1f);
 #endif
 	v = values[i] & 0x80000000 ? mask : 0;
-	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
+	
+	WRITE (image, pixel, (READ (image, pixel) & ~mask) | v);
     }
 }
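
At 1bpp the unit becomes a 32-bit word: compute which word holds the pixel, build a one-bit mask whose position again depends on endianness, and read-modify-write. The little-endian case, with the alpha taken from the top bit of the a8r8g8b8 value, reduces to:

    /* Set or clear pixel x of a little-endian 1bpp scanline. */
    static void
    store_1bpp (uint32_t *bits, int x, uint32_t argb)
    {
        uint32_t *word = bits + (x >> 5);           /* 32 pixels per word */
        uint32_t  mask = 1u << (x & 0x1f);
        uint32_t  v    = (argb & 0x80000000) ? mask : 0;

        *word = (*word & ~mask) | v;
    }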
 
 static void
-store_scanline_g1 (bits_image_t *image,
-	    int x, int y, int width,
-	    const uint32_t *values)
+store_scanline_g1 (bits_image_t *  image,
+                   int             x,
+                   int             y,
+                   int             width,
+                   const uint32_t *values)
 {
     uint32_t *bits = image->bits + image->rowstride * y;
     const pixman_indexed_t *indexed = image->indexed;
@@ -2619,15 +3194,17 @@ store_scanline_g1 (bits_image_t *image,
     
     for (i = 0; i < width; ++i)
     {
-	uint32_t  *pixel = ((uint32_t *) bits) + ((i+x) >> 5);
-	uint32_t  mask, v;
+	uint32_t  *pixel = ((uint32_t *) bits) + ((i + x) >> 5);
+	uint32_t mask, v;
+
 #ifdef WORDS_BIGENDIAN
-	mask = 1 << (0x1f - ((i+x) & 0x1f));
+	mask = 1 << (0x1f - ((i + x) & 0x1f));
 #else
 	mask = 1 << ((i + x) & 0x1f);
 #endif
 	v = RGB24_TO_ENTRY_Y (indexed, values[i]) ? mask : 0;
-	WRITE(image, pixel, (READ(image, pixel) & ~mask) | v);
+
+	WRITE (image, pixel, (READ (image, pixel) & ~mask) | v);
     }
 }
 
@@ -2636,30 +3213,41 @@ store_scanline_g1 (bits_image_t *image,
  * store proc. Despite the type, this function expects a uint64_t buffer.
  */
 static void
-store_scanline_generic_64 (bits_image_t *image, int x, int y, int width, const uint32_t *values)
+store_scanline_generic_64 (bits_image_t *  image,
+                           int             x,
+                           int             y,
+                           int             width,
+                           const uint32_t *values)
 {
     uint32_t *argb8_pixels;
-
-    assert(image->common.type == BITS);
-
+    
+    assert (image->common.type == BITS);
+    
     argb8_pixels = pixman_malloc_ab (width, sizeof(uint32_t));
     if (!argb8_pixels)
 	return;
-
+    
     /* Contract the scanline.  We could do this in place if values weren't
      * const.
      */
-    pixman_contract(argb8_pixels, (uint64_t *)values, width);
+    pixman_contract (argb8_pixels, (uint64_t *)values, width);
     
     image->store_scanline_raw_32 (image, x, y, width, argb8_pixels);
-
-    free(argb8_pixels);
+    
+    free (argb8_pixels);
 }
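
pixman_contract (defined in pixman-utils.c) narrows a scanline of 16-bit-per-channel pixels to 8 bits per channel so the ordinary 32-bit store proc can be reused. Assuming the wide format is a16r16g16b16 with the same ARGB channel order, the per-pixel operation keeps the high byte of each channel, roughly:

    /* Narrow one a16r16g16b16 pixel to a8r8g8b8 by keeping the
     * high byte of each 16-bit channel.
     */
    static uint32_t
    contract_pixel (uint64_t wide)
    {
        uint32_t a = (wide >> 56) & 0xff;
        uint32_t r = (wide >> 40) & 0xff;
        uint32_t g = (wide >> 24) & 0xff;
        uint32_t b = (wide >>  8) & 0xff;

        return (a << 24) | (r << 16) | (g << 8) | b;
    }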
 
-/* Despite the type, this function expects both buffer and mask to be uint64_t */
+/* Despite the type, this function expects both buffer
+ * and mask to be uint64_t
+ */
 static void
-fetch_scanline_generic_64 (pixman_image_t *image, int x, int y, int width, uint32_t *buffer,
-		   const uint32_t *mask, uint32_t mask_bits)
+fetch_scanline_generic_64 (pixman_image_t *image,
+                           int             x,
+                           int             y,
+                           int             width,
+                           uint32_t *      buffer,
+                           const uint32_t *mask,
+                           uint32_t        mask_bits)
 {
     /* Fetch the pixels into the first half of buffer and then expand them in
      * place.
@@ -2671,7 +3259,9 @@ fetch_scanline_generic_64 (pixman_image_t *image, int x, int y, int width, uint3
 
 /* Despite the type, this function expects a uint64_t *buffer */
 static void
-fetch_pixels_generic_64 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_generic_64 (bits_image_t *pict,
+                         uint32_t *    buffer,
+                         int           n_pixels)
 {
     pict->fetch_pixels_raw_32 (pict, buffer, n_pixels);
     
@@ -2679,16 +3269,18 @@ fetch_pixels_generic_64 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
 }
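
The 64-bit fetch path goes the other way: fetch at 32bpp, then expand in place. Widening by plain shifting would map 0xff to 0xff00, so an expansion like pixman_expand replicates the high bits instead; for an 8-bit channel that comes down to the following (a sketch of the idea, not pixman_expand itself, which also handles narrower channels):

    /* Widen one 8-bit channel to 16 bits by bit replication,
     * so that 0xff becomes 0xffff rather than 0xff00.
     */
    static uint16_t
    expand_channel (uint8_t c)
    {
        return ((uint16_t)c << 8) | c;
    }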
 
 /*
- * XXX: The transformed fetch path only works at 32-bpp so far.  When all paths
- * have wide versions, this can be removed.
+ * XXX: The transformed fetch path only works at 32-bpp so far.  When all
+ * paths have wide versions, this can be removed.
  *
  * WARNING: This function loses precision!
  */
 static void
-fetch_pixels_generic_lossy_32 (bits_image_t *pict, uint32_t *buffer, int n_pixels)
+fetch_pixels_generic_lossy_32 (bits_image_t *pict,
+                               uint32_t *    buffer,
+                               int           n_pixels)
 {
-    /* Since buffer contains n_pixels coordinate pairs, it also has enough room for
-     * n_pixels 64 bit pixels.
+    /* Since buffer contains n_pixels coordinate pairs, it also has enough
+     * room for n_pixels 64 bit pixels.
      */
     pict->fetch_pixels_raw_64 (pict, buffer, n_pixels);
     
@@ -2697,22 +3289,23 @@ fetch_pixels_generic_lossy_32 (bits_image_t *pict, uint32_t *buffer, int n_pixel
 
 typedef struct
 {
-    pixman_format_code_t		format;
-    fetch_scanline_t				fetch_scanline_raw_32;
-    fetch_scanline_t				fetch_scanline_raw_64;
-    fetch_pixels_t			fetch_pixels_raw_32;
-    fetch_pixels_t			fetch_pixels_raw_64;
-    store_scanline_t			store_scanline_raw_32;
-    store_scanline_t			store_scanline_raw_64;
+    pixman_format_code_t	format;
+    fetch_scanline_t		fetch_scanline_raw_32;
+    fetch_scanline_t		fetch_scanline_raw_64;
+    fetch_pixels_t		fetch_pixels_raw_32;
+    fetch_pixels_t		fetch_pixels_raw_64;
+    store_scanline_t		store_scanline_raw_32;
+    store_scanline_t		store_scanline_raw_64;
 } format_info_t;
 
-#define FORMAT_INFO(format)						\
+#define FORMAT_INFO(format) 						\
     {									\
-	PIXMAN_##format,						\
-	    fetch_scanline_##format, fetch_scanline_generic_64,			\
-	    fetch_pixels_##format, fetch_pixels_generic_64,		\
-	    store_scanline_##format, store_scanline_generic_64				\
-    }
+	PIXMAN_ ## format,						\
+	    fetch_scanline_ ## format,					\
+	    fetch_scanline_generic_64,					\
+	    fetch_pixels_ ## format, fetch_pixels_generic_64,		\
+	    store_scanline_ ## format, store_scanline_generic_64	\
+	    }
 
 static const format_info_t accessors[] =
 {
@@ -2723,7 +3316,7 @@ static const format_info_t accessors[] =
     FORMAT_INFO (x8b8g8r8),
     FORMAT_INFO (b8g8r8a8),
     FORMAT_INFO (b8g8r8x8),
-
+    
 /* 24bpp formats */
     FORMAT_INFO (r8g8b8),
     FORMAT_INFO (b8g8r8),
@@ -2749,15 +3342,17 @@ static const format_info_t accessors[] =
     FORMAT_INFO (a2b2g2r2),
     
     FORMAT_INFO (c8),
-
+    
 #define fetch_scanline_g8 fetch_scanline_c8
 #define fetch_pixels_g8 fetch_pixels_c8
 #define store_scanline_g8 store_scanline_c8
     FORMAT_INFO (g8),
+
 #define fetch_scanline_x4c4 fetch_scanline_c8
 #define fetch_pixels_x4c4 fetch_pixels_c8
 #define store_scanline_x4c4 store_scanline_c8
     FORMAT_INFO (x4c4),
+
 #define fetch_scanline_x4g4 fetch_scanline_c8
 #define fetch_pixels_x4g4 fetch_pixels_c8
 #define store_scanline_x4g4 store_scanline_c8
@@ -2773,6 +3368,7 @@ static const format_info_t accessors[] =
     FORMAT_INFO (a1b1g1r1),
     
     FORMAT_INFO (c4),
+
 #define fetch_scanline_g4 fetch_scanline_c4
 #define fetch_pixels_g4 fetch_pixels_c4
 #define store_scanline_g4 store_scanline_c4
@@ -2781,29 +3377,29 @@ static const format_info_t accessors[] =
 /* 1bpp formats */
     FORMAT_INFO (a1),
     FORMAT_INFO (g1),
-
+    
 /* Wide formats */
-
+    
     { PIXMAN_a2r10g10b10,
       NULL, fetch_scanline_a2r10g10b10,
       fetch_pixels_generic_lossy_32, fetch_pixels_a2r10g10b10_64,
       NULL, store_scanline_a2r10g10b10 },
-
+    
     { PIXMAN_x2r10g10b10,
       NULL, fetch_scanline_x2r10g10b10,
       fetch_pixels_generic_lossy_32, fetch_pixels_x2r10g10b10_64,
       NULL, store_scanline_x2r10g10b10 },
-
+    
     { PIXMAN_a2b10g10r10,
       NULL, fetch_scanline_a2b10g10r10,
       fetch_pixels_generic_lossy_32, fetch_pixels_a2b10g10r10_64,
       NULL, store_scanline_a2b10g10r10 },
-
+    
     { PIXMAN_x2b10g10r10,
       NULL, fetch_scanline_x2b10g10r10,
       fetch_pixels_generic_lossy_32, fetch_pixels_x2b10g10r10_64,
       NULL, store_scanline_x2b10g10r10 },
-
+    
 /* YUV formats */
     { PIXMAN_yuy2,
       fetch_scanline_yuy2, fetch_scanline_generic_64,
@@ -2822,7 +3418,7 @@ static void
 setup_accessors (bits_image_t *image)
 {
     const format_info_t *info = accessors;
-
+    
     while (info->format != PIXMAN_null)
     {
 	if (info->format == image->format)
@@ -2833,10 +3429,10 @@ setup_accessors (bits_image_t *image)
 	    image->fetch_pixels_raw_64 = info->fetch_pixels_raw_64;
 	    image->store_scanline_raw_32 = info->store_scanline_raw_32;
 	    image->store_scanline_raw_64 = info->store_scanline_raw_64;
-
+	    
 	    return;
 	}
-
+	
 	info++;
     }
 }
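
The accessors[] table above is terminated by a PIXMAN_null sentinel, and setup_accessors is a straight linear scan that copies the six function pointers into the image. Factored out, the lookup itself reduces to (helper name illustrative):

    /* Linear search over the sentinel-terminated accessor table. */
    static const format_info_t *
    find_accessors (pixman_format_code_t format)
    {
        const format_info_t *info;

        for (info = accessors; info->format != PIXMAN_null; info++)
        {
            if (info->format == format)
                return info;
        }

        return NULL;    /* unknown format: leave the accessors unset */
    }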

