pixman: Branch 'master' - 6 commits

Tue Sep 27 10:13:51 PDT 2011

configure.ac            |   56 +++++++++++-
 pixman/Makefile.am      |   14 ++-
 pixman/Makefile.win32   |    2 
 pixman/pixman-cpu.c     |   30 +++++-
 pixman/pixman-mmx.c     |  213 ++++++++++++++++++++++++++++++++++--------------
 pixman/pixman-private.h |    2 
 6 files changed, 242 insertions(+), 75 deletions(-)

New commits:
commit d1313febbec2124ee175cd323a338caa3c1a8fc2
Author: Matt Turner <mattst88 at gmail.com>
Date:   Thu Sep 22 15:28:00 2011 -0400

    mmx: optimize unaligned 64-bit ARM/iwmmxt loads
    
    Signed-off-by: Matt Turner <mattst88 at gmail.com>

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 801ae32..f848ab4 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -314,6 +314,13 @@ static __inline__ uint64_t ldq_u(uint64_t *p)
 #ifdef USE_X86_MMX
     /* x86's alignment restrictions are very relaxed. */
     return *p;
+#elif defined USE_ARM_IWMMXT
+    int align = (uintptr_t)p & 7;
+    __m64 *aligned_p;
+    if (align == 0)
+	return *p;
+    aligned_p = (__m64 *)((uintptr_t)p & ~7);
+    return _mm_align_si64 (aligned_p[0], aligned_p[1], align);
 #else
     struct __una_u64 { uint64_t x __attribute__((packed)); };
     const struct __una_u64 *ptr = (const struct __una_u64 *) p;
commit 7ab94c5f99cc524ddfbbcedca4304ec7943f74e1
Author: Matt Turner <mattst88 at gmail.com>
Date:   Sun Jul 31 22:42:24 2011 -0400

    mmx: compile on ARM for iwmmxt optimizations
    
    Check in configure for at least gcc-4.6, since gcc-4.7 (and hopefully
    4.6) will be the earliest version capable of compiling the _mm_*
    intrinsics on ARM/iwmmxt. Even for suitable compiler versions I use
    _mm_srli_si64 which is known to cause unpatched compilers to fail.
    
    Select iwmmxt at runtime only after NEON, since we expect the NEON
    optimizations to be more capable and faster than iwmmxt.
    
    Signed-off-by: Matt Turner <mattst88 at gmail.com>

diff --git a/configure.ac b/configure.ac
index 561cb67..481d0bb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -540,6 +540,54 @@ if test $enable_arm_neon = yes && test $have_arm_neon = no ; then
    AC_MSG_ERROR([ARM NEON intrinsics not detected])
 fi
 
+dnl ===========================================================================
+dnl Check for IWMMXT
+
+if test "x$IWMMXT_CFLAGS" = "x" ; then
+   IWMMXT_CFLAGS="-march=iwmmxt -flax-vector-conversions -Winline"
+fi
+
+have_iwmmxt_intrinsics=no
+AC_MSG_CHECKING(whether to use ARM IWMMXT intrinsics)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="$IWMMXT_CFLAGS $CFLAGS"
+AC_COMPILE_IFELSE([
+#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 3 && __GNUC_MINOR__ < 6))
+#error "Need GCC >= 4.6 for IWMMXT intrinsics"
+#endif
+#include <mmintrin.h>
+int main () {
+	union {
+		__m64 v;
+		[char c[8];]
+	} a = { .c = {1, 2, 3, 4, 5, 6, 7, 8} };
+	int b = 4;
+	__m64 c = _mm_srli_si64 (a.v, b);
+}], have_iwmmxt_intrinsics=yes)
+CFLAGS=$xserver_save_CFLAGS
+
+AC_ARG_ENABLE(arm-iwmmxt,
+   [AC_HELP_STRING([--disable-arm-iwmmxt],
+                   [disable ARM IWMMXT fast paths])],
+   [enable_iwmmxt=$enableval], [enable_iwmmxt=auto])
+
+if test $enable_iwmmxt = no ; then
+   have_iwmmxt_intrinsics=disabled
+fi
+
+if test $have_iwmmxt_intrinsics = yes ; then
+   AC_DEFINE(USE_ARM_IWMMXT, 1, [use ARM IWMMXT compiler intrinsics])
+else
+   IWMMXT_CFLAGS=
+fi
+
+AC_MSG_RESULT($have_iwmmxt_intrinsics)
+if test $enable_iwmmxt = yes && test $have_iwmmxt_intrinsics = no ; then
+   AC_MSG_ERROR([IWMMXT intrinsics not detected])
+fi
+
+AM_CONDITIONAL(USE_ARM_IWMMXT, test $have_iwmmxt_intrinsics = yes)
+
 dnl =========================================================================================
 dnl Check for GNU-style inline assembly support
 
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 0932ce7..286b7cf 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -90,5 +90,17 @@ libpixman_1_la_LIBADD += libpixman-arm-neon.la
 ASM_CFLAGS_arm_neon=
 endif
 
+# iwmmxt code
+if USE_ARM_IWMMXT
+noinst_LTLIBRARIES += libpixman-iwmmxt.la
+libpixman_iwmmxt_la_SOURCES = pixman-mmx.c
+libpixman_iwmmxt_la_CFLAGS = $(DEP_CFLAGS) $(IWMMXT_CFLAGS)
+libpixman_iwmmxt_la_LIBADD = $(DEP_LIBS)
+libpixman_1_la_LDFLAGS += $(IWMMXT_LDFLAGS)
+libpixman_1_la_LIBADD += libpixman-iwmmxt.la
+
+ASM_CFLAGS_IWMMXT=$(IWMMXT_CFLAGS)
+endif
+
 .c.s : $(libpixmaninclude_HEADERS) $(BUILT_SOURCES)
 	$(CC) $(CFLAGS) $(ASM_CFLAGS_$(@:pixman-%.s=%)) $(ASM_CFLAGS_$(@:pixman-arm-%.s=arm_%)) -DHAVE_CONFIG_H -I$(srcdir) -I$(builddir) -I$(top_builddir) -S -o $@ $<
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index 78d3033..dff27d1 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -187,7 +187,7 @@ pixman_have_vmx (void)
 #endif /* __APPLE__ */
 #endif /* USE_VMX */
 
-#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON)
+#if defined(USE_ARM_SIMD) || defined(USE_ARM_NEON) || defined(USE_ARM_IWMMXT)
 
 #if defined(_MSC_VER)
 
@@ -328,14 +328,27 @@ pixman_have_arm_neon (void)
 
 #endif /* USE_ARM_NEON */
 
+#if defined(USE_ARM_IWMMXT)
+pixman_bool_t
+pixman_have_arm_iwmmxt (void)
+{
+    if (!arm_tests_initialized)
+	pixman_arm_read_auxv ();
+
+    return arm_has_iwmmxt;
+}
+
+#endif /* USE_ARM_IWMMXT */
+
 #else /* linux ELF */
 
 #define pixman_have_arm_simd() FALSE
 #define pixman_have_arm_neon() FALSE
+#define pixman_have_arm_iwmmxt() FALSE
 
 #endif
 
-#endif /* USE_ARM_SIMD || USE_ARM_NEON */
+#endif /* USE_ARM_SIMD || USE_ARM_NEON || USE_ARM_IWMMXT */
 
 #if defined(USE_X86_MMX) || defined(USE_SSE2)
 /* The CPU detection code needs to be in a file not compiled with
@@ -596,11 +609,16 @@ _pixman_choose_implementation (void)
 	imp = _pixman_implementation_create_arm_simd (imp);
 #endif
 
+#ifdef USE_ARM_IWMMXT
+    if (pixman_have_arm_iwmmxt ())
+	imp = _pixman_implementation_create_mmx (imp);
+#endif
+
 #ifdef USE_ARM_NEON
     if (pixman_have_arm_neon ())
 	imp = _pixman_implementation_create_arm_neon (imp);
 #endif
-    
+
 #ifdef USE_VMX
     if (pixman_have_vmx ())
 	imp = _pixman_implementation_create_vmx (imp);
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 09f88c8..4d645fe 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -545,7 +545,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback);
 pixman_implementation_t *
 _pixman_implementation_create_noop (pixman_implementation_t *fallback);
 
-#ifdef USE_X86_MMX
+#if defined USE_X86_MMX || defined USE_ARM_IWMMXT
 pixman_implementation_t *
 _pixman_implementation_create_mmx (pixman_implementation_t *fallback);
 #endif
commit f66887d9eae9646c838d518020168b1403705b1e
Author: Matt Turner <mattst88 at gmail.com>
Date:   Sun Sep 4 14:11:46 2011 -0400

    mmx: prepare pixman-mmx.c to be compiled for ARM/iwmmxt
    
    Signed-off-by: Matt Turner <mattst88 at gmail.com>

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 0317b9a..801ae32 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -33,7 +33,7 @@
 #include <config.h>
 #endif
 
-#ifdef USE_X86_MMX
+#if defined USE_X86_MMX || defined USE_ARM_IWMMXT
 
 #include <mmintrin.h>
 #include "pixman-private.h"
@@ -47,6 +47,15 @@
 #define CHECKPOINT()
 #endif
 
+#ifdef USE_ARM_IWMMXT
+/* Empty the multimedia state. For some reason, ARM's mmintrin.h doesn't provide this.  */
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_empty (void)
+{
+
+}
+#endif
+
 /* Notes about writing mmx code
  *
  * give memory operands as the second operand. If you give it as the
@@ -3218,4 +3227,4 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback)
     return imp;
 }
 
-#endif /* USE_X86_MMX */
+#endif /* USE_X86_MMX || USE_ARM_IWMMXT */
commit 7c6d5d1999989187b60f1e0e82e55ed55238eb27
Author: Matt Turner <mattst88 at gmail.com>
Date:   Thu Sep 8 20:33:45 2011 +0200

    mmx: fix unaligned accesses
    
    Simply return *p in the unaligned access functions, since alignment
    constraints are very relaxed on x86 and this allows us to generate
    the same code as before.
    
    Tested with the test suite, lowlevel-blit-test, and cairo-perf-trace on
    ARM and Alpha with no unaligned accesses found.
    
    Signed-off-by: Matt Turner <mattst88 at gmail.com>

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 8782d89..0317b9a 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -298,6 +298,32 @@ in_over (__m64 src, __m64 srca, __m64 mask, __m64 dest)
 
 #endif
 
+/* Elemental unaligned loads */
+
+static __inline__ uint64_t ldq_u(uint64_t *p)
+{
+#ifdef USE_X86_MMX
+    /* x86's alignment restrictions are very relaxed. */
+    return *p;
+#else
+    struct __una_u64 { uint64_t x __attribute__((packed)); };
+    const struct __una_u64 *ptr = (const struct __una_u64 *) p;
+    return ptr->x;
+#endif
+}
+
+static __inline__ uint32_t ldl_u(uint32_t *p)
+{
+#ifdef USE_X86_MMX
+    /* x86's alignment restrictions are very relaxed. */
+    return *p;
+#else
+    struct __una_u32 { uint32_t x __attribute__((packed)); };
+    const struct __una_u32 *ptr = (const struct __una_u32 *) p;
+    return ptr->x;
+#endif
+}
+
 static force_inline __m64
 load8888 (uint32_t v)
 {
@@ -1366,7 +1392,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 
 	while (w >= 2)
 	{
-	    __m64 vs = *(__m64 *)src;
+	    __m64 vs = (__m64)ldq_u((uint64_t *)src);
 	    __m64 vd = *(__m64 *)dst;
 	    __m64 vsrc0 = expand8888 (vs, 0);
 	    __m64 vsrc1 = expand8888 (vs, 1);
@@ -1447,14 +1473,14 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
 	    __m64 vd6 = *(__m64 *)(dst + 12);
 	    __m64 vd7 = *(__m64 *)(dst + 14);
 
-	    __m64 vs0 = *(__m64 *)(src + 0);
-	    __m64 vs1 = *(__m64 *)(src + 2);
-	    __m64 vs2 = *(__m64 *)(src + 4);
-	    __m64 vs3 = *(__m64 *)(src + 6);
-	    __m64 vs4 = *(__m64 *)(src + 8);
-	    __m64 vs5 = *(__m64 *)(src + 10);
-	    __m64 vs6 = *(__m64 *)(src + 12);
-	    __m64 vs7 = *(__m64 *)(src + 14);
+	    __m64 vs0 = (__m64)ldq_u((uint64_t *)(src + 0));
+	    __m64 vs1 = (__m64)ldq_u((uint64_t *)(src + 2));
+	    __m64 vs2 = (__m64)ldq_u((uint64_t *)(src + 4));
+	    __m64 vs3 = (__m64)ldq_u((uint64_t *)(src + 6));
+	    __m64 vs4 = (__m64)ldq_u((uint64_t *)(src + 8));
+	    __m64 vs5 = (__m64)ldq_u((uint64_t *)(src + 10));
+	    __m64 vs6 = (__m64)ldq_u((uint64_t *)(src + 12));
+	    __m64 vs7 = (__m64)ldq_u((uint64_t *)(src + 14));
 
 	    vd0 = pack8888 (
 	        in_over (expandx888 (vs0, 0), srca, vmask, expand8888 (vd0, 0)),
@@ -2491,23 +2517,35 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	if ((((unsigned long)dest_image & 3) == 0) &&
-	    (((unsigned long)src_image & 3) == 0))
+	while (w && (unsigned long)dst & 7)
 	{
-	    while (w >= 4)
-	    {
-		__m64 vmask;
-		__m64 vdest;
+	    uint16_t tmp;
+	    uint8_t a;
+	    uint32_t m, d;
 
-		vmask = load8888 (*(uint32_t *)mask);
-		vdest = load8888 (*(uint32_t *)dst);
+	    a = *mask++;
+	    d = *dst;
 
-		*(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));
+	    m = MUL_UN8 (sa, a, tmp);
+	    d = MUL_UN8 (m, d, tmp);
 
-		dst += 4;
-		mask += 4;
-		w -= 4;
-	    }
+	    *dst++ = d;
+	    w--;
+	}
+
+	while (w >= 4)
+	{
+	    __m64 vmask;
+	    __m64 vdest;
+
+	    vmask = load8888 (ldl_u((uint32_t *)mask));
+	    vdest = load8888 (*(uint32_t *)dst);
+
+	    *(uint32_t *)dst = store8888 (in (in (vsrca, vmask), vdest));
+
+	    dst += 4;
+	    mask += 4;
+	    w -= 4;
 	}
 
 	while (w--)
@@ -2550,20 +2588,31 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	if ((((unsigned long)dest_image & 3) == 0) &&
-	    (((unsigned long)src_image & 3) == 0))
+	while (w && (unsigned long)dst & 3)
 	{
-	    while (w >= 4)
-	    {
-		uint32_t *s = (uint32_t *)src;
-		uint32_t *d = (uint32_t *)dst;
+	    uint8_t s, d;
+	    uint16_t tmp;
 
-		*d = store8888 (in (load8888 (*s), load8888 (*d)));
+	    s = *src;
+	    d = *dst;
 
-		w -= 4;
-		dst += 4;
-		src += 4;
-	    }
+	    *dst = MUL_UN8 (s, d, tmp);
+
+	    src++;
+	    dst++;
+	    w--;
+	}
+
+	while (w >= 4)
+	{
+	    uint32_t *s = (uint32_t *)src;
+	    uint32_t *d = (uint32_t *)dst;
+
+	    *d = store8888 (in (load8888 (ldl_u((uint32_t *)s)), load8888 (*d)));
+
+	    w -= 4;
+	    dst += 4;
+	    src += 4;
 	}
 
 	while (w--)
@@ -2618,20 +2667,36 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	if ((((unsigned long)mask_image & 3) == 0) &&
-	    (((unsigned long)dest_image  & 3) == 0))
+	while (w && (unsigned long)dst & 3)
 	{
-	    while (w >= 4)
-	    {
-		__m64 vmask = load8888 (*(uint32_t *)mask);
-		__m64 vdest = load8888 (*(uint32_t *)dst);
+	    uint16_t tmp;
+	    uint16_t a;
+	    uint32_t m, d;
+	    uint32_t r;
+
+	    a = *mask++;
+	    d = *dst;
 
-		*(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
+	    m = MUL_UN8 (sa, a, tmp);
+	    r = ADD_UN8 (m, d, tmp);
 
-		w -= 4;
-		dst += 4;
-		mask += 4;
-	    }
+	    *dst++ = r;
+	    w--;
+	}
+
+	while (w >= 4)
+	{
+	    __m64 vmask;
+	    __m64 vdest;
+
+	    vmask = load8888 (ldl_u((uint32_t *)mask));
+	    vdest = load8888 (*(uint32_t *)dst);
+
+	    *(uint32_t *)dst = store8888 (_mm_adds_pu8 (in (vsrca, vmask), vdest));
+
+	    dst += 4;
+	    mask += 4;
+	    w -= 4;
 	}
 
 	while (w--)
@@ -2694,7 +2759,7 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp,
 
 	while (w >= 8)
 	{
-	    *(__m64*)dst = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
+	    *(__m64*)dst = _mm_adds_pu8 ((__m64)ldq_u((uint64_t *)src), *(__m64*)dst);
 	    dst += 8;
 	    src += 8;
 	    w -= 8;
@@ -2752,7 +2817,7 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
 
 	while (w >= 2)
 	{
-	    dst64 = _mm_adds_pu8 (*(__m64*)src, *(__m64*)dst);
+	    dst64 = _mm_adds_pu8 ((__m64)ldq_u((uint64_t *)src), *(__m64*)dst);
 	    *(uint64_t*)dst = to_uint64 (dst64);
 	    dst += 2;
 	    src += 2;
@@ -2825,6 +2890,14 @@ pixman_blt_mmx (uint32_t *src_bits,
 	dst_bytes += dst_stride;
 	w = byte_width;
 
+	while (w >= 1 && ((unsigned long)d & 1))
+	{
+	    *(uint8_t *)d = *(uint8_t *)s;
+	    w -= 1;
+	    s += 1;
+	    d += 1;
+	}
+
 	while (w >= 2 && ((unsigned long)d & 3))
 	{
 	    *(uint16_t *)d = *(uint16_t *)s;
@@ -2835,7 +2908,7 @@ pixman_blt_mmx (uint32_t *src_bits,
 
 	while (w >= 4 && ((unsigned long)d & 7))
 	{
-	    *(uint32_t *)d = *(uint32_t *)s;
+	    *(uint32_t *)d = ldl_u((uint32_t *)s);
 
 	    w -= 4;
 	    s += 4;
@@ -2869,14 +2942,14 @@ pixman_blt_mmx (uint32_t *src_bits,
 		  "%mm0", "%mm1", "%mm2", "%mm3",
 		  "%mm4", "%mm5", "%mm6", "%mm7");
 #else
-	    __m64 v0 = *(__m64 *)(s + 0);
-	    __m64 v1 = *(__m64 *)(s + 8);
-	    __m64 v2 = *(__m64 *)(s + 16);
-	    __m64 v3 = *(__m64 *)(s + 24);
-	    __m64 v4 = *(__m64 *)(s + 32);
-	    __m64 v5 = *(__m64 *)(s + 40);
-	    __m64 v6 = *(__m64 *)(s + 48);
-	    __m64 v7 = *(__m64 *)(s + 56);
+	    __m64 v0 = ldq_u((uint64_t *)(s + 0));
+	    __m64 v1 = ldq_u((uint64_t *)(s + 8));
+	    __m64 v2 = ldq_u((uint64_t *)(s + 16));
+	    __m64 v3 = ldq_u((uint64_t *)(s + 24));
+	    __m64 v4 = ldq_u((uint64_t *)(s + 32));
+	    __m64 v5 = ldq_u((uint64_t *)(s + 40));
+	    __m64 v6 = ldq_u((uint64_t *)(s + 48));
+	    __m64 v7 = ldq_u((uint64_t *)(s + 56));
 	    *(__m64 *)(d + 0)  = v0;
 	    *(__m64 *)(d + 8)  = v1;
 	    *(__m64 *)(d + 16) = v2;
@@ -2893,7 +2966,7 @@ pixman_blt_mmx (uint32_t *src_bits,
 	}
 	while (w >= 4)
 	{
-	    *(uint32_t *)d = *(uint32_t *)s;
+	    *(uint32_t *)d = ldl_u((uint32_t *)s);
 
 	    w -= 4;
 	    s += 4;
commit 5d98abb14ca9042af6d0ec7c14c8398cf4046b80
Author: Matt Turner <mattst88 at gmail.com>
Date:   Thu Sep 22 15:39:53 2011 -0400

    mmx: wrap x86/MMX inline assembly in ifdef USE_X86_MMX
    
    Signed-off-by: Matt Turner <mattst88 at gmail.com>

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index eca6d25..8782d89 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -1784,7 +1784,7 @@ pixman_fill_mmx (uint32_t *bits,
     uint32_t byte_width;
     uint8_t     *byte_line;
 
-#ifdef __GNUC__
+#if defined __GNUC__ && defined USE_X86_MMX
     __m64 v1, v2, v3, v4, v5, v6, v7;
 #endif
 
@@ -1818,7 +1818,7 @@ pixman_fill_mmx (uint32_t *bits,
     fill = ((uint64_t)xor << 32) | xor;
     vfill = to_m64 (fill);
 
-#ifdef __GNUC__
+#if defined __GNUC__ && defined USE_X86_MMX
     __asm__ (
         "movq		%7,	%0\n"
         "movq		%7,	%1\n"
@@ -1864,7 +1864,7 @@ pixman_fill_mmx (uint32_t *bits,
 
 	while (w >= 64)
 	{
-#ifdef __GNUC__
+#if defined __GNUC__ && defined USE_X86_MMX
 	    __asm__ (
 	        "movq	%1,	  (%0)\n"
 	        "movq	%2,	 8(%0)\n"
@@ -2844,7 +2844,7 @@ pixman_blt_mmx (uint32_t *src_bits,
 
 	while (w >= 64)
 	{
-#if defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))
+#if (defined (__GNUC__) || (defined(__SUNPRO_C) && (__SUNPRO_C >= 0x590))) && defined USE_X86_MMX
 	    __asm__ (
 	        "movq	  (%1),	  %%mm0\n"
 	        "movq	 8(%1),	  %%mm1\n"
commit 02c1f1a022e86ced69fc91376232d75d5d6583c5
Author: Matt Turner <mattst88 at gmail.com>
Date:   Sun Jul 31 20:20:12 2011 +0000

    mmx: rename USE_MMX to USE_X86_MMX
    
    This will make upcoming ARM usage of pixman-mmx.c unambiguous.
    
    Signed-off-by: Matt Turner <mattst88 at gmail.com>

diff --git a/configure.ac b/configure.ac
index dc523df..561cb67 100644
--- a/configure.ac
+++ b/configure.ac
@@ -303,7 +303,7 @@ CFLAGS=$xserver_save_CFLAGS
 
 AC_ARG_ENABLE(mmx,
    [AC_HELP_STRING([--disable-mmx],
-                   [disable MMX fast paths])],
+                   [disable x86 MMX fast paths])],
    [enable_mmx=$enableval], [enable_mmx=auto])
 
 if test $enable_mmx = no ; then
@@ -311,17 +311,17 @@ if test $enable_mmx = no ; then
 fi
 
 if test $have_mmx_intrinsics = yes ; then
-   AC_DEFINE(USE_MMX, 1, [use MMX compiler intrinsics])
+   AC_DEFINE(USE_X86_MMX, 1, [use x86 MMX compiler intrinsics])
 else
    MMX_CFLAGS=
 fi
 
 AC_MSG_RESULT($have_mmx_intrinsics)
 if test $enable_mmx = yes && test $have_mmx_intrinsics = no ; then
-   AC_MSG_ERROR([MMX intrinsics not detected])
+   AC_MSG_ERROR([x86 MMX intrinsics not detected])
 fi
 
-AM_CONDITIONAL(USE_MMX, test $have_mmx_intrinsics = yes)
+AM_CONDITIONAL(USE_X86_MMX, test $have_mmx_intrinsics = yes)
 
 dnl ===========================================================================
 dnl Check for SSE2
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 2421a4f..0932ce7 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -22,7 +22,7 @@ EXTRA_DIST =				\
 DISTCLEANFILES = $(BUILT_SOURCES)
 
 # mmx code
-if USE_MMX
+if USE_X86_MMX
 noinst_LTLIBRARIES += libpixman-mmx.la
 libpixman_mmx_la_SOURCES = \
 	pixman-mmx.c
diff --git a/pixman/Makefile.win32 b/pixman/Makefile.win32
index beff4a0..381f2cd 100644
--- a/pixman/Makefile.win32
+++ b/pixman/Makefile.win32
@@ -14,7 +14,7 @@ ifeq ($(SSE2_VAR),)
 SSE2_VAR=on
 endif
 
-MMX_CFLAGS = -DUSE_MMX -w14710 -w14714
+MMX_CFLAGS = -DUSE_X86_MMX -w14710 -w14714
 SSE2_CFLAGS = -DUSE_SSE2
 
 # MMX compilation flags
diff --git a/pixman/pixman-cpu.c b/pixman/pixman-cpu.c
index 973ed54..78d3033 100644
--- a/pixman/pixman-cpu.c
+++ b/pixman/pixman-cpu.c
@@ -337,7 +337,7 @@ pixman_have_arm_neon (void)
 
 #endif /* USE_ARM_SIMD || USE_ARM_NEON */
 
-#if defined(USE_MMX) || defined(USE_SSE2)
+#if defined(USE_X86_MMX) || defined(USE_SSE2)
 /* The CPU detection code needs to be in a file not compiled with
  * "-mmmx -msse", as gcc would generate CMOV instructions otherwise
  * that would lead to SIGILL instructions on old CPUs that don't have
@@ -564,7 +564,7 @@ pixman_have_sse2 (void)
 #endif
 
 #else /* __amd64__ */
-#ifdef USE_MMX
+#ifdef USE_X86_MMX
 #define pixman_have_mmx() TRUE
 #endif
 #ifdef USE_SSE2
@@ -581,7 +581,7 @@ _pixman_choose_implementation (void)
     imp = _pixman_implementation_create_general();
     imp = _pixman_implementation_create_fast_path (imp);
     
-#ifdef USE_MMX
+#ifdef USE_X86_MMX
     if (pixman_have_mmx ())
 	imp = _pixman_implementation_create_mmx (imp);
 #endif
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index c044593..eca6d25 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -33,7 +33,7 @@
 #include <config.h>
 #endif
 
-#ifdef USE_MMX
+#ifdef USE_X86_MMX
 
 #include <mmintrin.h>
 #include "pixman-private.h"
@@ -3145,4 +3145,4 @@ _pixman_implementation_create_mmx (pixman_implementation_t *fallback)
     return imp;
 }
 
-#endif /* USE_MMX */
+#endif /* USE_X86_MMX */
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index 6e716c6..09f88c8 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -545,7 +545,7 @@ _pixman_implementation_create_fast_path (pixman_implementation_t *fallback);
 pixman_implementation_t *
 _pixman_implementation_create_noop (pixman_implementation_t *fallback);
 
-#ifdef USE_MMX
+#ifdef USE_X86_MMX
 pixman_implementation_t *
 _pixman_implementation_create_mmx (pixman_implementation_t *fallback);
 #endif