pixman: Branch 'master' - 3 commits

Søren Sandmann Pedersen sandmann at kemper.freedesktop.org
Mon Jul 21 19:10:18 PDT 2008


 TODO                 |    4 ++--
 configure.ac         |    4 +++-
 pixman/pixman-pict.c |   11 ++++-------
 pixman/pixman-sse.c  |    4 ++--
 4 files changed, 11 insertions(+), 12 deletions(-)

New commits:
commit db3fb5eb605c4e1a6fcb93902389a22fc496151c
Author: Søren Sandmann Pedersen <sandmann at daimi.au.dk>
Date:   Mon Jul 21 22:06:02 2008 -0400

    Don't require GCC 4.2 on x86-64

diff --git a/configure.ac b/configure.ac
index 6dcffb1..13563ed 100644
--- a/configure.ac
+++ b/configure.ac
@@ -224,7 +224,9 @@ CFLAGS="$CFLAGS -msse2 $SSE_CFLAGS"
 
 AC_COMPILE_IFELSE([
 #if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2))
-#error "Need GCC >= 4.2 for SSE2 intrinsics"
+#   if !defined(__amd64__) && !defined(__x86_64__)
+#      error "Need GCC >= 4.2 for SSE2 intrinsics on x86"
+#   endif
 #endif
 #include <mmintrin.h>
 #include <xmmintrin.h>
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 806e380..b918219 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1703,7 +1703,7 @@ pixman_optimize_operator(pixman_op_t op, pixman_image_t *pSrc, pixman_image_t *p
 
 }
 
-#if defined(USE_SSE2) && defined (__GNUC__)
+#if defined(USE_SSE2) && defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
 
 /*
  * Work around GCC bug causing crashes in Mozilla with SSE2
@@ -1715,6 +1715,9 @@ pixman_optimize_operator(pixman_op_t op, pixman_image_t *pSrc, pixman_image_t *p
  * The __force_align_arg_pointer__ makes gcc generate a prologue that
  * realigns the stack pointer to 16 bytes.
  *
+ * On x86-64 this is not necessary because the standard ABI already
+ * calls for a 16 byte aligned stack.
+ *
  * See https://bugs.freedesktop.org/show_bug.cgi?id=15693
  */
 
commit 53fa7133fdf7f5879f10847228f478983c480b79
Author: Søren Sandmann Pedersen <sandmann at daimi.au.dk>
Date:   Mon Jul 21 21:55:55 2008 -0400

    TODO

diff --git a/TODO b/TODO
index c16c274..94088ce 100644
--- a/TODO
+++ b/TODO
@@ -2,8 +2,6 @@
 
   - SSE 2 issues:
 
-      - Commented-out uses of fbCompositeCopyAreasse2()
-
       - Use MM_HINT_NTA instead of MM_HINT_T0
 
       - Use of fbCompositeOver_x888x8x8888sse2()
@@ -173,6 +171,8 @@
 
 done:
 
+- Commented-out uses of fbCompositeCopyAreasse2()
+
 - Consider whether calling regions region16 is really such a great
   idea. Vlad wants 32 bit regions for Cairo. This will break X server
   ABI, but should otherwise be mostly harmless, though a
commit 58ab45b85d1732da7c84a274acdca3bfcf1c36b1
Author: André Tupinambá <andrelrt at gmail.com>
Date:   Mon Jul 21 21:53:20 2008 -0400

    Use CopyAreasse2, plus a compatibility fix

diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 513e27a..806e380 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1445,11 +1445,8 @@ static const FastPathInfo sse_fast_paths[] =
     { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5,   fbCompositeSrc_8888RevNPx0565sse2,     NEED_PIXBUF },
     { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5,   fbCompositeSrc_8888RevNPx0565sse2,     NEED_PIXBUF },
     { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5,   fbCompositeSrc_8888RevNPx0565sse2,     NEED_PIXBUF },
-#if 0
-    /* FIXME: This code is commented out since it's apparently not actually faster than the generic code */
     { PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeCopyAreasse2,               0 },
     { PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeCopyAreasse2,               0 },
-#endif
 
     { PIXMAN_OP_ADD,  PIXMAN_a8,       PIXMAN_null,     PIXMAN_a8,       fbCompositeSrcAdd_8000x8000sse2,       0 },
     { PIXMAN_OP_ADD,  PIXMAN_a8r8g8b8, PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeSrcAdd_8888x8888sse2,       0 },
@@ -1461,15 +1458,12 @@ static const FastPathInfo sse_fast_paths[] =
     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888sse2,  0 },
     { PIXMAN_OP_SRC, PIXMAN_solid,     PIXMAN_a8,       PIXMAN_x8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888sse2,  0 },
 
-#if 0
-    /* FIXME: This code is commented out since it's apparently not actually faster than the generic code */
     { PIXMAN_OP_SRC, PIXMAN_a8r8g8b8,  PIXMAN_null,     PIXMAN_a8r8g8b8, fbCompositeCopyAreasse2,               0 },
     { PIXMAN_OP_SRC, PIXMAN_a8b8g8r8,  PIXMAN_null,     PIXMAN_a8b8g8r8, fbCompositeCopyAreasse2,               0 },
     { PIXMAN_OP_SRC, PIXMAN_x8r8g8b8,  PIXMAN_null,     PIXMAN_x8r8g8b8, fbCompositeCopyAreasse2,               0 },
     { PIXMAN_OP_SRC, PIXMAN_x8b8g8r8,  PIXMAN_null,     PIXMAN_x8b8g8r8, fbCompositeCopyAreasse2,               0 },
     { PIXMAN_OP_SRC, PIXMAN_r5g6b5,    PIXMAN_null,     PIXMAN_r5g6b5,   fbCompositeCopyAreasse2,               0 },
     { PIXMAN_OP_SRC, PIXMAN_b5g6r5,    PIXMAN_null,     PIXMAN_b5g6r5,   fbCompositeCopyAreasse2,               0 },
-#endif
 
     { PIXMAN_OP_IN,  PIXMAN_a8,        PIXMAN_null,     PIXMAN_a8,       fbCompositeIn_8x8sse2,                 0 },
     { PIXMAN_OP_IN,  PIXMAN_solid,     PIXMAN_a8,       PIXMAN_a8,       fbCompositeIn_nx8x8sse2,               0 },
diff --git a/pixman/pixman-sse.c b/pixman/pixman-sse.c
index b1e41b9..7c4d952 100644
--- a/pixman/pixman-sse.c
+++ b/pixman/pixman-sse.c
@@ -4425,12 +4425,12 @@ pixmanBltsse2 (uint32_t *src_bits,
 
         while (w >= 64)
         {
+            __m128i xmm0, xmm1, xmm2, xmm3;
+
             /* 128 bytes ahead */
             cachePrefetch (((__m128i*)s) + 8);
             cachePrefetch (((__m128i*)d) + 8);
 
-            __m128i xmm0, xmm1, xmm2, xmm3;
-
             xmm0 = load128Unaligned ((__m128i*)(s));
             xmm1 = load128Unaligned ((__m128i*)(s+16));
             xmm2 = load128Unaligned ((__m128i*)(s+32));


More information about the xorg-commit mailing list