pixman: Branch 'master' - 3 commits
Søren Sandmann Pedersen
sandmann at kemper.freedesktop.org
Mon Jul 21 19:10:18 PDT 2008
TODO | 4 ++--
configure.ac | 4 +++-
pixman/pixman-pict.c | 11 ++++-------
pixman/pixman-sse.c | 4 ++--
4 files changed, 11 insertions(+), 12 deletions(-)
New commits:
commit db3fb5eb605c4e1a6fcb93902389a22fc496151c
Author: Søren Sandmann Pedersen <sandmann at daimi.au.dk>
Date: Mon Jul 21 22:06:02 2008 -0400
Don't require GCC 4.2 on x86-64
diff --git a/configure.ac b/configure.ac
index 6dcffb1..13563ed 100644
--- a/configure.ac
+++ b/configure.ac
@@ -224,7 +224,9 @@ CFLAGS="$CFLAGS -msse2 $SSE_CFLAGS"
AC_COMPILE_IFELSE([
#if defined(__GNUC__) && (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2))
-#error "Need GCC >= 4.2 for SSE2 intrinsics"
+# if !defined(__amd64__) && !defined(__x86_64__)
+# error "Need GCC >= 4.2 for SSE2 intrinsics on x86"
+# endif
#endif
#include <mmintrin.h>
#include <xmmintrin.h>
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 806e380..b918219 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1703,7 +1703,7 @@ pixman_optimize_operator(pixman_op_t op, pixman_image_t *pSrc, pixman_image_t *p
}
-#if defined(USE_SSE2) && defined (__GNUC__)
+#if defined(USE_SSE2) && defined(__GNUC__) && !defined(__x86_64__) && !defined(__amd64__)
/*
* Work around GCC bug causing crashes in Mozilla with SSE2
@@ -1715,6 +1715,9 @@ pixman_optimize_operator(pixman_op_t op, pixman_image_t *pSrc, pixman_image_t *p
* The __force_align_arg_pointer__ makes gcc generate a prologue that
* realigns the stack pointer to 16 bytes.
*
+ * On x86-64 this is not necessary because the standard ABI already
+ * calls for a 16 byte aligned stack.
+ *
* See https://bugs.freedesktop.org/show_bug.cgi?id=15693
*/
commit 53fa7133fdf7f5879f10847228f478983c480b79
Author: Søren Sandmann Pedersen <sandmann at daimi.au.dk>
Date: Mon Jul 21 21:55:55 2008 -0400
TODO
diff --git a/TODO b/TODO
index c16c274..94088ce 100644
--- a/TODO
+++ b/TODO
@@ -2,8 +2,6 @@
- SSE 2 issues:
- - Commented-out uses of fbCompositeCopyAreasse2()
-
- Use MM_HINT_NTA instead of MM_HINT_T0
- Use of fbCompositeOver_x888x8x8888sse2()
@@ -173,6 +171,8 @@
done:
+- Commented-out uses of fbCompositeCopyAreasse2()
+
- Consider whether calling regions region16 is really such a great
idea. Vlad wants 32 bit regions for Cairo. This will break X server
ABI, but should otherwise be mostly harmless, though a
commit 58ab45b85d1732da7c84a274acdca3bfcf1c36b1
Author: André Tupinambá <andrelrt at gmail.com>
Date: Mon Jul 21 21:53:20 2008 -0400
Use CopyAreasse2, plus a compatibility fix
diff --git a/pixman/pixman-pict.c b/pixman/pixman-pict.c
index 513e27a..806e380 100644
--- a/pixman/pixman-pict.c
+++ b/pixman/pixman-pict.c
@@ -1445,11 +1445,8 @@ static const FastPathInfo sse_fast_paths[] =
{ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_a8b8g8r8, PIXMAN_r5g6b5, fbCompositeSrc_8888RevNPx0565sse2, NEED_PIXBUF },
{ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8r8g8b8, PIXMAN_b5g6r5, fbCompositeSrc_8888RevNPx0565sse2, NEED_PIXBUF },
{ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_a8b8g8r8, PIXMAN_b5g6r5, fbCompositeSrc_8888RevNPx0565sse2, NEED_PIXBUF },
-#if 0
- /* FIXME: This code is commented out since it's apparently not actually faster than the generic code */
{ PIXMAN_OP_OVER, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreasse2, 0 },
{ PIXMAN_OP_OVER, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreasse2, 0 },
-#endif
{ PIXMAN_OP_ADD, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeSrcAdd_8000x8000sse2, 0 },
{ PIXMAN_OP_ADD, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeSrcAdd_8888x8888sse2, 0 },
@@ -1461,15 +1458,12 @@ static const FastPathInfo sse_fast_paths[] =
{ PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888sse2, 0 },
{ PIXMAN_OP_SRC, PIXMAN_solid, PIXMAN_a8, PIXMAN_x8b8g8r8, fbCompositeSolidMaskSrc_nx8x8888sse2, 0 },
-#if 0
- /* FIXME: This code is commented out since it's apparently not actually faster than the generic code */
{ PIXMAN_OP_SRC, PIXMAN_a8r8g8b8, PIXMAN_null, PIXMAN_a8r8g8b8, fbCompositeCopyAreasse2, 0 },
{ PIXMAN_OP_SRC, PIXMAN_a8b8g8r8, PIXMAN_null, PIXMAN_a8b8g8r8, fbCompositeCopyAreasse2, 0 },
{ PIXMAN_OP_SRC, PIXMAN_x8r8g8b8, PIXMAN_null, PIXMAN_x8r8g8b8, fbCompositeCopyAreasse2, 0 },
{ PIXMAN_OP_SRC, PIXMAN_x8b8g8r8, PIXMAN_null, PIXMAN_x8b8g8r8, fbCompositeCopyAreasse2, 0 },
{ PIXMAN_OP_SRC, PIXMAN_r5g6b5, PIXMAN_null, PIXMAN_r5g6b5, fbCompositeCopyAreasse2, 0 },
{ PIXMAN_OP_SRC, PIXMAN_b5g6r5, PIXMAN_null, PIXMAN_b5g6r5, fbCompositeCopyAreasse2, 0 },
-#endif
{ PIXMAN_OP_IN, PIXMAN_a8, PIXMAN_null, PIXMAN_a8, fbCompositeIn_8x8sse2, 0 },
{ PIXMAN_OP_IN, PIXMAN_solid, PIXMAN_a8, PIXMAN_a8, fbCompositeIn_nx8x8sse2, 0 },
diff --git a/pixman/pixman-sse.c b/pixman/pixman-sse.c
index b1e41b9..7c4d952 100644
--- a/pixman/pixman-sse.c
+++ b/pixman/pixman-sse.c
@@ -4425,12 +4425,12 @@ pixmanBltsse2 (uint32_t *src_bits,
while (w >= 64)
{
+ __m128i xmm0, xmm1, xmm2, xmm3;
+
/* 128 bytes ahead */
cachePrefetch (((__m128i*)s) + 8);
cachePrefetch (((__m128i*)d) + 8);
- __m128i xmm0, xmm1, xmm2, xmm3;
-
xmm0 = load128Unaligned ((__m128i*)(s));
xmm1 = load128Unaligned ((__m128i*)(s+16));
xmm2 = load128Unaligned ((__m128i*)(s+32));
More information about the xorg-commit mailing list