pixman: Branch 'master' - 2 commits
Matt Turner
mattst88 at kemper.freedesktop.org
Tue Feb 21 09:50:02 PST 2012
pixman/pixman-mmx.c | 81 +++++++++++++++++++++++-----------------------------
1 file changed, 37 insertions(+), 44 deletions(-)
New commits:
commit 84221f4c1687b8ea14e9cbdc78b2ba7258e62c9e
Author: Matt Turner <mattst88 at gmail.com>
Date: Sun Feb 19 18:10:03 2012 -0500
mmx: Use _mm_shuffle_pi16
The pshufw x86 instruction is part of Extended 3DNow! and SSE1. The
equivalent ARM wshufh instruction was available from the first iwMMXt
instruction set.
This instruction is already used in the SSE2 code.
Reduces code size by ~9%.
amd64
text data bss dec hex filename
29925 2240 0 32165 7da5 .libs/libpixman_mmx_la-pixman-mmx.o
27237 2240 0 29477 7325 .libs/libpixman_mmx_la-pixman-mmx.o
x86
text data bss dec hex filename
27677 1792 0 29469 731d .libs/libpixman_mmx_la-pixman-mmx.o
24959 1792 0 26751 687f .libs/libpixman_mmx_la-pixman-mmx.o
arm
text data bss dec hex filename
30176 1792 0 31968 7ce0 .libs/libpixman_iwmmxt_la-pixman-mmx.o
27384 1792 0 29176 71f8 .libs/libpixman_iwmmxt_la-pixman-mmx.o
Signed-off-by: Matt Turner <mattst88 at gmail.com>
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index f8950be..48fc665 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -69,8 +69,24 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B)
);
return __A;
}
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
+{
+ __m64 ret;
+
+ asm("pshufw %2, %1, %0\n\t"
+ : "=y" (ret)
+ : "y" (__A), "K" (__N)
+ );
+
+ return ret;
+}
#endif
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
+
/* Notes about writing mmx code
*
* give memory operands as the second operand. If you give it as the
@@ -140,9 +156,6 @@ typedef struct
mmxdatafield mmx_mask_2;
mmxdatafield mmx_mask_3;
mmxdatafield mmx_full_alpha;
- mmxdatafield mmx_ffff0000ffff0000;
- mmxdatafield mmx_0000ffff00000000;
- mmxdatafield mmx_000000000000ffff;
mmxdatafield mmx_4x0101;
} mmx_data_t;
@@ -168,9 +181,6 @@ static const mmx_data_t c =
MMXDATA_INIT (.mmx_mask_2, 0xffff0000ffffffff),
MMXDATA_INIT (.mmx_mask_3, 0x0000ffffffffffff),
MMXDATA_INIT (.mmx_full_alpha, 0x00ff000000000000),
- MMXDATA_INIT (.mmx_ffff0000ffff0000, 0xffff0000ffff0000),
- MMXDATA_INIT (.mmx_0000ffff00000000, 0x0000ffff00000000),
- MMXDATA_INIT (.mmx_000000000000ffff, 0x000000000000ffff),
MMXDATA_INIT (.mmx_4x0101, 0x0101010101010101),
};
@@ -249,52 +259,19 @@ pix_add (__m64 a, __m64 b)
static force_inline __m64
expand_alpha (__m64 pixel)
{
- __m64 t1, t2;
-
- t1 = shift (pixel, -48);
- t2 = shift (t1, 16);
- t1 = _mm_or_si64 (t1, t2);
- t2 = shift (t1, 32);
- t1 = _mm_or_si64 (t1, t2);
-
- return t1;
+ return _mm_shuffle_pi16(pixel, _MM_SHUFFLE (3, 3, 3, 3));
}
static force_inline __m64
expand_alpha_rev (__m64 pixel)
{
- __m64 t1, t2;
-
- /* move alpha to low 16 bits and zero the rest */
- t1 = shift (pixel, 48);
- t1 = shift (t1, -48);
-
- t2 = shift (t1, 16);
- t1 = _mm_or_si64 (t1, t2);
- t2 = shift (t1, 32);
- t1 = _mm_or_si64 (t1, t2);
-
- return t1;
+ return _mm_shuffle_pi16(pixel, _MM_SHUFFLE (0, 0, 0, 0));
}
static force_inline __m64
invert_colors (__m64 pixel)
{
- __m64 x, y, z;
-
- x = y = z = pixel;
-
- x = _mm_and_si64 (x, MC (ffff0000ffff0000));
- y = _mm_and_si64 (y, MC (000000000000ffff));
- z = _mm_and_si64 (z, MC (0000ffff00000000));
-
- y = shift (y, 32);
- z = shift (z, -32);
-
- x = _mm_or_si64 (x, y);
- x = _mm_or_si64 (x, z);
-
- return x;
+ return _mm_shuffle_pi16(pixel, _MM_SHUFFLE (3, 0, 1, 2));
}
static force_inline __m64
commit 14208344964f341a7b4a704b05cf4804c23792e9
Author: Matt Turner <mattst88 at gmail.com>
Date: Sun Feb 19 01:32:31 2012 -0500
mmx: Use _mm_mulhi_pu16
The pmulhuw x86 instruction is part of Extended 3DNow! and SSE1. The
equivalent ARM wmuluh instruction was available from the first iwMMXt
instruction set.
This instruction is already used in the SSE2 code.
Reduces code size by ~5%.
amd64
text data bss dec hex filename
31325 2240 0 33565 831d .libs/libpixman_mmx_la-pixman-mmx.o
29925 2240 0 32165 7da5 .libs/libpixman_mmx_la-pixman-mmx.o
x86
text data bss dec hex filename
29165 1792 0 30957 78ed .libs/libpixman_mmx_la-pixman-mmx.o
27677 1792 0 29469 731d .libs/libpixman_mmx_la-pixman-mmx.o
arm
text data bss dec hex filename
31632 1792 0 33424 8290 .libs/libpixman_iwmmxt_la-pixman-mmx.o
30176 1792 0 31968 7ce0 .libs/libpixman_iwmmxt_la-pixman-mmx.o
Signed-off-by: Matt Turner <mattst88 at gmail.com>
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index d89c3d6..f8950be 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -56,6 +56,21 @@ _mm_empty (void)
}
#endif
+#ifdef USE_X86_MMX
+/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
+ * instructions to be generated that we don't want. Just duplicate the
+ * functions we want to use. */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+ asm("pmulhuw %1, %0\n\t"
+ : "+y" (__A)
+ : "y" (__B)
+ );
+ return __A;
+}
+#endif
+
/* Notes about writing mmx code
*
* give memory operands as the second operand. If you give it as the
@@ -128,6 +143,7 @@ typedef struct
mmxdatafield mmx_ffff0000ffff0000;
mmxdatafield mmx_0000ffff00000000;
mmxdatafield mmx_000000000000ffff;
+ mmxdatafield mmx_4x0101;
} mmx_data_t;
#if defined(_MSC_VER)
@@ -155,6 +171,7 @@ static const mmx_data_t c =
MMXDATA_INIT (.mmx_ffff0000ffff0000, 0xffff0000ffff0000),
MMXDATA_INIT (.mmx_0000ffff00000000, 0x0000ffff00000000),
MMXDATA_INIT (.mmx_000000000000ffff, 0x000000000000ffff),
+ MMXDATA_INIT (.mmx_4x0101, 0x0101010101010101),
};
#ifdef USE_CVT_INTRINSICS
@@ -218,8 +235,7 @@ pix_multiply (__m64 a, __m64 b)
res = _mm_mullo_pi16 (a, b);
res = _mm_adds_pu16 (res, MC (4x0080));
- res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
- res = _mm_srli_pi16 (res, 8);
+ res = _mm_mulhi_pu16 (res, MC (4x0101));
return res;
}
More information about the xorg-commit
mailing list