pixman: Branch 'master' - 2 commits

Matt Turner mattst88 at kemper.freedesktop.org
Tue Feb 21 09:50:02 PST 2012


 pixman/pixman-mmx.c |   81 +++++++++++++++++++++++-----------------------------
 1 file changed, 37 insertions(+), 44 deletions(-)

New commits:
commit 84221f4c1687b8ea14e9cbdc78b2ba7258e62c9e
Author: Matt Turner <mattst88 at gmail.com>
Date:   Sun Feb 19 18:10:03 2012 -0500

    mmx: Use _mm_shuffle_pi16
    
    The pshufw x86 instruction is part of Extended 3DNow! and SSE1. The
    equivalent ARM wshufh instruction was available from the first iwMMXt
    instruction set.
    
    This instruction is already used in the SSE2 code.
    
    Reduces code size by ~9%.
    
    amd64
      text    data     bss     dec     hex filename
     29925    2240       0   32165    7da5 .libs/libpixman_mmx_la-pixman-mmx.o
     27237    2240       0   29477    7325 .libs/libpixman_mmx_la-pixman-mmx.o
    
    x86
      text    data     bss     dec     hex filename
     27677    1792       0   29469    731d .libs/libpixman_mmx_la-pixman-mmx.o
     24959    1792       0   26751    687f .libs/libpixman_mmx_la-pixman-mmx.o
    
    arm
      text    data     bss     dec     hex filename
     30176    1792       0   31968    7ce0 .libs/libpixman_iwmmxt_la-pixman-mmx.o
     27384    1792       0   29176    71f8 .libs/libpixman_iwmmxt_la-pixman-mmx.o
    
    Signed-off-by: Matt Turner <mattst88 at gmail.com>

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index f8950be..48fc665 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -69,8 +69,24 @@ _mm_mulhi_pu16 (__m64 __A, __m64 __B)
     );
     return __A;
 }
+
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_shuffle_pi16 (__m64 __A, int8_t const __N)
+{
+    __m64 ret;
+
+    asm("pshufw %2, %1, %0\n\t"
+	: "=y" (ret)
+	: "y" (__A), "K" (__N)
+    );
+
+    return ret;
+}
 #endif
 
+#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
+ (((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
+
 /* Notes about writing mmx code
  *
  * give memory operands as the second operand. If you give it as the
@@ -140,9 +156,6 @@ typedef struct
     mmxdatafield mmx_mask_2;
     mmxdatafield mmx_mask_3;
     mmxdatafield mmx_full_alpha;
-    mmxdatafield mmx_ffff0000ffff0000;
-    mmxdatafield mmx_0000ffff00000000;
-    mmxdatafield mmx_000000000000ffff;
     mmxdatafield mmx_4x0101;
 } mmx_data_t;
 
@@ -168,9 +181,6 @@ static const mmx_data_t c =
     MMXDATA_INIT (.mmx_mask_2,                   0xffff0000ffffffff),
     MMXDATA_INIT (.mmx_mask_3,                   0x0000ffffffffffff),
     MMXDATA_INIT (.mmx_full_alpha,               0x00ff000000000000),
-    MMXDATA_INIT (.mmx_ffff0000ffff0000,         0xffff0000ffff0000),
-    MMXDATA_INIT (.mmx_0000ffff00000000,         0x0000ffff00000000),
-    MMXDATA_INIT (.mmx_000000000000ffff,         0x000000000000ffff),
     MMXDATA_INIT (.mmx_4x0101,                   0x0101010101010101),
 };
 
@@ -249,52 +259,19 @@ pix_add (__m64 a, __m64 b)
 static force_inline __m64
 expand_alpha (__m64 pixel)
 {
-    __m64 t1, t2;
-
-    t1 = shift (pixel, -48);
-    t2 = shift (t1, 16);
-    t1 = _mm_or_si64 (t1, t2);
-    t2 = shift (t1, 32);
-    t1 = _mm_or_si64 (t1, t2);
-
-    return t1;
+    return _mm_shuffle_pi16(pixel, _MM_SHUFFLE (3, 3, 3, 3));
 }
 
 static force_inline __m64
 expand_alpha_rev (__m64 pixel)
 {
-    __m64 t1, t2;
-
-    /* move alpha to low 16 bits and zero the rest */
-    t1 = shift (pixel,  48);
-    t1 = shift (t1, -48);
-
-    t2 = shift (t1, 16);
-    t1 = _mm_or_si64 (t1, t2);
-    t2 = shift (t1, 32);
-    t1 = _mm_or_si64 (t1, t2);
-
-    return t1;
+    return _mm_shuffle_pi16(pixel, _MM_SHUFFLE (0, 0, 0, 0));
 }
 
 static force_inline __m64
 invert_colors (__m64 pixel)
 {
-    __m64 x, y, z;
-
-    x = y = z = pixel;
-
-    x = _mm_and_si64 (x, MC (ffff0000ffff0000));
-    y = _mm_and_si64 (y, MC (000000000000ffff));
-    z = _mm_and_si64 (z, MC (0000ffff00000000));
-
-    y = shift (y, 32);
-    z = shift (z, -32);
-
-    x = _mm_or_si64 (x, y);
-    x = _mm_or_si64 (x, z);
-
-    return x;
+    return _mm_shuffle_pi16(pixel, _MM_SHUFFLE (3, 0, 1, 2));
 }
 
 static force_inline __m64
commit 14208344964f341a7b4a704b05cf4804c23792e9
Author: Matt Turner <mattst88 at gmail.com>
Date:   Sun Feb 19 01:32:31 2012 -0500

    mmx: Use _mm_mulhi_pu16
    
    The pmulhuw x86 instruction is part of Extended 3DNow! and SSE1. The
    equivalent ARM wmuluh instruction was available from the first iwMMXt
    instruction set.
    
    This instruction is already used in the SSE2 code.
    
    Reduces code size by ~5%.
    
    amd64
      text    data     bss     dec     hex filename
     31325    2240       0   33565    831d .libs/libpixman_mmx_la-pixman-mmx.o
     29925    2240       0   32165    7da5 .libs/libpixman_mmx_la-pixman-mmx.o
    
    x86
      text    data     bss     dec     hex filename
     29165    1792       0   30957    78ed .libs/libpixman_mmx_la-pixman-mmx.o
     27677    1792       0   29469    731d .libs/libpixman_mmx_la-pixman-mmx.o
    
    arm
      text    data     bss     dec     hex filename
     31632    1792       0   33424    8290 .libs/libpixman_iwmmxt_la-pixman-mmx.o
     30176    1792       0   31968    7ce0 .libs/libpixman_iwmmxt_la-pixman-mmx.o
    
    Signed-off-by: Matt Turner <mattst88 at gmail.com>

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index d89c3d6..f8950be 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -56,6 +56,21 @@ _mm_empty (void)
 }
 #endif
 
+#ifdef USE_X86_MMX
+/* We have to compile with -msse to use xmmintrin.h, but that causes SSE
+ * instructions to be generated that we don't want. Just duplicate the
+ * functions we want to use.  */
+extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mulhi_pu16 (__m64 __A, __m64 __B)
+{
+    asm("pmulhuw %1, %0\n\t"
+	: "+y" (__A)
+	: "y" (__B)
+    );
+    return __A;
+}
+#endif
+
 /* Notes about writing mmx code
  *
  * give memory operands as the second operand. If you give it as the
@@ -128,6 +143,7 @@ typedef struct
     mmxdatafield mmx_ffff0000ffff0000;
     mmxdatafield mmx_0000ffff00000000;
     mmxdatafield mmx_000000000000ffff;
+    mmxdatafield mmx_4x0101;
 } mmx_data_t;
 
 #if defined(_MSC_VER)
@@ -155,6 +171,7 @@ static const mmx_data_t c =
     MMXDATA_INIT (.mmx_ffff0000ffff0000,         0xffff0000ffff0000),
     MMXDATA_INIT (.mmx_0000ffff00000000,         0x0000ffff00000000),
     MMXDATA_INIT (.mmx_000000000000ffff,         0x000000000000ffff),
+    MMXDATA_INIT (.mmx_4x0101,                   0x0101010101010101),
 };
 
 #ifdef USE_CVT_INTRINSICS
@@ -218,8 +235,7 @@ pix_multiply (__m64 a, __m64 b)
 
     res = _mm_mullo_pi16 (a, b);
     res = _mm_adds_pu16 (res, MC (4x0080));
-    res = _mm_adds_pu16 (res, _mm_srli_pi16 (res, 8));
-    res = _mm_srli_pi16 (res, 8);
+    res = _mm_mulhi_pu16 (res, MC (4x0101));
 
     return res;
 }


More information about the xorg-commit mailing list