pixman: Branch 'master' - 2 commits

Søren Sandmann Pedersen sandmann at kemper.freedesktop.org
Tue Jul 10 23:22:01 PDT 2012


 pixman/pixman-mmx.c |   13 +-
 pixman/pixman-x86.c |  337 ++++++++++++++++++++++------------------------------
 2 files changed, 157 insertions(+), 193 deletions(-)

New commits:
commit 56321eff65832791252c7c324930d14c44d4d5f7
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Mon Jul 9 06:58:59 2012 -0400

    Make pixman-mmx.c compile on x86-32 without optimization
    
    When not optimizing, write _mm_shuffle_pi16() as a statement
    expression with inline assembly. That way we avoid
    __builtin_ia32_pshufw(), which is only available when compiling with
    -msse, while still allowing the non-optimizing gcc to understand that
    the second argument is a compile time constant.
    
    Tested-by: Knut Petersen <knut_petersen at t-online.de>

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 5441d6b..74a5e87 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -105,8 +105,17 @@ _mm_shuffle_pi16 (__m64 __A, int8_t const __N)
     return ret;
 }
 #  else
-#   define _mm_shuffle_pi16(A, N) \
-    ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N)))
+#   define _mm_shuffle_pi16(A, N)					\
+    ({									\
+	__m64 ret;							\
+									\
+	asm ("pshufw %2, %1, %0\n\t"					\
+	     : "=y" (ret)						\
+	     : "y" (A), "K" ((const int8_t)N)				\
+	);								\
+									\
+	ret;								\
+    })
 #  endif
 # endif
 #endif
commit 0c81957e9b4f83944075167ae27a955bb253e267
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Thu Jun 28 15:53:14 2012 -0400

    Cleanups and simplifications in x86 CPU feature detection
    
    A new function pixman_cpuid() is added that runs the cpuid instruction
    and returns the results. On GCC this function uses inline assembly; on
    MSVC, the function calls the __cpuid intrinsic.
    
    There is also a new function called have_cpuid() which detects whether
    cpuid is available. On x86-64 and MSVC, it simply returns TRUE; on
    32-bit x86, it checks whether the 22nd bit of eflags can be
    modified. On MSVC this does have the consequence that pixman will no
    longer work on CPUs without cpuid (i.e., older than 486 and some 486
    models).
    
    These two functions together make it possible to write a generic
    detect_cpu_features() in plain C. This function is then used in a new
    have_feature() function that checks whether a specific set of feature
    bits is available.
    
    Aside from the cleanups and simplifications, the main benefit from
    this patch is that pixman now can do feature detection on x86-64, so
    that newer instruction sets such as SSSE3 and SSE4.1 can be used. (And
    apparently the assumption that x86-64 CPUs always have MMX and SSE2 is
    no longer correct: Knight's Corner is x86-64, but doesn't have them).
    
    V2: Rename the constants in the getisax() code, as pointed out by Alan
    Coopersmith. Also reinstate the result variable and initialize
    features to 0.
    
    V3: Fixes for the fact that the upper 32 bits of a 64 bit register are
    zeroed whenever the corresponding 32 bit register is written to.
    
    V4: Fixes for the fact that in 32 bit mode, when gcc is not optimizing,
    there are not enough registers available. The new code uses the "a",
    "b", "c", and "d" constraints instead, and has two separate versions
    for 32 and 64 bit modes.

diff --git a/pixman/pixman-x86.c b/pixman/pixman-x86.c
index 52ad3df..57e4d1f 100644
--- a/pixman/pixman-x86.c
+++ b/pixman/pixman-x86.c
@@ -32,249 +32,204 @@
  * that would lead to SIGILL instructions on old CPUs that don't have
  * it.
  */
-#if !defined(__amd64__) && !defined(__x86_64__) && !defined(_M_AMD64)
-
-#ifdef HAVE_GETISAX
-#include <sys/auxv.h>
-#endif
 
 typedef enum
 {
-    NO_FEATURES = 0,
-    MMX = 0x1,
-    MMX_EXTENSIONS = 0x2,
-    SSE = 0x6,
-    SSE2 = 0x8,
-    CMOV = 0x10
+    X86_MMX			= (1 << 0),
+    X86_MMX_EXTENSIONS		= (1 << 1),
+    X86_SSE			= (1 << 2) | X86_MMX_EXTENSIONS,
+    X86_SSE2			= (1 << 3),
+    X86_CMOV			= (1 << 4)
 } cpu_features_t;
 
+#ifdef HAVE_GETISAX
 
-static unsigned int
+#include <sys/auxv.h>
+
+static cpu_features_t
 detect_cpu_features (void)
 {
-    unsigned int features = 0;
+    cpu_features_t features = 0;
     unsigned int result = 0;
-    
-#ifdef HAVE_GETISAX
+
     if (getisax (&result, 1))
     {
 	if (result & AV_386_CMOV)
-	    features |= CMOV;
+	    features |= X86_CMOV;
 	if (result & AV_386_MMX)
-	    features |= MMX;
+	    features |= X86_MMX;
 	if (result & AV_386_AMD_MMX)
-	    features |= MMX_EXTENSIONS;
+	    features |= X86_MMX_EXTENSIONS;
 	if (result & AV_386_SSE)
-	    features |= SSE;
+	    features |= X86_SSE;
 	if (result & AV_386_SSE2)
-	    features |= SSE2;
+	    features |= X86_SSE2;
     }
+
+    return features;
+}
+
+#else
+
+#define _PIXMAN_X86_64							\
+    (defined(__amd64__) || defined(__x86_64__) || defined(_M_AMD64))
+
+static pixman_bool_t
+have_cpuid (void)
+{
+#if _PIXMAN_X86_64 || defined (_MSC_VER)
+
+    return TRUE;
+
+#elif defined (__GNUC__)
+    uint32_t result;
+
+    __asm__ volatile (
+        "pushf"				"\n\t"
+        "pop %%eax"			"\n\t"
+        "mov %%eax, %%ecx"		"\n\t"
+        "xor $0x00200000, %%eax"	"\n\t"
+        "push %%eax"			"\n\t"
+        "popf"				"\n\t"
+        "pushf"				"\n\t"
+        "pop %%eax"			"\n\t"
+        "xor %%ecx, %%eax"		"\n\t"
+	"mov %%eax, %0"			"\n\t"
+	: "=r" (result)
+	:
+	: "%eax", "%ecx");
+
+    return !!result;
+
 #else
-    char vendor[13];
-#ifdef _MSC_VER
-    int vendor0 = 0, vendor1, vendor2;
+#error "Unknown compiler"
 #endif
-    vendor[0] = 0;
-    vendor[12] = 0;
-    
-#ifdef __GNUC__
-    /* see p. 118 of amd64 instruction set manual Vol3 */
-    /* We need to be careful about the handling of %ebx and
-     * %esp here. We can't declare either one as clobbered
+}
+
+static void
+pixman_cpuid (uint32_t feature,
+	      uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d)
+{
+#if defined (__GNUC__)
+
+#if _PIXMAN_X86_64
+    __asm__ volatile (
+        "cpuid"				"\n\t"
+	: "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
+	: "a" (feature));
+#else
+    /* On x86-32 we need to be careful about the handling of %ebx
+     * and %esp. We can't declare either one as clobbered
      * since they are special registers (%ebx is the "PIC
      * register" holding an offset to global data, %esp the
-     * stack pointer), so we need to make sure they have their
-     * original values when we access the output operands.
+     * stack pointer), so we need to make sure that %ebx is
+     * preserved, and that %esp has its original value when
+     * accessing the output operands.
      */
-    __asm__ (
-        "pushf\n"
-        "pop %%eax\n"
-        "mov %%eax, %%ecx\n"
-        "xor $0x00200000, %%eax\n"
-        "push %%eax\n"
-        "popf\n"
-        "pushf\n"
-        "pop %%eax\n"
-        "mov $0x0, %%edx\n"
-        "xor %%ecx, %%eax\n"
-        "jz 1f\n"
-	
-        "mov $0x00000000, %%eax\n"
-        "push %%ebx\n"
-        "cpuid\n"
-        "mov %%ebx, %%eax\n"
-        "pop %%ebx\n"
-        "mov %%eax, %1\n"
-        "mov %%edx, %2\n"
-        "mov %%ecx, %3\n"
-        "mov $0x00000001, %%eax\n"
-        "push %%ebx\n"
-        "cpuid\n"
-        "pop %%ebx\n"
-        "1:\n"
-        "mov %%edx, %0\n"
-	: "=r" (result),
-	  "=m" (vendor[0]),
-	  "=m" (vendor[4]),
-	  "=m" (vendor[8])
-	:
-	: "%eax", "%ecx", "%edx"
-        );
-    
+    __asm__ volatile (
+	"xchg %%ebx, %1"		"\n\t"
+	"cpuid"				"\n\t"
+	"xchg %%ebx, %1"		"\n\t"
+	: "=a" (*a), "=r" (*b), "=c" (*c), "=d" (*d)
+	: "a" (feature));
+#endif
+
 #elif defined (_MSC_VER)
-    
-    _asm {
-	pushfd
-	    pop eax
-	    mov ecx, eax
-	    xor eax, 00200000h
-	    push eax
-	    popfd
-	    pushfd
-	    pop eax
-	    mov edx, 0
-	    xor eax, ecx
-	    jz nocpuid
-	    
-	    mov eax, 0
-	    push ebx
-	    cpuid
-	    mov eax, ebx
-	    pop ebx
-	    mov vendor0, eax
-	    mov vendor1, edx
-	    mov vendor2, ecx
-	    mov eax, 1
-	    push ebx
-	    cpuid
-	    pop ebx
-	    nocpuid:
-	    mov result, edx
-	    }
-    memmove (vendor + 0, &vendor0, 4);
-    memmove (vendor + 4, &vendor1, 4);
-    memmove (vendor + 8, &vendor2, 4);
-    
+    int info[4];
+
+    __cpuid (info, feature);
+
+    *a = info[0];
+    *b = info[1];
+    *c = info[2];
+    *d = info[3];
 #else
-#   error unsupported compiler
+#error Unknown compiler
 #endif
-    
-    features = 0;
-    if (result)
+}
+
+static cpu_features_t
+detect_cpu_features (void)
+{
+    uint32_t a, b, c, d;
+    cpu_features_t features = 0;
+
+    if (!have_cpuid())
+	return features;
+
+    /* Get feature bits */
+    pixman_cpuid (0x01, &a, &b, &c, &d);
+    if (d & (1 << 15))
+	features |= X86_CMOV;
+    if (d & (1 << 23))
+	features |= X86_MMX;
+    if (d & (1 << 25))
+	features |= X86_SSE;
+    if (d & (1 << 26))
+	features |= X86_SSE2;
+
+    /* Check for AMD specific features */
+    if ((features & X86_MMX) && !(features & X86_SSE))
     {
-	/* result now contains the standard feature bits */
-	if (result & (1 << 15))
-	    features |= CMOV;
-	if (result & (1 << 23))
-	    features |= MMX;
-	if (result & (1 << 25))
-	    features |= SSE;
-	if (result & (1 << 26))
-	    features |= SSE2;
-	if ((features & MMX) && !(features & SSE) &&
-	    (strcmp (vendor, "AuthenticAMD") == 0 ||
-	     strcmp (vendor, "Geode by NSC") == 0))
+	char vendor[13];
+
+	/* Get vendor string */
+	memset (vendor, 0, sizeof vendor);
+
+	pixman_cpuid (0x00, &a, &b, &c, &d);
+	memcpy (vendor + 0, &b, 4);
+	memcpy (vendor + 4, &d, 4);
+	memcpy (vendor + 8, &c, 4);
+
+	if (strcmp (vendor, "AuthenticAMD") == 0 ||
+	    strcmp (vendor, "Geode by NSC") == 0)
 	{
-	    /* check for AMD MMX extensions */
-#ifdef __GNUC__
-	    __asm__ (
-	        "	push %%ebx\n"
-	        "	mov $0x80000000, %%eax\n"
-	        "	cpuid\n"
-	        "	xor %%edx, %%edx\n"
-	        "	cmp $0x1, %%eax\n"
-	        "	jge 2f\n"
-	        "	mov $0x80000001, %%eax\n"
-	        "	cpuid\n"
-	        "2:\n"
-	        "	pop %%ebx\n"
-	        "	mov %%edx, %0\n"
-		: "=r" (result)
-		:
-		: "%eax", "%ecx", "%edx"
-	        );
-#elif defined _MSC_VER
-	    _asm {
-		push ebx
-		    mov eax, 80000000h
-		    cpuid
-		    xor edx, edx
-		    cmp eax, 1
-		    jge notamd
-		    mov eax, 80000001h
-		    cpuid
-		    notamd:
-		    pop ebx
-		    mov result, edx
-		    }
-#endif
-	    if (result & (1 << 22))
-		features |= MMX_EXTENSIONS;
+	    pixman_cpuid (0x80000000, &a, &b, &c, &d);
+	    if (a >= 0x80000001)
+	    {
+		pixman_cpuid (0x80000001, &a, &b, &c, &d);
+
+		if (d & (1 << 22))
+		    features |= X86_MMX_EXTENSIONS;
+	    }
 	}
     }
-#endif /* HAVE_GETISAX */
-    
+
     return features;
 }
 
-#ifdef USE_X86_MMX
-static pixman_bool_t
-pixman_have_mmx (void)
-{
-    static pixman_bool_t initialized = FALSE;
-    static pixman_bool_t mmx_present;
-    
-    if (!initialized)
-    {
-	unsigned int features = detect_cpu_features ();
-	mmx_present = (features & (MMX | MMX_EXTENSIONS)) == (MMX | MMX_EXTENSIONS);
-	initialized = TRUE;
-    }
-    
-    return mmx_present;
-}
 #endif
 
-#ifdef USE_SSE2
 static pixman_bool_t
-pixman_have_sse2 (void)
+have_feature (cpu_features_t feature)
 {
-    static pixman_bool_t initialized = FALSE;
-    static pixman_bool_t sse2_present;
-    
+    static pixman_bool_t initialized;
+    static cpu_features_t features;
+
     if (!initialized)
     {
-	unsigned int features = detect_cpu_features ();
-	sse2_present = (features & (MMX | MMX_EXTENSIONS | SSE | SSE2)) == (MMX | MMX_EXTENSIONS | SSE | SSE2);
+	features = detect_cpu_features();
 	initialized = TRUE;
     }
-    
-    return sse2_present;
-}
 
-#endif
-
-#else /* __amd64__ */
-#ifdef USE_X86_MMX
-#define pixman_have_mmx() TRUE
-#endif
-#ifdef USE_SSE2
-#define pixman_have_sse2() TRUE
-#endif
-#endif /* __amd64__ */
+    return (features & feature) == feature;
+}
 
 #endif
 
 pixman_implementation_t *
 _pixman_x86_get_implementations (pixman_implementation_t *imp)
 {
+#define MMX_BITS  (X86_MMX | X86_MMX_EXTENSIONS)
+#define SSE2_BITS (X86_MMX | X86_MMX_EXTENSIONS | X86_SSE | X86_SSE2)
+
 #ifdef USE_X86_MMX
-    if (!_pixman_disabled ("mmx") && pixman_have_mmx())
+    if (!_pixman_disabled ("mmx") && have_feature (MMX_BITS))
 	imp = _pixman_implementation_create_mmx (imp);
 #endif
 
 #ifdef USE_SSE2
-    if (!_pixman_disabled ("sse2") && pixman_have_sse2())
+    if (!_pixman_disabled ("sse2") && have_feature (SSE2_BITS))
 	imp = _pixman_implementation_create_sse2 (imp);
 #endif
 


More information about the xorg-commit mailing list