pixman: Branch 'master' - 6 commits

Søren Sandmann Pedersen sandmann at kemper.freedesktop.org
Tue Mar 23 07:56:25 PDT 2010


 configure.ac                 |   54 +++----
 pixman/Makefile.am           |    9 -
 pixman/pixman-arm-common.h   |  273 +++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-neon-asm.S |   16 +-
 pixman/pixman-arm-neon.c     |  304 +++++++--------------------------------
 pixman/pixman-arm-simd-asm.S |  330 +++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-simd.c     |  171 +++++++++-------------
 7 files changed, 769 insertions(+), 388 deletions(-)

New commits:
commit 27a9f0468bdfa257e70270bf9addd5ad064f918b
Merge: 69f1ec9... 3ef2033...
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Tue Mar 23 11:00:04 2010 -0400

    Merge remote branch 'ssvb/arm-fixes'

commit 3ef203331f124bf137c6e0c8d5516b1209c92dd9
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Mon Mar 22 21:56:17 2010 +0200

    ARM: SIMD optimizations moved to a separate .S file
    
    This should be the last step in providing full armv4t compatibility
    with CPU features runtime autodetection in pixman.

diff --git a/configure.ac b/configure.ac
index 4668715..ed7d16a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -361,30 +361,24 @@ AC_SUBST(VMX_CFLAGS)
 
 AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
 
-dnl ===========================================================================
-dnl Check for ARM SIMD instructions
-ARM_SIMD_CFLAGS=""
-
+dnl ==========================================================================
+dnl Check if assembler is gas compatible and supports ARM SIMD instructions
 have_arm_simd=no
 AC_MSG_CHECKING(whether to use ARM SIMD assembler)
-# check with default CFLAGS in case the toolchain turns on a sufficiently recent -mcpu=
-AC_COMPILE_IFELSE([
-int main () {
-    asm("uqadd8 r1, r1, r2");
-    return 0;
-}], have_arm_simd=yes,
-    # check again with an explicit -mcpu= in case the toolchain defaults to an
-    # older one; note that uqadd8 isn't available in Thumb mode on arm1136j-s
-    # so we force ARM mode
-    ARM_SIMD_CFLAGS="-mcpu=arm1136j-s -marm"
-    xserver_save_CFLAGS=$CFLAGS
-    CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
-    AC_COMPILE_IFELSE([
-    int main () {
-        asm("uqadd8 r1, r1, r2");
-        return 0;
-    }], have_arm_simd=yes)
-    CFLAGS=$xserver_save_CFLAGS)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="-x assembler-with-cpp $CFLAGS"
+AC_COMPILE_IFELSE([[
+.text
+.arch armv6
+.object_arch armv4
+.arm
+.altmacro
+#ifndef __ARM_EABI__
+#error EABI is required (to be sure that calling conventions are compatible)
+#endif
+pld [r0]
+uqadd8 r0, r0, r0]], have_arm_simd=yes)
+CFLAGS=$xserver_save_CFLAGS
 
 AC_ARG_ENABLE(arm-simd,
    [AC_HELP_STRING([--disable-arm-simd],
@@ -396,20 +390,16 @@ if test $enable_arm_simd = no ; then
 fi
 
 if test $have_arm_simd = yes ; then
-   AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD compiler intrinsics])
-else
-   ARM_SIMD_CFLAGS=
+   AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD assembly optimizations])
 fi
 
+AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
+
 AC_MSG_RESULT($have_arm_simd)
 if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
    AC_MSG_ERROR([ARM SIMD intrinsics not detected])
 fi
 
-AC_SUBST(ARM_SIMD_CFLAGS)
-
-AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
-
 dnl ==========================================================================
 dnl Check if assembler is gas compatible and supports NEON instructions
 have_arm_neon=no
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 5a0e7a9..66ad7f0 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -97,12 +97,14 @@ endif
 if USE_ARM_SIMD
 noinst_LTLIBRARIES += libpixman-arm-simd.la
 libpixman_arm_simd_la_SOURCES = \
-	pixman-arm-simd.c
-libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_SIMD_CFLAGS)
+	pixman-arm-simd.c	\
+	pixman-arm-common.h	\
+	pixman-arm-simd-asm.S
+libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS)
 libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
 libpixman_1_la_LIBADD += libpixman-arm-simd.la
 
-ASM_CFLAGS_arm_simd=$(ARM_SIMD_CFLAGS)
+ASM_CFLAGS_arm_simd=
 endif
 
 # arm neon code
@@ -110,6 +112,7 @@ if USE_ARM_NEON
 noinst_LTLIBRARIES += libpixman-arm-neon.la
 libpixman_arm_neon_la_SOURCES = \
         pixman-arm-neon.c	\
+        pixman-arm-common.h	\
         pixman-arm-neon-asm.S	\
         pixman-arm-neon-asm.h
 libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
new file mode 100644
index 0000000..1a1a0d6
--- /dev/null
+++ b/pixman/pixman-arm-simd-asm.S
@@ -0,0 +1,330 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission.  Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose.  It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author:  Jeff Muizelaar (jeff at infidigm.net)
+ *
+ */
+
+/* Prevent the stack from becoming executable */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+	.text
+	.arch armv6
+	.object_arch armv4
+	.arm
+	.altmacro
+
+/* Supplementary macro for setting function attributes */
+.macro pixman_asm_function fname /* begins a function named by the macro argument: directives plus entry label */
+	.func fname /* closed by the .endfunc that follows each function body */
+	.global fname /* make the symbol visible to the linker */
+#ifdef __ELF__
+	.hidden fname /* ELF only: hidden visibility */
+	.type fname, %function /* ELF only: mark the symbol as a function */
+#endif
+fname:
+.endm
+
+/*
+ * The code below was generated by gcc 4.3.4 from the commented out
+ * functions in 'pixman-arm-simd.c' file with the following optimization
+ * options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer"
+ *
+ * TODO: replace gcc generated code with hand tuned versions because
+ * the code quality is not very good, introduce symbolic register
+ * aliases for better readability and maintainability.
+ */
+
+pixman_asm_function pixman_composite_add_8000_8000_asm_armv6 /* r0=width r1=height r2=dst_line r3=dst_stride; on stack: src_line, src_stride */
+	push	{r4, r5, r6, r7, r8, r9, r10, r11} /* 8 registers = 32 bytes */
+	mov	r10, r1 /* r10 = height, used as the row counter */
+	sub	sp, sp, #4 /* one word of scratch space */
+	subs	r10, r10, #1
+	mov	r11, r0 /* r11 = width */
+	mov	r8, r2 /* r8 = start of current dst row */
+	str	r3, [sp] /* [sp] = dst_stride */
+	ldr	r7, [sp, #36] /* first stack argument (4 scratch + 32 saved): src_line */
+	bcc	0f /* the subs above borrowed, i.e. height was 0 */
+6:	cmp	r11, #0
+	beq	1f /* zero width: only advance the row pointers */
+	orr	r3, r8, r7
+	tst	r3, #3
+	beq	2f /* both row pointers 4-byte aligned: word path */
+	mov	r1, r8 /* r1 = dst cursor */
+	mov	r0, r7 /* r0 = src cursor */
+	mov	r12, r11 /* r12 = bytes left in this row */
+	b	3f
+5:	tst	r3, #3 /* r3 = dst cursor | src cursor, set at the end of the byte loop */
+	beq	4f /* both cursors aligned now: switch to the word loop */
+3:	ldrb	r2, [r0], #1
+	subs	r12, r12, #1 /* these flags are consumed by the bne below */
+	ldrb	r3, [r1]
+	uqadd8	r3, r2, r3 /* unsigned saturating byte add */
+	strb	r3, [r1], #1
+	orr	r3, r1, r0
+	bne	5b
+1:	ldr	r3, [sp] /* row done: step both row pointers by their strides */
+	add	r8, r8, r3
+	ldr	r3, [sp, #40] /* second stack argument: src_stride */
+	add	r7, r7, r3
+10:	subs	r10, r10, #1
+	bcs	6b /* next row unless the row counter decrement borrowed */
+0:	add	sp, sp, #4
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
+	bx	lr
+2:	mov	r12, r11
+	mov	r1, r8
+	mov	r0, r7
+4:	cmp	r12, #3
+	subgt	r6, r12, #4 /* r6 = count - 4 */
+	movgt	r9, r12 /* r9 = count */
+	lsrgt	r5, r6, #2
+	addgt	r3, r5, #1 /* number of whole words */
+	movgt	r12, #0 /* r12 becomes the byte offset of the word loop */
+	lslgt	r4, r3, #2 /* r4 = end offset of the word loop */
+	ble	7f /* 3 bytes or fewer: byte tail only */
+8:	ldr	r3, [r0, r12]
+	ldr	r2, [r1, r12]
+	uqadd8	r3, r3, r2 /* four saturating byte adds at once */
+	str	r3, [r1, r12]
+	add	r12, r12, #4
+	cmp	r12, r4
+	bne	8b
+	sub	r3, r9, #4
+	bic	r3, r3, #3
+	add	r3, r3, #4 /* r3 = bytes handled by the word loop */
+	subs	r12, r6, r5, lsl #2 /* r12 = leftover bytes (0..3) */
+	add	r1, r1, r3
+	add	r0, r0, r3
+	beq	1b /* no leftover: go to the row advance */
+7:	mov	r4, #0 /* byte tail, r4 = offset into the tail */
+9:	ldrb	r3, [r1, r4]
+	ldrb	r2, [r0, r4]
+	uqadd8	r3, r2, r3
+	strb	r3, [r1, r4]
+	add	r4, r4, #1
+	cmp	r4, r12
+	bne	9b
+	ldr	r3, [sp] /* same row advance as at label 1 */
+	add	r8, r8, r3
+	ldr	r3, [sp, #40]
+	add	r7, r7, r3
+	b	10b
+.endfunc
+
+pixman_asm_function pixman_composite_over_8888_8888_asm_armv6 /* r0=width r1=height r2=dst_line r3=dst_stride; on stack: src_line, src_stride */
+	push	{r4, r5, r6, r7, r8, r9, r10, r11} /* 8 registers = 32 bytes */
+	sub	sp, sp, #20 /* five words of scratch space */
+	cmp	r1, #0 /* flags consumed by the beq 0f below */
+	mov	r12, r2 /* r12 = start of current dst row */
+	str	r1, [sp, #12] /* [sp, #12] = height */
+	str	r0, [sp, #16] /* [sp, #16] = width */
+	ldr	r2, [sp, #52] /* first stack argument (20 scratch + 32 saved): src_line */
+	beq	0f /* height == 0: nothing to do */
+	lsl	r3, r3, #2 /* dst_stride << 2, used below as the per-row address step */
+	str	r3, [sp]
+	ldr	r3, [sp, #56] /* second stack argument: src_stride */
+	mov	r10, #0 /* r10 = number of rows completed */
+	lsl	r3, r3, #2 /* src_stride << 2 */
+	str	r3, [sp, #8]
+	mov	r11, r3
+	b	1f
+6:	ldr	r11, [sp, #8] /* reload the src row step; r11 is reused inside the row */
+1:	ldr	r9, [sp]
+	mov	r0, r12 /* r0 = dst cursor */
+	add	r12, r12, r9
+	mov	r1, r2 /* r1 = src cursor */
+	str	r12, [sp, #4] /* spill next dst row; r12 becomes the pixel counter */
+	add	r2, r2, r11 /* r2 = next src row */
+	ldr	r12, [sp, #16]
+	ldr	r3, =0x00800080
+	ldr	r9, =0xff00ff00
+	mov	r11, #255
+	cmp	r12, #0
+	beq	4f /* zero width: skip the pixel loop */
+5:	ldr	r5, [r1], #4 /* r5 = src pixel */
+	ldr	r4, [r0] /* r4 = dst pixel */
+	sub	r8, r11, r5, lsr #24 /* r8 = 255 - top byte of src */
+	uxtb16	r6, r4 /* r6 = dst bytes 0 and 2 as halfwords */
+	uxtb16	r7, r4, ror #8 /* r7 = dst bytes 1 and 3 as halfwords */
+	mla	r6, r6, r8, r3 /* r6 * r8 + 0x00800080 */
+	mla	r7, r7, r8, r3
+	uxtab16	r6, r6, r6, ror #8 /* add bytes 1 and 3 of r6 to its halfwords */
+	uxtab16	r7, r7, r7, ror #8
+	and	r7, r7, r9 /* keep bytes 1 and 3 of r7 */
+	uxtab16	r6, r7, r6, ror #8 /* merge in bytes 1 and 3 of r6 as bytes 0 and 2 */
+	uqadd8	r5, r6, r5 /* per-byte saturating add of scaled dst and src */
+	str	r5, [r0], #4
+	subs	r12, r12, #1
+	bne	5b
+4:	ldr	r3, [sp, #12]
+	add	r10, r10, #1
+	cmp	r10, r3 /* all rows done? */
+	ldr	r12, [sp, #4] /* restore next dst row pointer */
+	bne	6b
+0:	add	sp, sp, #20
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
+	bx	lr
+.endfunc
+
+pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6 /* r0=width r1=height r2=dst_line r3=dst_stride; on stack: src_line, src_stride, mask */
+	push	{r4, r5, r6, r7, r8, r9, r10, r11} /* 8 registers = 32 bytes */
+	sub	sp, sp, #28 /* seven words of scratch space */
+	cmp	r1, #0 /* flags consumed by the beq 0f below */
+	str	r1, [sp, #12] /* [sp, #12] = height */
+	ldrb	r1, [sp, #71] /* byte 3 of the mask argument at [sp, #68]; NOTE(review): equals mask >> 24 only on little-endian - confirm */
+	mov	r12, r2 /* r12 = start of current dst row */
+	str	r0, [sp, #16] /* [sp, #16] = width */
+	ldr	r2, [sp, #60] /* first stack argument (28 scratch + 32 saved): src_line */
+	str	r1, [sp, #24] /* [sp, #24] = mask byte */
+	beq	0f /* height == 0: nothing to do */
+	lsl	r3, r3, #2 /* dst_stride << 2, used below as the per-row address step */
+	str	r3, [sp, #20]
+	ldr	r3, [sp, #64] /* second stack argument: src_stride */
+	mov	r10, #0 /* r10 = number of rows completed */
+	lsl	r3, r3, #2 /* src_stride << 2 */
+	str	r3, [sp, #8]
+	mov	r11, r3
+	b	1f
+5:	ldr	r11, [sp, #8] /* reload the src row step; r11 is reused inside the row */
+1:	ldr	r4, [sp, #20]
+	mov	r0, r12 /* r0 = dst cursor */
+	mov	r1, r2 /* r1 = src cursor */
+	add	r12, r12, r4
+	add	r2, r2, r11
+	str	r12, [sp] /* spill next dst row */
+	str	r2, [sp, #4] /* spill next src row */
+	ldr	r12, [sp, #16] /* r12 = pixel counter */
+	ldr	r2, =0x00800080
+	ldr	r3, [sp, #24] /* r3 = mask byte */
+	mov	r11, #255
+	cmp	r12, #0
+	beq	3f /* zero width: skip the pixel loop */
+4:	ldr	r5, [r1], #4 /* r5 = src pixel */
+	ldr	r4, [r0] /* r4 = dst pixel */
+	uxtb16	r6, r5 /* r6 = src bytes 0 and 2 as halfwords */
+	uxtb16	r7, r5, ror #8 /* r7 = src bytes 1 and 3 as halfwords */
+	mla	r6, r6, r3, r2 /* r6 * mask byte + 0x00800080 */
+	mla	r7, r7, r3, r2
+	uxtab16	r6, r6, r6, ror #8 /* add bytes 1 and 3 of r6 to its halfwords */
+	uxtab16	r7, r7, r7, ror #8
+	uxtb16	r6, r6, ror #8 /* keep the high byte of each halfword */
+	uxtb16	r7, r7, ror #8
+	orr	r5, r6, r7, lsl #8 /* r5 = src scaled by the mask byte */
+	uxtb16	r6, r4
+	uxtb16	r7, r4, ror #8
+	sub	r8, r11, r5, lsr #24 /* r8 = 255 - top byte of the scaled src */
+	mla	r6, r6, r8, r2
+	mla	r7, r7, r8, r2
+	uxtab16	r6, r6, r6, ror #8
+	uxtab16	r7, r7, r7, ror #8
+	uxtb16	r6, r6, ror #8
+	uxtb16	r7, r7, ror #8
+	orr	r6, r6, r7, lsl #8 /* r6 = dst scaled by r8 */
+	uqadd8	r5, r6, r5 /* per-byte saturating add */
+	str	r5, [r0], #4
+	subs	r12, r12, #1
+	bne	4b
+3:	ldr	r1, [sp, #12]
+	add	r10, r10, #1
+	cmp	r10, r1 /* all rows done? */
+	ldr	r12, [sp] /* restore next dst row pointer */
+	ldr	r2, [sp, #4] /* restore next src row pointer */
+	bne	5b
+0:	add	sp, sp, #28
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
+	bx	lr
+.endfunc
+
+pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6 /* r0=width r1=height r2=dst_line r3=dst_stride; on stack: src, unused, mask_line, mask_stride */
+	push	{r4, r5, r6, r7, r8, r9, r10, r11} /* 8 registers = 32 bytes */
+	sub	sp, sp, #28 /* seven words of scratch space */
+	cmp	r1, #0 /* flags consumed by the beq 0f below */
+	ldr	r9, [sp, #60] /* first stack argument (28 scratch + 32 saved): src value */
+	str	r1, [sp, #12] /* [sp, #12] = height */
+	bic	r1, r9, #-16777216 /* clear bits 31..24 (0xff000000) */
+	str	r1, [sp, #20]
+	mov	r12, r2 /* r12 = start of current dst row */
+	lsr	r1, r9, #8
+	ldr	r2, [sp, #20]
+	bic	r1, r1, #-16777216
+	bic	r2, r2, #65280 /* r2 = src & 0x00ff00ff */
+	bic	r1, r1, #65280 /* r1 = (src >> 8) & 0x00ff00ff */
+	str	r2, [sp, #20] /* [sp, #20] = src bytes 0 and 2 */
+	str	r0, [sp, #16] /* [sp, #16] = width */
+	str	r1, [sp, #4] /* [sp, #4] = src bytes 1 and 3 */
+	ldr	r2, [sp, #68] /* third stack argument: mask_line */
+	beq	0f /* height == 0: nothing to do */
+	lsl	r3, r3, #2 /* dst_stride << 2, used below as the per-row address step */
+	str	r3, [sp, #24]
+	mov	r0, #0 /* r0 = number of rows completed */
+	b	1f
+5:	ldr	r3, [sp, #24] /* reload the dst row step; r3 is reused inside the row */
+1:	ldr	r4, [sp, #72] /* fourth stack argument: mask_stride (used unscaled) */
+	mov	r10, r12 /* r10 = dst cursor */
+	mov	r1, r2 /* r1 = mask cursor */
+	add	r12, r12, r3
+	add	r2, r2, r4
+	str	r12, [sp, #8] /* spill next dst row */
+	str	r2, [sp] /* spill next mask row */
+	ldr	r12, [sp, #16] /* r12 = pixel counter */
+	ldr	r11, =0x00800080
+	ldr	r2, [sp, #4]
+	ldr	r3, [sp, #20]
+	cmp	r12, #0
+	beq	3f /* zero width: skip the pixel loop */
+4:	ldrb	r5, [r1], #1 /* r5 = mask byte */
+	ldr	r4, [r10] /* r4 = dst pixel */
+	mla	r6, r3, r5, r11 /* src bytes 0/2 * mask byte + 0x00800080 */
+	mla	r7, r2, r5, r11 /* src bytes 1/3 * mask byte + 0x00800080 */
+	uxtab16	r6, r6, r6, ror #8 /* add bytes 1 and 3 of r6 to its halfwords */
+	uxtab16	r7, r7, r7, ror #8
+	uxtb16	r6, r6, ror #8 /* keep the high byte of each halfword */
+	uxtb16	r7, r7, ror #8
+	orr	r5, r6, r7, lsl #8 /* r5 = src scaled by the mask byte */
+	uxtb16	r6, r4 /* r6 = dst bytes 0 and 2 as halfwords */
+	uxtb16	r7, r4, ror #8 /* r7 = dst bytes 1 and 3 as halfwords */
+	mvn	r8, r5
+	lsr	r8, r8, #24 /* r8 = 255 - top byte of the scaled src */
+	mla	r6, r6, r8, r11
+	mla	r7, r7, r8, r11
+	uxtab16	r6, r6, r6, ror #8
+	uxtab16	r7, r7, r7, ror #8
+	uxtb16	r6, r6, ror #8
+	uxtb16	r7, r7, ror #8
+	orr	r6, r6, r7, lsl #8 /* r6 = dst scaled by r8 */
+	uqadd8	r5, r6, r5 /* per-byte saturating add */
+	str	r5, [r10], #4
+	subs	r12, r12, #1
+	bne	4b
+3:	ldr	r4, [sp, #12]
+	add	r0, r0, #1
+	cmp	r0, r4 /* all rows done? */
+	ldr	r12, [sp, #8] /* restore next dst row pointer */
+	ldr	r2, [sp] /* restore next mask row pointer */
+	bne	5b
+0:	add	sp, sp, #28
+	pop	{r4, r5, r6, r7, r8, r9, r10, r11}
+	bx	lr
+.endfunc
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index f110753..389c9e0 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -30,6 +30,8 @@
 #include "pixman-private.h"
 #include "pixman-arm-common.h"
 
+#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
+
 void
 pixman_composite_add_8000_8000_asm_armv6 (int32_t  width,
                                           int32_t  height,
@@ -371,6 +373,8 @@ pixman_composite_over_n_8_8888_asm_armv6 (int32_t   width,
     }
 }
 
+#endif
+
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8000_8000,
                                    uint8_t, 1, uint8_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
commit 0a0591c2f7abde8880f4aebd510c27517a414450
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Mon Mar 22 19:51:00 2010 +0200

    ARM: SIMD optimizations updated to use common assembly calling conventions

diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 09a2888..f110753 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -28,31 +28,20 @@
 #endif
 
 #include "pixman-private.h"
-
-static void
-arm_composite_add_8000_8000 (pixman_implementation_t * impl,
-			     pixman_op_t               op,
-			     pixman_image_t *          src_image,
-			     pixman_image_t *          mask_image,
-			     pixman_image_t *          dst_image,
-			     int32_t                   src_x,
-			     int32_t                   src_y,
-			     int32_t                   mask_x,
-			     int32_t                   mask_y,
-			     int32_t                   dest_x,
-			     int32_t                   dest_y,
-			     int32_t                   width,
-			     int32_t                   height)
+#include "pixman-arm-common.h"
+
+void
+pixman_composite_add_8000_8000_asm_armv6 (int32_t  width,
+                                          int32_t  height,
+                                          uint8_t *dst_line,
+                                          int32_t  dst_stride,
+                                          uint8_t *src_line,
+                                          int32_t  src_stride)
 {
-    uint8_t     *dst_line, *dst;
-    uint8_t     *src_line, *src;
-    int dst_stride, src_stride;
+    uint8_t *dst, *src;
     int32_t w;
     uint8_t s, d;
 
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
     while (height--)
     {
 	dst = dst_line;
@@ -101,32 +90,21 @@ arm_composite_add_8000_8000 (pixman_implementation_t * impl,
 
 }
 
-static void
-arm_composite_over_8888_8888 (pixman_implementation_t * impl,
-			      pixman_op_t               op,
-			      pixman_image_t *          src_image,
-			      pixman_image_t *          mask_image,
-			      pixman_image_t *          dst_image,
-			      int32_t                   src_x,
-			      int32_t                   src_y,
-			      int32_t                   mask_x,
-			      int32_t                   mask_y,
-			      int32_t                   dest_x,
-			      int32_t                   dest_y,
-			      int32_t                   width,
-			      int32_t                   height)
+void
+pixman_composite_over_8888_8888_asm_armv6 (int32_t   width,
+                                           int32_t   height,
+                                           uint32_t *dst_line,
+                                           int32_t   dst_stride,
+                                           uint32_t *src_line,
+                                           int32_t   src_stride)
 {
-    uint32_t    *dst_line, *dst;
-    uint32_t    *src_line, *src;
-    int dst_stride, src_stride;
+    uint32_t    *dst;
+    uint32_t    *src;
     int32_t w;
     uint32_t component_half = 0x800080;
     uint32_t upper_component_mask = 0xff00ff00;
     uint32_t alpha_mask = 0xff;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
     while (height--)
     {
 	dst = dst_line;
@@ -194,33 +172,21 @@ arm_composite_over_8888_8888 (pixman_implementation_t * impl,
     }
 }
 
-static void
-arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
-				pixman_op_t               op,
-				pixman_image_t *          src_image,
-				pixman_image_t *          mask_image,
-				pixman_image_t *          dst_image,
-				int32_t                   src_x,
-				int32_t                   src_y,
-				int32_t                   mask_x,
-				int32_t                   mask_y,
-				int32_t                   dest_x,
-				int32_t                   dest_y,
-				int32_t                   width,
-				int32_t                   height)
+void
+pixman_composite_over_8888_n_8888_asm_armv6 (int32_t   width,
+                                             int32_t   height,
+                                             uint32_t *dst_line,
+                                             int32_t   dst_stride,
+                                             uint32_t *src_line,
+                                             int32_t   src_stride,
+                                             uint32_t  mask)
 {
-    uint32_t *dst_line, *dst;
-    uint32_t *src_line, *src;
-    uint32_t mask;
-    int dst_stride, src_stride;
+    uint32_t *dst;
+    uint32_t *src;
     int32_t w;
     uint32_t component_half = 0x800080;
     uint32_t alpha_mask = 0xff;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
-    mask = _pixman_image_get_solid (mask_image, PIXMAN_a8r8g8b8);
     mask = (mask) >> 24;
 
     while (height--)
@@ -303,33 +269,22 @@ arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
     }
 }
 
-static void
-arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
-			     pixman_op_t               op,
-			     pixman_image_t *          src_image,
-			     pixman_image_t *          mask_image,
-			     pixman_image_t *          dst_image,
-			     int32_t                   src_x,
-			     int32_t                   src_y,
-			     int32_t                   mask_x,
-			     int32_t                   mask_y,
-			     int32_t                   dest_x,
-			     int32_t                   dest_y,
-			     int32_t                   width,
-			     int32_t                   height)
+void
+pixman_composite_over_n_8_8888_asm_armv6 (int32_t   width,
+                                          int32_t   height,
+                                          uint32_t *dst_line,
+                                          int32_t   dst_stride,
+                                          uint32_t  src,
+                                          int32_t   unused,
+                                          uint8_t  *mask_line,
+                                          int32_t   mask_stride)
 {
-    uint32_t src, srca;
-    uint32_t *dst_line, *dst;
-    uint8_t  *mask_line, *mask;
-    int dst_stride, mask_stride;
+    uint32_t  srca;
+    uint32_t *dst;
+    uint8_t  *mask;
     int32_t w;
 
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
-    /* bail out if fully transparent */
     srca = src >> 24;
-    if (src == 0)
-	return;
 
     uint32_t component_mask = 0xff00ff;
     uint32_t component_half = 0x800080;
@@ -337,9 +292,6 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
     uint32_t src_hi = (src >> 8) & component_mask;
     uint32_t src_lo = src & component_mask;
 
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
     while (height--)
     {
 	dst = dst_line;
@@ -419,21 +371,34 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
     }
 }
 
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8000_8000,
+                                   uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (armv6, over_8888_n_8888,
+                                     uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (armv6, over_n_8_8888,
+                                      uint8_t, 1, uint32_t, 1)
+
 static const pixman_fast_path_t arm_simd_fast_paths[] =
 {
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, arm_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, arm_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, arm_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, arm_composite_over_8888_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, arm_composite_over_8888_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, arm_composite_over_8888_n_8888),
-
-    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, arm_composite_add_8000_8000),
-
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, arm_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, arm_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, arm_composite_over_n_8_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, arm_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
+
+    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8000_8000),
+
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
 
     { PIXMAN_OP_NONE },
 };
commit c1e8d4533aea3aa10c49465cf5e9a44d946f70bb
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Mon Mar 22 18:51:54 2010 +0200

    ARM: Helper ARM NEON assembly binding macros moved into a separate header
    
    This is needed for future reuse of the same macros for the other
    ARM assembly optimizations (armv4t, armv6)

diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
new file mode 100644
index 0000000..8d432b1
--- /dev/null
+++ b/pixman/pixman-arm-common.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author:  Siarhei Siamashka (siarhei.siamashka at nokia.com)
+ */
+
+#ifndef PIXMAN_ARM_COMMON_H
+#define PIXMAN_ARM_COMMON_H
+
+/* Define some macros which can expand into proxy functions between
+ * ARM assembly optimized functions and the rest of pixman fast path API.
+ *
+ * All the low level ARM assembly functions have to use ARM EABI
+ * calling convention and take up to 8 arguments:
+ *    width, height, dst, dst_stride, src, src_stride, mask, mask_stride
+ *
+ * The arguments are ordered with the most important coming first (the
+ * first 4 arguments are passed to function in registers, the rest are
+ * on stack). The last arguments are optional, for example if the
+ * function is not using mask, then 'mask' and 'mask_stride' can be
+ * omitted when doing a function call.
+ *
+ * Arguments 'src' and 'mask' contain either a pointer to the top left
+ * pixel of the composited rectangle or a pixel color value depending
+ * on the function type. In the case of just a color value (solid source
+ * or mask), the corresponding stride argument is unused.
+ */
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name,                \
+                                          src_type, src_cnt,            \
+                                          dst_type, dst_cnt)            \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t   w,                   \
+                                         int32_t   h,                   \
+                                         dst_type *dst,                 \
+                                         int32_t   dst_stride,          \
+                                         src_type *src,                 \
+                                         int32_t   src_stride);         \
+/* Fast path entry point that forwards to the assembly function. */     \
+static void                                                             \
+cputype##_composite_##name (pixman_implementation_t *imp,               \
+                            pixman_op_t              op,                \
+                            pixman_image_t *         src_image,         \
+                            pixman_image_t *         mask_image,        \
+                            pixman_image_t *         dst_image,         \
+                            int32_t                  src_x,             \
+                            int32_t                  src_y,             \
+                            int32_t                  mask_x,            \
+                            int32_t                  mask_y,            \
+                            int32_t                  dest_x,            \
+                            int32_t                  dest_y,            \
+                            int32_t                  width,             \
+                            int32_t                  height)            \
+{                                                                       \
+    dst_type *dst_line;                                                 \
+    src_type *src_line;                                                 \
+    int32_t dst_stride, src_stride;                                     \
+    /* PIXMAN_IMAGE_GET_LINE fills in the *_line / *_stride locals. */  \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
+                           src_stride, src_line, src_cnt);              \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+    /* Hand over as: w, h, dst, dst_stride, src, src_stride. */         \
+    pixman_composite_##name##_asm_##cputype (width, height,             \
+                                             dst_line, dst_stride,      \
+                                             src_line, src_stride);     \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(cputype, name,                  \
+                                        dst_type, dst_cnt)              \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
+                                         int32_t    h,                  \
+                                         dst_type  *dst,                \
+                                         int32_t    dst_stride,         \
+                                         uint32_t   src);               \
+/* Fast path entry point that forwards to the assembly function. */     \
+static void                                                             \
+cputype##_composite_##name (pixman_implementation_t *imp,               \
+                            pixman_op_t              op,                \
+                            pixman_image_t *         src_image,         \
+                            pixman_image_t *         mask_image,        \
+                            pixman_image_t *         dst_image,         \
+                            int32_t                  src_x,             \
+                            int32_t                  src_y,             \
+                            int32_t                  mask_x,            \
+                            int32_t                  mask_y,            \
+                            int32_t                  dest_x,            \
+                            int32_t                  dest_y,            \
+                            int32_t                  width,             \
+                            int32_t                  height)            \
+{                                                                       \
+    dst_type  *dst_line;                                                \
+    int32_t    dst_stride;                                              \
+    uint32_t   src;                                                     \
+    /* Solid source value, requested in the destination format. */      \
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
+    /* Skip the assembly call entirely when the source is zero. */      \
+    if (src == 0)                                                       \
+	return;                                                         \
+    /* PIXMAN_IMAGE_GET_LINE fills in dst_line and dst_stride. */       \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+    /* Hand over as: w, h, dst, dst_stride, src (by value). */          \
+    pixman_composite_##name##_asm_##cputype (width, height,             \
+                                             dst_line, dst_stride,      \
+                                             src);                      \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(cputype, name,             \
+                                             mask_type, mask_cnt,       \
+                                             dst_type, dst_cnt)         \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
+                                         int32_t    h,                  \
+                                         dst_type  *dst,                \
+                                         int32_t    dst_stride,         \
+                                         uint32_t   src,                \
+                                         int32_t    unused,             \
+                                         mask_type *mask,               \
+                                         int32_t    mask_stride);       \
+                                                                        \
+static void                                                             \
+cputype##_composite_##name (pixman_implementation_t *imp,               \
+                            pixman_op_t              op,                \
+                            pixman_image_t *         src_image,         \
+                            pixman_image_t *         mask_image,        \
+                            pixman_image_t *         dst_image,         \
+                            int32_t                  src_x,             \
+                            int32_t                  src_y,             \
+                            int32_t                  mask_x,            \
+                            int32_t                  mask_y,            \
+                            int32_t                  dest_x,            \
+                            int32_t                  dest_y,            \
+                            int32_t                  width,             \
+                            int32_t                  height)            \
+{                                                                       \
+    dst_type  *dst_line;                                                \
+    mask_type *mask_line;                                               \
+    int32_t    dst_stride, mask_stride;                                 \
+    uint32_t   src;                                                     \
+                                                                        \
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
+                                                                        \
+    if (src == 0)                                                       \
+	return;                                                         \
+                                                                        \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
+                           mask_stride, mask_line, mask_cnt);           \
+                                                                        \
+    pixman_composite_##name##_asm_##cputype (width, height,             \
+                                             dst_line, dst_stride,      \
+                                             src, 0,                    \
+                                             mask_line, mask_stride);   \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(cputype, name,              \
+                                            src_type, src_cnt,          \
+                                            dst_type, dst_cnt)          \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
+                                         int32_t    h,                  \
+                                         dst_type  *dst,                \
+                                         int32_t    dst_stride,         \
+                                         src_type  *src,                \
+                                         int32_t    src_stride,         \
+                                         uint32_t   mask);              \
+                                                                        \
+static void                                                             \
+cputype##_composite_##name (pixman_implementation_t *imp,               \
+                            pixman_op_t              op,                \
+                            pixman_image_t *         src_image,         \
+                            pixman_image_t *         mask_image,        \
+                            pixman_image_t *         dst_image,         \
+                            int32_t                  src_x,             \
+                            int32_t                  src_y,             \
+                            int32_t                  mask_x,            \
+                            int32_t                  mask_y,            \
+                            int32_t                  dest_x,            \
+                            int32_t                  dest_y,            \
+                            int32_t                  width,             \
+                            int32_t                  height)            \
+{                                                                       \
+    dst_type  *dst_line;                                                \
+    src_type  *src_line;                                                \
+    int32_t    dst_stride, src_stride;                                  \
+    uint32_t   mask;                                                    \
+                                                                        \
+    mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
+                                                                        \
+    if (mask == 0)                                                      \
+	return;                                                         \
+                                                                        \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
+                           src_stride, src_line, src_cnt);              \
+                                                                        \
+    pixman_composite_##name##_asm_##cputype (width, height,             \
+                                             dst_line, dst_stride,      \
+                                             src_line, src_stride,      \
+                                             mask);                     \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name,           \
+                                               src_type, src_cnt,       \
+                                               mask_type, mask_cnt,     \
+                                               dst_type, dst_cnt)       \
+void                                                                    \
+pixman_composite_##name##_asm_##cputype (int32_t    w,                  \
+                                         int32_t    h,                  \
+                                         dst_type  *dst,                \
+                                         int32_t    dst_stride,         \
+                                         src_type  *src,                \
+                                         int32_t    src_stride,         \
+                                         mask_type *mask,               \
+                                         int32_t    mask_stride);       \
+                                                                        \
+static void                                                             \
+cputype##_composite_##name (pixman_implementation_t *imp,               \
+                            pixman_op_t              op,                \
+                            pixman_image_t *         src_image,         \
+                            pixman_image_t *         mask_image,        \
+                            pixman_image_t *         dst_image,         \
+                            int32_t                  src_x,             \
+                            int32_t                  src_y,             \
+                            int32_t                  mask_x,            \
+                            int32_t                  mask_y,            \
+                            int32_t                  dest_x,            \
+                            int32_t                  dest_y,            \
+                            int32_t                  width,             \
+                            int32_t                  height)            \
+{                                                                       \
+    dst_type  *dst_line;                                                \
+    src_type  *src_line;                                                \
+    mask_type *mask_line;                                               \
+    int32_t    dst_stride, src_stride, mask_stride;                     \
+                                                                        \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
+                           dst_stride, dst_line, dst_cnt);              \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
+                           src_stride, src_line, src_cnt);              \
+    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
+                           mask_stride, mask_line, mask_cnt);           \
+                                                                        \
+    pixman_composite_##name##_asm_##cputype (width, height,             \
+                                             dst_line, dst_stride,      \
+                                             src_line, src_stride,      \
+                                             mask_line, mask_stride);   \
+}
+
+#endif
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 24ceeeb..6808b36 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -32,254 +32,62 @@
 
 #include <string.h>
 #include "pixman-private.h"
-
-#define BIND_SRC_NULL_DST(name, src_type, src_cnt, dst_type, dst_cnt)   \
-void                                                                    \
-pixman_composite_##name##_asm_neon (int32_t   w,                        \
-                                    int32_t   h,                        \
-                                    dst_type *dst,                      \
-                                    int32_t   dst_stride,               \
-                                    src_type *src,                      \
-                                    int32_t   src_stride);              \
-                                                                        \
-static void                                                             \
-neon_composite_##name (pixman_implementation_t *imp,                    \
-                       pixman_op_t              op,                     \
-                       pixman_image_t *         src_image,              \
-                       pixman_image_t *         mask_image,             \
-                       pixman_image_t *         dst_image,              \
-                       int32_t                  src_x,                  \
-                       int32_t                  src_y,                  \
-                       int32_t                  mask_x,                 \
-                       int32_t                  mask_y,                 \
-                       int32_t                  dest_x,                 \
-                       int32_t                  dest_y,                 \
-                       int32_t                  width,                  \
-                       int32_t                  height)                 \
-{                                                                       \
-    dst_type *dst_line;                                                 \
-    src_type *src_line;                                                 \
-    int32_t dst_stride, src_stride;                                     \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-                                                                        \
-    pixman_composite_##name##_asm_neon (width, height,                  \
-                                        dst_line, dst_stride,           \
-                                        src_line, src_stride);          \
-}
-
-#define BIND_N_NULL_DST(name, dst_type, dst_cnt)                        \
-void                                                                    \
-pixman_composite_##name##_asm_neon (int32_t    w,                       \
-                                    int32_t    h,                       \
-                                    dst_type  *dst,                     \
-                                    int32_t    dst_stride,              \
-                                    uint32_t   src);                    \
-                                                                        \
-static void                                                             \
-neon_composite_##name (pixman_implementation_t *imp,                    \
-                       pixman_op_t              op,                     \
-                       pixman_image_t *         src_image,              \
-                       pixman_image_t *         mask_image,             \
-                       pixman_image_t *         dst_image,              \
-                       int32_t                  src_x,                  \
-                       int32_t                  src_y,                  \
-                       int32_t                  mask_x,                 \
-                       int32_t                  mask_y,                 \
-                       int32_t                  dest_x,                 \
-                       int32_t                  dest_y,                 \
-                       int32_t                  width,                  \
-                       int32_t                  height)                 \
-{                                                                       \
-    dst_type  *dst_line;                                                \
-    int32_t    dst_stride;                                              \
-    uint32_t   src;                                                     \
-                                                                        \
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
-                                                                        \
-    if (src == 0)                                                       \
-	return;                                                         \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-                                                                        \
-    pixman_composite_##name##_asm_neon (width, height,                  \
-                                        dst_line, dst_stride,           \
-                                        src);                           \
-}
-
-#define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt)   \
-void                                                                    \
-pixman_composite_##name##_asm_neon (int32_t    w,                       \
-                                    int32_t    h,                       \
-                                    dst_type  *dst,                     \
-                                    int32_t    dst_stride,              \
-                                    uint32_t   src,                     \
-                                    int32_t    unused,                  \
-                                    mask_type *mask,                    \
-                                    int32_t    mask_stride);            \
-                                                                        \
-static void                                                             \
-neon_composite_##name (pixman_implementation_t *imp,                    \
-                       pixman_op_t              op,                     \
-                       pixman_image_t *         src_image,              \
-                       pixman_image_t *         mask_image,             \
-                       pixman_image_t *         dst_image,              \
-                       int32_t                  src_x,                  \
-                       int32_t                  src_y,                  \
-                       int32_t                  mask_x,                 \
-                       int32_t                  mask_y,                 \
-                       int32_t                  dest_x,                 \
-                       int32_t                  dest_y,                 \
-                       int32_t                  width,                  \
-                       int32_t                  height)                 \
-{                                                                       \
-    dst_type  *dst_line;                                                \
-    mask_type *mask_line;                                               \
-    int32_t    dst_stride, mask_stride;                                 \
-    uint32_t   src;                                                     \
-                                                                        \
-    src = _pixman_image_get_solid (src_image, dst_image->bits.format);  \
-                                                                        \
-    if (src == 0)                                                       \
-	return;                                                         \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
-                           mask_stride, mask_line, mask_cnt);           \
-                                                                        \
-    pixman_composite_##name##_asm_neon (width, height,                  \
-                                        dst_line, dst_stride,           \
-                                        src, 0,                         \
-                                        mask_line, mask_stride);        \
-}
-
-#define BIND_SRC_N_DST(name, src_type, src_cnt, dst_type, dst_cnt)      \
-void                                                                    \
-pixman_composite_##name##_asm_neon (int32_t    w,                       \
-                                    int32_t    h,                       \
-                                    dst_type  *dst,                     \
-                                    int32_t    dst_stride,              \
-                                    src_type  *src,                     \
-                                    int32_t    src_stride,              \
-                                    uint32_t   mask);                   \
-                                                                        \
-static void                                                             \
-neon_composite_##name (pixman_implementation_t *imp,                    \
-                       pixman_op_t              op,                     \
-                       pixman_image_t *         src_image,              \
-                       pixman_image_t *         mask_image,             \
-                       pixman_image_t *         dst_image,              \
-                       int32_t                  src_x,                  \
-                       int32_t                  src_y,                  \
-                       int32_t                  mask_x,                 \
-                       int32_t                  mask_y,                 \
-                       int32_t                  dest_x,                 \
-                       int32_t                  dest_y,                 \
-                       int32_t                  width,                  \
-                       int32_t                  height)                 \
-{                                                                       \
-    dst_type  *dst_line;                                                \
-    src_type  *src_line;                                                \
-    int32_t    dst_stride, src_stride;                                  \
-    uint32_t   mask;                                                    \
-                                                                        \
-    mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
-                                                                        \
-    if (mask == 0)                                                      \
-	return;                                                         \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-                                                                        \
-    pixman_composite_##name##_asm_neon (width, height,                  \
-                                        dst_line, dst_stride,           \
-                                        src_line, src_stride,           \
-                                        mask);                          \
-}
-
-#define BIND_SRC_MASK_DST(name, src_type, src_cnt, mask_type, mask_cnt, \
-                          dst_type, dst_cnt)                            \
-void                                                                    \
-pixman_composite_##name##_asm_neon (int32_t    w,                       \
-                                    int32_t    h,                       \
-                                    dst_type  *dst,                     \
-                                    int32_t    dst_stride,              \
-                                    src_type  *src,                     \
-                                    int32_t    src_stride,              \
-                                    mask_type *mask,                    \
-                                    int32_t    mask_stride);            \
-                                                                        \
-static void                                                             \
-neon_composite_##name (pixman_implementation_t *imp,                    \
-                       pixman_op_t              op,                     \
-                       pixman_image_t *         src_image,              \
-                       pixman_image_t *         mask_image,             \
-                       pixman_image_t *         dst_image,              \
-                       int32_t                  src_x,                  \
-                       int32_t                  src_y,                  \
-                       int32_t                  mask_x,                 \
-                       int32_t                  mask_y,                 \
-                       int32_t                  dest_x,                 \
-                       int32_t                  dest_y,                 \
-                       int32_t                  width,                  \
-                       int32_t                  height)                 \
-{                                                                       \
-    dst_type  *dst_line;                                                \
-    src_type  *src_line;                                                \
-    mask_type *mask_line;                                               \
-    int32_t    dst_stride, src_stride, mask_stride;                     \
-                                                                        \
-    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type,         \
-                           dst_stride, dst_line, dst_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type,           \
-                           src_stride, src_line, src_cnt);              \
-    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type,       \
-                           mask_stride, mask_line, mask_cnt);           \
-                                                                        \
-    pixman_composite_##name##_asm_neon (width, height,                  \
-                                        dst_line, dst_stride,           \
-                                        src_line, src_stride,           \
-                                        mask_line, mask_stride);        \
-}
-
-
-BIND_SRC_NULL_DST(src_8888_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(src_x888_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
-BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(src_0888_8888_rev, uint8_t, 3, uint32_t, 1)
-BIND_SRC_NULL_DST(src_0888_0565_rev, uint8_t, 3, uint16_t, 1)
-BIND_SRC_NULL_DST(src_pixbuf_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
-BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
-
-BIND_N_NULL_DST(over_n_0565, uint16_t, 1)
-BIND_N_NULL_DST(over_n_8888, uint32_t, 1)
-BIND_N_NULL_DST(over_reverse_n_8888, uint32_t, 1)
-
-BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
-
-BIND_N_MASK_DST(over_n_8_0565, uint8_t, 1, uint16_t, 1)
-BIND_N_MASK_DST(over_n_8_8888, uint8_t, 1, uint32_t, 1)
-BIND_N_MASK_DST(over_n_8888_8888_ca, uint32_t, 1, uint32_t, 1)
-BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
-
-BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
-
-BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
-BIND_SRC_MASK_DST(add_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
-BIND_SRC_MASK_DST(over_8888_8_8888, uint32_t, 1, uint8_t, 1, uint32_t, 1)
-BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
+#include "pixman-arm-common.h"
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
+                                   uint16_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
+                                   uint8_t, 3, uint8_t, 3)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
+                                   uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
+                                   uint16_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
+                                   uint8_t, 3, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
+                                   uint8_t, 3, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
+                                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8000_8000,
+                                   uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
+                                   uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
+                                   uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
+                                 uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_8888,
+                                 uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_reverse_n_8888,
+                                 uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_0565,
+                                      uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888,
+                                      uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca,
+                                      uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
+                                      uint8_t, 1, uint8_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
+                                     uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+                                        uint8_t, 1, uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
+                                        uint32_t, 1, uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
+                                        uint32_t, 1, uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
+                                        uint32_t, 1, uint32_t, 1, uint32_t, 1)
 
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,
commit 5791026e45f79d8f5168e302a498455870363ac6
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Sun Dec 27 00:27:53 2009 +0200

    ARM: Workaround for a NEON bug in assembler from binutils 2.18
    
    The problem was reported as bug 25534 against pixman in
    freedesktop.org bugzilla. Link to a patch for binutils:
    http://sourceware.org/ml/binutils/2008-03/msg00260.html
    
    For pixman the impact is a build failure when using
    binutils 2.18. Versions 2.19 and higher are fine. Still
    some distros may be using older versions of binutils and
    this is causing problems.
    
    This patch works around the problem by replacing a problematic
    "vmov a, b" instruction with equivalent "vorr a, b, b". Actually
    they even map to the same instruction opcode in the generated
    code, so the resulting binary is identical with and without patch.

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 6be8d23..51bc347 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -876,8 +876,8 @@ generate_composite_function \
     vsli.u64    d0, d0, #8
     vsli.u64    d0, d0, #16
     vsli.u64    d0, d0, #32
-    vmov        d1, d0
-    vmov        q1, q0
+    vorr        d1, d0, d0
+    vorr        q1, q0, q0
 .endm
 
 .macro pixman_composite_src_n_8_cleanup
@@ -915,8 +915,8 @@ generate_composite_function \
     vld1.32     {d0[0]}, [DUMMY]
     vsli.u64    d0, d0, #16
     vsli.u64    d0, d0, #32
-    vmov        d1, d0
-    vmov        q1, q0
+    vorr        d1, d0, d0
+    vorr        q1, q0, q0
 .endm
 
 .macro pixman_composite_src_n_0565_cleanup
@@ -953,8 +953,8 @@ generate_composite_function \
     add         DUMMY, sp, #ARGS_STACK_OFFSET
     vld1.32     {d0[0]}, [DUMMY]
     vsli.u64    d0, d0, #32
-    vmov        d1, d0
-    vmov        q1, q0
+    vorr        d1, d0, d0
+    vorr        q1, q0, q0
 .endm
 
 .macro pixman_composite_src_n_8888_cleanup
commit 68d8d83223b5a35e25d379c2ee9e2e3a1d242323
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Mon Mar 22 11:54:51 2010 +0200

    ARM: Use '.object_arch' directive in NEON assembly file
    
    This can be used to override the architecture recorded in the EABI object
    attribute section. We set the minimum arch to 'armv4'. Binutils documentation
    recommends using this directive with code that performs runtime detection
    of CPU features.
    
    Additionally NEON/VFP EABI attributes are suppressed. And the instruction
    set to use is explicitly set to '.arm'.
    
    Configure test for NEON support is also updated to include a bunch of
    these new directives (if any of these is unsupported by the assembler,
    it is better to fail configure test than to fail library build).
    
    All these changes are required to fix SIGILL problem on armv4t, reported in
    http://lists.freedesktop.org/archives/pixman/2010-March/000123.html

diff --git a/configure.ac b/configure.ac
index fc3ee24..4668715 100644
--- a/configure.ac
+++ b/configure.ac
@@ -415,10 +415,14 @@ dnl Check if assembler is gas compatible and supports NEON instructions
 have_arm_neon=no
 AC_MSG_CHECKING(whether to use ARM NEON assembler)
 xserver_save_CFLAGS=$CFLAGS
-CFLAGS="-x assembler-with-cpp"
+CFLAGS="-x assembler-with-cpp $CFLAGS"
 AC_COMPILE_IFELSE([[
 .text
 .fpu neon
+.arch armv7a
+.object_arch armv4
+.eabi_attribute 10, 0
+.arm
 .altmacro
 #ifndef __ARM_EABI__
 #error EABI is required (to be sure that calling conventions are compatible)
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index eb8cc4c..6be8d23 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -42,6 +42,10 @@
     .text
     .fpu neon
     .arch armv7a
+    .object_arch armv4
+    .eabi_attribute 10, 0 /* suppress Tag_FP_arch */
+    .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
+    .arm
     .altmacro
 
 #include "pixman-arm-neon-asm.h"


More information about the xorg-commit mailing list