pixman: Branch 'master' - 6 commits
Søren Sandmann Pedersen
sandmann at kemper.freedesktop.org
Tue Mar 23 07:56:25 PDT 2010
configure.ac | 54 +++----
pixman/Makefile.am | 9 -
pixman/pixman-arm-common.h | 273 +++++++++++++++++++++++++++++++++++
pixman/pixman-arm-neon-asm.S | 16 +-
pixman/pixman-arm-neon.c | 304 +++++++--------------------------------
pixman/pixman-arm-simd-asm.S | 330 +++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-arm-simd.c | 171 +++++++++-------------
7 files changed, 769 insertions(+), 388 deletions(-)
New commits:
commit 27a9f0468bdfa257e70270bf9addd5ad064f918b
Merge: 69f1ec9... 3ef2033...
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date: Tue Mar 23 11:00:04 2010 -0400
Merge remote branch 'ssvb/arm-fixes'
commit 3ef203331f124bf137c6e0c8d5516b1209c92dd9
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Mon Mar 22 21:56:17 2010 +0200
ARM: SIMD optimizations moved to a separate .S file
This should be the last step in providing full armv4t compatibility
with CPU features runtime autodetection in pixman.
diff --git a/configure.ac b/configure.ac
index 4668715..ed7d16a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -361,30 +361,24 @@ AC_SUBST(VMX_CFLAGS)
AM_CONDITIONAL(USE_VMX, test $have_vmx_intrinsics = yes)
-dnl ===========================================================================
-dnl Check for ARM SIMD instructions
-ARM_SIMD_CFLAGS=""
-
+dnl ==========================================================================
+dnl Check if assembler is gas compatible and supports ARM SIMD instructions
have_arm_simd=no
AC_MSG_CHECKING(whether to use ARM SIMD assembler)
-# check with default CFLAGS in case the toolchain turns on a sufficiently recent -mcpu=
-AC_COMPILE_IFELSE([
-int main () {
- asm("uqadd8 r1, r1, r2");
- return 0;
-}], have_arm_simd=yes,
- # check again with an explicit -mcpu= in case the toolchain defaults to an
- # older one; note that uqadd8 isn't available in Thumb mode on arm1136j-s
- # so we force ARM mode
- ARM_SIMD_CFLAGS="-mcpu=arm1136j-s -marm"
- xserver_save_CFLAGS=$CFLAGS
- CFLAGS="$ARM_SIMD_CFLAGS $CFLAGS"
- AC_COMPILE_IFELSE([
- int main () {
- asm("uqadd8 r1, r1, r2");
- return 0;
- }], have_arm_simd=yes)
- CFLAGS=$xserver_save_CFLAGS)
+xserver_save_CFLAGS=$CFLAGS
+CFLAGS="-x assembler-with-cpp $CFLAGS"
+AC_COMPILE_IFELSE([[
+.text
+.arch armv6
+.object_arch armv4
+.arm
+.altmacro
+#ifndef __ARM_EABI__
+#error EABI is required (to be sure that calling conventions are compatible)
+#endif
+pld [r0]
+uqadd8 r0, r0, r0]], have_arm_simd=yes)
+CFLAGS=$xserver_save_CFLAGS
AC_ARG_ENABLE(arm-simd,
[AC_HELP_STRING([--disable-arm-simd],
@@ -396,20 +390,16 @@ if test $enable_arm_simd = no ; then
fi
if test $have_arm_simd = yes ; then
- AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD compiler intrinsics])
-else
- ARM_SIMD_CFLAGS=
+ AC_DEFINE(USE_ARM_SIMD, 1, [use ARM SIMD assembly optimizations])
fi
+AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
+
AC_MSG_RESULT($have_arm_simd)
if test $enable_arm_simd = yes && test $have_arm_simd = no ; then
AC_MSG_ERROR([ARM SIMD intrinsics not detected])
fi
-AC_SUBST(ARM_SIMD_CFLAGS)
-
-AM_CONDITIONAL(USE_ARM_SIMD, test $have_arm_simd = yes)
-
dnl ==========================================================================
dnl Check if assembler is gas compatible and supports NEON instructions
have_arm_neon=no
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index 5a0e7a9..66ad7f0 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -97,12 +97,14 @@ endif
if USE_ARM_SIMD
noinst_LTLIBRARIES += libpixman-arm-simd.la
libpixman_arm_simd_la_SOURCES = \
- pixman-arm-simd.c
-libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS) $(ARM_SIMD_CFLAGS)
+ pixman-arm-simd.c \
+ pixman-arm-common.h \
+ pixman-arm-simd-asm.S
+libpixman_arm_simd_la_CFLAGS = $(DEP_CFLAGS)
libpixman_arm_simd_la_LIBADD = $(DEP_LIBS)
libpixman_1_la_LIBADD += libpixman-arm-simd.la
-ASM_CFLAGS_arm_simd=$(ARM_SIMD_CFLAGS)
+ASM_CFLAGS_arm_simd=
endif
# arm neon code
@@ -110,6 +112,7 @@ if USE_ARM_NEON
noinst_LTLIBRARIES += libpixman-arm-neon.la
libpixman_arm_neon_la_SOURCES = \
pixman-arm-neon.c \
+ pixman-arm-common.h \
pixman-arm-neon-asm.S \
pixman-arm-neon-asm.h
libpixman_arm_neon_la_CFLAGS = $(DEP_CFLAGS)
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
new file mode 100644
index 0000000..1a1a0d6
--- /dev/null
+++ b/pixman/pixman-arm-simd-asm.S
@@ -0,0 +1,330 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Jeff Muizelaar (jeff at infidigm.net)
+ *
+ */
+
+/* Prevent the stack from becoming executable */
+#if defined(__linux__) && defined(__ELF__)
+.section .note.GNU-stack,"",%progbits
+#endif
+
+ .text
+ .arch armv6
+ .object_arch armv4
+ .arm
+ .altmacro
+
+/* Supplementary macro for setting function attributes */
+.macro pixman_asm_function fname
+ .func fname
+ .global fname
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+.endm
+
+/*
+ * The code below was generated by gcc 4.3.4 from the functions that are
+ * now disabled with '#if 0' in the 'pixman-arm-simd.c' file, using the
+ * following optimization options: "-O3 -mcpu=arm1136jf-s -fomit-frame-pointer"
+ *
+ * TODO: replace gcc generated code with hand tuned versions because
+ * the code quality is not very good, introduce symbolic register
+ * aliases for better readability and maintainability.
+ */
+
+pixman_asm_function pixman_composite_add_8000_8000_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ mov r10, r1
+ sub sp, sp, #4
+ subs r10, r10, #1
+ mov r11, r0
+ mov r8, r2
+ str r3, [sp]
+ ldr r7, [sp, #36]
+ bcc 0f
+6: cmp r11, #0
+ beq 1f
+ orr r3, r8, r7
+ tst r3, #3
+ beq 2f
+ mov r1, r8
+ mov r0, r7
+ mov r12, r11
+ b 3f
+5: tst r3, #3
+ beq 4f
+3: ldrb r2, [r0], #1
+ subs r12, r12, #1
+ ldrb r3, [r1]
+ uqadd8 r3, r2, r3
+ strb r3, [r1], #1
+ orr r3, r1, r0
+ bne 5b
+1: ldr r3, [sp]
+ add r8, r8, r3
+ ldr r3, [sp, #40]
+ add r7, r7, r3
+10: subs r10, r10, #1
+ bcs 6b
+0: add sp, sp, #4
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+2: mov r12, r11
+ mov r1, r8
+ mov r0, r7
+4: cmp r12, #3
+ subgt r6, r12, #4
+ movgt r9, r12
+ lsrgt r5, r6, #2
+ addgt r3, r5, #1
+ movgt r12, #0
+ lslgt r4, r3, #2
+ ble 7f
+8: ldr r3, [r0, r12]
+ ldr r2, [r1, r12]
+ uqadd8 r3, r3, r2
+ str r3, [r1, r12]
+ add r12, r12, #4
+ cmp r12, r4
+ bne 8b
+ sub r3, r9, #4
+ bic r3, r3, #3
+ add r3, r3, #4
+ subs r12, r6, r5, lsl #2
+ add r1, r1, r3
+ add r0, r0, r3
+ beq 1b
+7: mov r4, #0
+9: ldrb r3, [r1, r4]
+ ldrb r2, [r0, r4]
+ uqadd8 r3, r2, r3
+ strb r3, [r1, r4]
+ add r4, r4, #1
+ cmp r4, r12
+ bne 9b
+ ldr r3, [sp]
+ add r8, r8, r3
+ ldr r3, [sp, #40]
+ add r7, r7, r3
+ b 10b
+.endfunc
+
+pixman_asm_function pixman_composite_over_8888_8888_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ sub sp, sp, #20
+ cmp r1, #0
+ mov r12, r2
+ str r1, [sp, #12]
+ str r0, [sp, #16]
+ ldr r2, [sp, #52]
+ beq 0f
+ lsl r3, r3, #2
+ str r3, [sp]
+ ldr r3, [sp, #56]
+ mov r10, #0
+ lsl r3, r3, #2
+ str r3, [sp, #8]
+ mov r11, r3
+ b 1f
+6: ldr r11, [sp, #8]
+1: ldr r9, [sp]
+ mov r0, r12
+ add r12, r12, r9
+ mov r1, r2
+ str r12, [sp, #4]
+ add r2, r2, r11
+ ldr r12, [sp, #16]
+ ldr r3, =0x00800080
+ ldr r9, =0xff00ff00
+ mov r11, #255
+ cmp r12, #0
+ beq 4f
+5: ldr r5, [r1], #4
+ ldr r4, [r0]
+ sub r8, r11, r5, lsr #24
+ uxtb16 r6, r4
+ uxtb16 r7, r4, ror #8
+ mla r6, r6, r8, r3
+ mla r7, r7, r8, r3
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ and r7, r7, r9
+ uxtab16 r6, r7, r6, ror #8
+ uqadd8 r5, r6, r5
+ str r5, [r0], #4
+ subs r12, r12, #1
+ bne 5b
+4: ldr r3, [sp, #12]
+ add r10, r10, #1
+ cmp r10, r3
+ ldr r12, [sp, #4]
+ bne 6b
+0: add sp, sp, #20
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+.endfunc
+
+pixman_asm_function pixman_composite_over_8888_n_8888_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ sub sp, sp, #28
+ cmp r1, #0
+ str r1, [sp, #12]
+ ldrb r1, [sp, #71]
+ mov r12, r2
+ str r0, [sp, #16]
+ ldr r2, [sp, #60]
+ str r1, [sp, #24]
+ beq 0f
+ lsl r3, r3, #2
+ str r3, [sp, #20]
+ ldr r3, [sp, #64]
+ mov r10, #0
+ lsl r3, r3, #2
+ str r3, [sp, #8]
+ mov r11, r3
+ b 1f
+5: ldr r11, [sp, #8]
+1: ldr r4, [sp, #20]
+ mov r0, r12
+ mov r1, r2
+ add r12, r12, r4
+ add r2, r2, r11
+ str r12, [sp]
+ str r2, [sp, #4]
+ ldr r12, [sp, #16]
+ ldr r2, =0x00800080
+ ldr r3, [sp, #24]
+ mov r11, #255
+ cmp r12, #0
+ beq 3f
+4: ldr r5, [r1], #4
+ ldr r4, [r0]
+ uxtb16 r6, r5
+ uxtb16 r7, r5, ror #8
+ mla r6, r6, r3, r2
+ mla r7, r7, r3, r2
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r5, r6, r7, lsl #8
+ uxtb16 r6, r4
+ uxtb16 r7, r4, ror #8
+ sub r8, r11, r5, lsr #24
+ mla r6, r6, r8, r2
+ mla r7, r7, r8, r2
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r6, r6, r7, lsl #8
+ uqadd8 r5, r6, r5
+ str r5, [r0], #4
+ subs r12, r12, #1
+ bne 4b
+3: ldr r1, [sp, #12]
+ add r10, r10, #1
+ cmp r10, r1
+ ldr r12, [sp]
+ ldr r2, [sp, #4]
+ bne 5b
+0: add sp, sp, #28
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+.endfunc
+
+pixman_asm_function pixman_composite_over_n_8_8888_asm_armv6
+ push {r4, r5, r6, r7, r8, r9, r10, r11}
+ sub sp, sp, #28
+ cmp r1, #0
+ ldr r9, [sp, #60]
+ str r1, [sp, #12]
+ bic r1, r9, #-16777216
+ str r1, [sp, #20]
+ mov r12, r2
+ lsr r1, r9, #8
+ ldr r2, [sp, #20]
+ bic r1, r1, #-16777216
+ bic r2, r2, #65280
+ bic r1, r1, #65280
+ str r2, [sp, #20]
+ str r0, [sp, #16]
+ str r1, [sp, #4]
+ ldr r2, [sp, #68]
+ beq 0f
+ lsl r3, r3, #2
+ str r3, [sp, #24]
+ mov r0, #0
+ b 1f
+5: ldr r3, [sp, #24]
+1: ldr r4, [sp, #72]
+ mov r10, r12
+ mov r1, r2
+ add r12, r12, r3
+ add r2, r2, r4
+ str r12, [sp, #8]
+ str r2, [sp]
+ ldr r12, [sp, #16]
+ ldr r11, =0x00800080
+ ldr r2, [sp, #4]
+ ldr r3, [sp, #20]
+ cmp r12, #0
+ beq 3f
+4: ldrb r5, [r1], #1
+ ldr r4, [r10]
+ mla r6, r3, r5, r11
+ mla r7, r2, r5, r11
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r5, r6, r7, lsl #8
+ uxtb16 r6, r4
+ uxtb16 r7, r4, ror #8
+ mvn r8, r5
+ lsr r8, r8, #24
+ mla r6, r6, r8, r11
+ mla r7, r7, r8, r11
+ uxtab16 r6, r6, r6, ror #8
+ uxtab16 r7, r7, r7, ror #8
+ uxtb16 r6, r6, ror #8
+ uxtb16 r7, r7, ror #8
+ orr r6, r6, r7, lsl #8
+ uqadd8 r5, r6, r5
+ str r5, [r10], #4
+ subs r12, r12, #1
+ bne 4b
+3: ldr r4, [sp, #12]
+ add r0, r0, #1
+ cmp r0, r4
+ ldr r12, [sp, #8]
+ ldr r2, [sp]
+ bne 5b
+0: add sp, sp, #28
+ pop {r4, r5, r6, r7, r8, r9, r10, r11}
+ bx lr
+.endfunc
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index f110753..389c9e0 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -30,6 +30,8 @@
#include "pixman-private.h"
#include "pixman-arm-common.h"
+#if 0 /* This code was moved to 'pixman-arm-simd-asm.S' */
+
void
pixman_composite_add_8000_8000_asm_armv6 (int32_t width,
int32_t height,
@@ -371,6 +373,8 @@ pixman_composite_over_n_8_8888_asm_armv6 (int32_t width,
}
}
+#endif
+
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8000_8000,
uint8_t, 1, uint8_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
commit 0a0591c2f7abde8880f4aebd510c27517a414450
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Mon Mar 22 19:51:00 2010 +0200
ARM: SIMD optimizations updated to use common assembly calling conventions
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 09a2888..f110753 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -28,31 +28,20 @@
#endif
#include "pixman-private.h"
-
-static void
-arm_composite_add_8000_8000 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+#include "pixman-arm-common.h"
+
+void
+pixman_composite_add_8000_8000_asm_armv6 (int32_t width,
+ int32_t height,
+ uint8_t *dst_line,
+ int32_t dst_stride,
+ uint8_t *src_line,
+ int32_t src_stride)
{
- uint8_t *dst_line, *dst;
- uint8_t *src_line, *src;
- int dst_stride, src_stride;
+ uint8_t *dst, *src;
int32_t w;
uint8_t s, d;
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
-
while (height--)
{
dst = dst_line;
@@ -101,32 +90,21 @@ arm_composite_add_8000_8000 (pixman_implementation_t * impl,
}
-static void
-arm_composite_over_8888_8888 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+void
+pixman_composite_over_8888_8888_asm_armv6 (int32_t width,
+ int32_t height,
+ uint32_t *dst_line,
+ int32_t dst_stride,
+ uint32_t *src_line,
+ int32_t src_stride)
{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- int dst_stride, src_stride;
+ uint32_t *dst;
+ uint32_t *src;
int32_t w;
uint32_t component_half = 0x800080;
uint32_t upper_component_mask = 0xff00ff00;
uint32_t alpha_mask = 0xff;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
while (height--)
{
dst = dst_line;
@@ -194,33 +172,21 @@ arm_composite_over_8888_8888 (pixman_implementation_t * impl,
}
}
-static void
-arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+void
+pixman_composite_over_8888_n_8888_asm_armv6 (int32_t width,
+ int32_t height,
+ uint32_t *dst_line,
+ int32_t dst_stride,
+ uint32_t *src_line,
+ int32_t src_stride,
+ uint32_t mask)
{
- uint32_t *dst_line, *dst;
- uint32_t *src_line, *src;
- uint32_t mask;
- int dst_stride, src_stride;
+ uint32_t *dst;
+ uint32_t *src;
int32_t w;
uint32_t component_half = 0x800080;
uint32_t alpha_mask = 0xff;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
-
- mask = _pixman_image_get_solid (mask_image, PIXMAN_a8r8g8b8);
mask = (mask) >> 24;
while (height--)
@@ -303,33 +269,22 @@ arm_composite_over_8888_n_8888 (pixman_implementation_t * impl,
}
}
-static void
-arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
- pixman_op_t op,
- pixman_image_t * src_image,
- pixman_image_t * mask_image,
- pixman_image_t * dst_image,
- int32_t src_x,
- int32_t src_y,
- int32_t mask_x,
- int32_t mask_y,
- int32_t dest_x,
- int32_t dest_y,
- int32_t width,
- int32_t height)
+void
+pixman_composite_over_n_8_8888_asm_armv6 (int32_t width,
+ int32_t height,
+ uint32_t *dst_line,
+ int32_t dst_stride,
+ uint32_t src,
+ int32_t unused,
+ uint8_t *mask_line,
+ int32_t mask_stride)
{
- uint32_t src, srca;
- uint32_t *dst_line, *dst;
- uint8_t *mask_line, *mask;
- int dst_stride, mask_stride;
+ uint32_t srca;
+ uint32_t *dst;
+ uint8_t *mask;
int32_t w;
- src = _pixman_image_get_solid (src_image, dst_image->bits.format);
-
- /* bail out if fully transparent */
srca = src >> 24;
- if (src == 0)
- return;
uint32_t component_mask = 0xff00ff;
uint32_t component_half = 0x800080;
@@ -337,9 +292,6 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
uint32_t src_hi = (src >> 8) & component_mask;
uint32_t src_lo = src & component_mask;
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
-
while (height--)
{
dst = dst_line;
@@ -419,21 +371,34 @@ arm_composite_over_n_8_8888 (pixman_implementation_t * impl,
}
}
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8000_8000,
+ uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (armv6, over_8888_n_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (armv6, over_n_8_8888,
+ uint8_t, 1, uint32_t, 1)
+
static const pixman_fast_path_t arm_simd_fast_paths[] =
{
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, arm_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, arm_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, arm_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, arm_composite_over_8888_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, arm_composite_over_8888_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, arm_composite_over_8888_n_8888),
-
- PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, arm_composite_add_8000_8000),
-
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, arm_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, arm_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, arm_composite_over_n_8_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, arm_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, armv6_composite_over_8888_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
+
+ PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8000_8000),
+
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, armv6_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, armv6_composite_over_n_8_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, armv6_composite_over_n_8_8888),
{ PIXMAN_OP_NONE },
};
commit c1e8d4533aea3aa10c49465cf5e9a44d946f70bb
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Mon Mar 22 18:51:54 2010 +0200
ARM: Helper ARM NEON assembly binding macros moved into a separate header
This is needed for future reuse of the same macros for the other
ARM assembly optimizations (armv4t, armv6)
diff --git a/pixman/pixman-arm-common.h b/pixman/pixman-arm-common.h
new file mode 100644
index 0000000..8d432b1
--- /dev/null
+++ b/pixman/pixman-arm-common.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Author: Siarhei Siamashka (siarhei.siamashka at nokia.com)
+ */
+
+#ifndef PIXMAN_ARM_COMMON_H
+#define PIXMAN_ARM_COMMON_H
+
+/* Define some macros which can expand into proxy functions between
+ * ARM assembly optimized functions and the rest of pixman fast path API.
+ *
+ * All the low level ARM assembly functions have to use ARM EABI
+ * calling convention and take up to 8 arguments:
+ * width, height, dst, dst_stride, src, src_stride, mask, mask_stride
+ *
+ * The arguments are ordered with the most important coming first (the
+ * first 4 arguments are passed to the function in registers, the rest
+ * are on the stack). The last arguments are optional; for example, if the
+ * function is not using mask, then 'mask' and 'mask_stride' can be
+ * omitted when doing a function call.
+ *
+ * Arguments 'src' and 'mask' contain either a pointer to the top left
+ * pixel of the composited rectangle or a pixel color value depending
+ * on the function type. In the case of just a color value (solid source
+ * or mask), the corresponding stride argument is unused.
+ */
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_DST(cputype, name, \
+ src_type, src_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ int32_t dst_stride, src_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_DST(cputype, name, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ uint32_t src); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ int32_t dst_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
+ \
+ if (src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST(cputype, name, \
+ mask_type, mask_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ uint32_t src, \
+ int32_t unused, \
+ mask_type *mask, \
+ int32_t mask_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ mask_type *mask_line; \
+ int32_t dst_stride, mask_stride; \
+ uint32_t src; \
+ \
+ src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
+ \
+ if (src == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
+ mask_stride, mask_line, mask_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src, 0, \
+ mask_line, mask_stride); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST(cputype, name, \
+ src_type, src_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride, \
+ uint32_t mask); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ int32_t dst_stride, src_stride; \
+ uint32_t mask; \
+ \
+ mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
+ \
+ if (mask == 0) \
+ return; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride, \
+ mask); \
+}
+
+#define PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST(cputype, name, \
+ src_type, src_cnt, \
+ mask_type, mask_cnt, \
+ dst_type, dst_cnt) \
+void \
+pixman_composite_##name##_asm_##cputype (int32_t w, \
+ int32_t h, \
+ dst_type *dst, \
+ int32_t dst_stride, \
+ src_type *src, \
+ int32_t src_stride, \
+ mask_type *mask, \
+ int32_t mask_stride); \
+ \
+static void \
+cputype##_composite_##name (pixman_implementation_t *imp, \
+ pixman_op_t op, \
+ pixman_image_t * src_image, \
+ pixman_image_t * mask_image, \
+ pixman_image_t * dst_image, \
+ int32_t src_x, \
+ int32_t src_y, \
+ int32_t mask_x, \
+ int32_t mask_y, \
+ int32_t dest_x, \
+ int32_t dest_y, \
+ int32_t width, \
+ int32_t height) \
+{ \
+ dst_type *dst_line; \
+ src_type *src_line; \
+ mask_type *mask_line; \
+ int32_t dst_stride, src_stride, mask_stride; \
+ \
+ PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
+ dst_stride, dst_line, dst_cnt); \
+ PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
+ src_stride, src_line, src_cnt); \
+ PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
+ mask_stride, mask_line, mask_cnt); \
+ \
+ pixman_composite_##name##_asm_##cputype (width, height, \
+ dst_line, dst_stride, \
+ src_line, src_stride, \
+ mask_line, mask_stride); \
+}
+
+#endif
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 24ceeeb..6808b36 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -32,254 +32,62 @@
#include <string.h>
#include "pixman-private.h"
-
-#define BIND_SRC_NULL_DST(name, src_type, src_cnt, dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride); \
- \
-static void \
-neon_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- int32_t dst_stride, src_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- \
- pixman_composite_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride); \
-}
-
-#define BIND_N_NULL_DST(name, dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- uint32_t src); \
- \
-static void \
-neon_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- int32_t dst_stride; \
- uint32_t src; \
- \
- src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
- \
- if (src == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- \
- pixman_composite_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src); \
-}
-
-#define BIND_N_MASK_DST(name, mask_type, mask_cnt, dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- uint32_t src, \
- int32_t unused, \
- mask_type *mask, \
- int32_t mask_stride); \
- \
-static void \
-neon_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- mask_type *mask_line; \
- int32_t dst_stride, mask_stride; \
- uint32_t src; \
- \
- src = _pixman_image_get_solid (src_image, dst_image->bits.format); \
- \
- if (src == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
- mask_stride, mask_line, mask_cnt); \
- \
- pixman_composite_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src, 0, \
- mask_line, mask_stride); \
-}
-
-#define BIND_SRC_N_DST(name, src_type, src_cnt, dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride, \
- uint32_t mask); \
- \
-static void \
-neon_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- int32_t dst_stride, src_stride; \
- uint32_t mask; \
- \
- mask = _pixman_image_get_solid (mask_image, dst_image->bits.format);\
- \
- if (mask == 0) \
- return; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- \
- pixman_composite_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride, \
- mask); \
-}
-
-#define BIND_SRC_MASK_DST(name, src_type, src_cnt, mask_type, mask_cnt, \
- dst_type, dst_cnt) \
-void \
-pixman_composite_##name##_asm_neon (int32_t w, \
- int32_t h, \
- dst_type *dst, \
- int32_t dst_stride, \
- src_type *src, \
- int32_t src_stride, \
- mask_type *mask, \
- int32_t mask_stride); \
- \
-static void \
-neon_composite_##name (pixman_implementation_t *imp, \
- pixman_op_t op, \
- pixman_image_t * src_image, \
- pixman_image_t * mask_image, \
- pixman_image_t * dst_image, \
- int32_t src_x, \
- int32_t src_y, \
- int32_t mask_x, \
- int32_t mask_y, \
- int32_t dest_x, \
- int32_t dest_y, \
- int32_t width, \
- int32_t height) \
-{ \
- dst_type *dst_line; \
- src_type *src_line; \
- mask_type *mask_line; \
- int32_t dst_stride, src_stride, mask_stride; \
- \
- PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, dst_type, \
- dst_stride, dst_line, dst_cnt); \
- PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, src_type, \
- src_stride, src_line, src_cnt); \
- PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type, \
- mask_stride, mask_line, mask_cnt); \
- \
- pixman_composite_##name##_asm_neon (width, height, \
- dst_line, dst_stride, \
- src_line, src_stride, \
- mask_line, mask_stride); \
-}
-
-
-BIND_SRC_NULL_DST(src_8888_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(src_x888_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
-BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(src_0888_8888_rev, uint8_t, 3, uint32_t, 1)
-BIND_SRC_NULL_DST(src_0888_0565_rev, uint8_t, 3, uint16_t, 1)
-BIND_SRC_NULL_DST(src_pixbuf_8888, uint32_t, 1, uint32_t, 1)
-BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
-BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
-
-BIND_N_NULL_DST(over_n_0565, uint16_t, 1)
-BIND_N_NULL_DST(over_n_8888, uint32_t, 1)
-BIND_N_NULL_DST(over_reverse_n_8888, uint32_t, 1)
-
-BIND_SRC_NULL_DST(over_8888_0565, uint32_t, 1, uint16_t, 1)
-BIND_SRC_NULL_DST(over_8888_8888, uint32_t, 1, uint32_t, 1)
-
-BIND_N_MASK_DST(over_n_8_0565, uint8_t, 1, uint16_t, 1)
-BIND_N_MASK_DST(over_n_8_8888, uint8_t, 1, uint32_t, 1)
-BIND_N_MASK_DST(over_n_8888_8888_ca, uint32_t, 1, uint32_t, 1)
-BIND_N_MASK_DST(add_n_8_8, uint8_t, 1, uint8_t, 1)
-
-BIND_SRC_N_DST(over_8888_n_8888, uint32_t, 1, uint32_t, 1)
-
-BIND_SRC_MASK_DST(add_8_8_8, uint8_t, 1, uint8_t, 1, uint8_t, 1)
-BIND_SRC_MASK_DST(add_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
-BIND_SRC_MASK_DST(over_8888_8_8888, uint32_t, 1, uint8_t, 1, uint32_t, 1)
-BIND_SRC_MASK_DST(over_8888_8888_8888, uint32_t, 1, uint32_t, 1, uint32_t, 1)
+#include "pixman-arm-common.h"
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_x888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_0565,
+ uint16_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0888,
+ uint8_t, 3, uint8_t, 3)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_8888_0565,
+ uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0565_8888,
+ uint16_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_8888_rev,
+ uint8_t, 3, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_0888_0565_rev,
+ uint8_t, 3, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, src_pixbuf_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8000_8000,
+ uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, add_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_0565,
+ uint32_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (neon, over_8888_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_0565,
+ uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_n_8888,
+ uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (neon, over_reverse_n_8888,
+ uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_0565,
+ uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8_8888,
+ uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, over_n_8888_8888_ca,
+ uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (neon, add_n_8_8,
+ uint8_t, 1, uint8_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (neon, over_8888_n_8888,
+ uint32_t, 1, uint32_t, 1)
+
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8_8_8,
+ uint8_t, 1, uint8_t, 1, uint8_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, add_8888_8888_8888,
+ uint32_t, 1, uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_8888,
+ uint32_t, 1, uint8_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
+ uint32_t, 1, uint32_t, 1, uint32_t, 1)
void
pixman_composite_src_n_8_asm_neon (int32_t w,
commit 5791026e45f79d8f5168e302a498455870363ac6
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Sun Dec 27 00:27:53 2009 +0200
ARM: Workaround for a NEON bug in assembler from binutils 2.18
The problem was reported as bug 25534 against pixman in
freedesktop.org bugzilla. Link to a patch for binutils:
http://sourceware.org/ml/binutils/2008-03/msg00260.html
For pixman the impact is a build failure when using
binutils 2.18. Versions 2.19 and higher are fine. Still
some distros may be using older versions of binutils and
this is causing problems.
This patch works around the problem by replacing a problematic
"vmov a, b" instruction with equivalent "vorr a, b, b". Actually
they even map to the same instruction opcode in the generated
code, so the resulting binary is identical with and without the patch.
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 6be8d23..51bc347 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -876,8 +876,8 @@ generate_composite_function \
vsli.u64 d0, d0, #8
vsli.u64 d0, d0, #16
vsli.u64 d0, d0, #32
- vmov d1, d0
- vmov q1, q0
+ vorr d1, d0, d0
+ vorr q1, q0, q0
.endm
.macro pixman_composite_src_n_8_cleanup
@@ -915,8 +915,8 @@ generate_composite_function \
vld1.32 {d0[0]}, [DUMMY]
vsli.u64 d0, d0, #16
vsli.u64 d0, d0, #32
- vmov d1, d0
- vmov q1, q0
+ vorr d1, d0, d0
+ vorr q1, q0, q0
.endm
.macro pixman_composite_src_n_0565_cleanup
@@ -953,8 +953,8 @@ generate_composite_function \
add DUMMY, sp, #ARGS_STACK_OFFSET
vld1.32 {d0[0]}, [DUMMY]
vsli.u64 d0, d0, #32
- vmov d1, d0
- vmov q1, q0
+ vorr d1, d0, d0
+ vorr q1, q0, q0
.endm
.macro pixman_composite_src_n_8888_cleanup
commit 68d8d83223b5a35e25d379c2ee9e2e3a1d242323
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Mon Mar 22 11:54:51 2010 +0200
ARM: Use '.object_arch' directive in NEON assembly file
This can be used to override the architecture recorded in the EABI object
attribute section. We set the minimum arch to 'armv4'. The binutils documentation
recommends using this directive with code that performs runtime detection
of CPU features.
Additionally, the NEON/VFP EABI attributes are suppressed, and the instruction
set is explicitly selected with the '.arm' directive.
The configure test for NEON support is also updated to include several of
these new directives (if any of these is unsupported by the assembler,
it is better to fail configure test than to fail library build).
All these changes are required to fix the SIGILL problem on armv4t, reported in
http://lists.freedesktop.org/archives/pixman/2010-March/000123.html
diff --git a/configure.ac b/configure.ac
index fc3ee24..4668715 100644
--- a/configure.ac
+++ b/configure.ac
@@ -415,10 +415,14 @@ dnl Check if assembler is gas compatible and supports NEON instructions
have_arm_neon=no
AC_MSG_CHECKING(whether to use ARM NEON assembler)
xserver_save_CFLAGS=$CFLAGS
-CFLAGS="-x assembler-with-cpp"
+CFLAGS="-x assembler-with-cpp $CFLAGS"
AC_COMPILE_IFELSE([[
.text
.fpu neon
+.arch armv7a
+.object_arch armv4
+.eabi_attribute 10, 0
+.arm
.altmacro
#ifndef __ARM_EABI__
#error EABI is required (to be sure that calling conventions are compatible)
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index eb8cc4c..6be8d23 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -42,6 +42,10 @@
.text
.fpu neon
.arch armv7a
+ .object_arch armv4
+ .eabi_attribute 10, 0 /* suppress Tag_FP_arch */
+ .eabi_attribute 12, 0 /* suppress Tag_Advanced_SIMD_arch */
+ .arm
.altmacro
#include "pixman-arm-neon-asm.h"
More information about the xorg-commit
mailing list