pixman: Branch 'master' - 4 commits
Siarhei Siamashka
siamashka at kemper.freedesktop.org
Thu Apr 3 21:43:43 PDT 2014
configure.ac | 18 +++++++
pixman/Makefile.am | 2
pixman/pixman-arm-asm.h | 37 ++++++++++++++++
pixman/pixman-arm-neon-asm-bilinear.S | 12 -----
pixman/pixman-arm-neon-asm.S | 12 -----
pixman/pixman-arm-simd-asm-scaled.S | 11 ----
pixman/pixman-arm-simd-asm.S | 78 ++++++++++++++++++++++++++++++++++
pixman/pixman-arm-simd.c | 6 ++
test/tolerance-test.c | 4 -
test/utils-prng.c | 10 ++--
test/utils-prng.h | 9 +--
11 files changed, 155 insertions(+), 44 deletions(-)
New commits:
commit 4b76bbfda670f9ede67d0449f3640605e1fc4df0
Author: Pekka Paalanen <pekka.paalanen at collabora.co.uk>
Date: Mon Mar 31 15:03:43 2014 +0300
ARM: share pixman_asm_function definition
Several files contain identical definitions of the asm macro pixman_asm_function.
Merge all these definitions into a new asm header.
The original definition is taken from pixman-arm-simd-asm-scaled.S with
the copyright/licence/author blurb verbatim.
diff --git a/pixman/Makefile.am b/pixman/Makefile.am
index b376d9a..581b6f6 100644
--- a/pixman/Makefile.am
+++ b/pixman/Makefile.am
@@ -72,6 +72,7 @@ libpixman_arm_simd_la_SOURCES = \
pixman-arm-common.h \
pixman-arm-simd-asm.S \
pixman-arm-simd-asm-scaled.S \
+ pixman-arm-asm.h \
pixman-arm-simd-asm.h
libpixman_1_la_LIBADD += libpixman-arm-simd.la
@@ -86,6 +87,7 @@ libpixman_arm_neon_la_SOURCES = \
pixman-arm-common.h \
pixman-arm-neon-asm.S \
pixman-arm-neon-asm-bilinear.S \
+ pixman-arm-asm.h \
pixman-arm-neon-asm.h
libpixman_1_la_LIBADD += libpixman-arm-neon.la
diff --git a/pixman/pixman-arm-asm.h b/pixman/pixman-arm-asm.h
new file mode 100644
index 0000000..ee78541
--- /dev/null
+++ b/pixman/pixman-arm-asm.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2008 Mozilla Corporation
+ * Copyright © 2010 Nokia Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that
+ * copyright notice and this permission notice appear in supporting
+ * documentation, and that the name of Mozilla Corporation not be used in
+ * advertising or publicity pertaining to distribution of the software without
+ * specific, written prior permission. Mozilla Corporation makes no
+ * representations about the suitability of this software for any purpose. It
+ * is provided "as is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
+ * SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ * FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
+ * AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
+ * SOFTWARE.
+ *
+ * Author: Jeff Muizelaar (jeff at infidigm.net)
+ *
+ */
+
+/* Supplementary macro for setting function attributes */
+.macro pixman_asm_function fname
+ .func fname
+ .global fname
+#ifdef __ELF__
+ .hidden fname
+ .type fname, %function
+#endif
+fname:
+.endm
diff --git a/pixman/pixman-arm-neon-asm-bilinear.S b/pixman/pixman-arm-neon-asm-bilinear.S
index e37b5c2..0fd92d6 100644
--- a/pixman/pixman-arm-neon-asm-bilinear.S
+++ b/pixman/pixman-arm-neon-asm-bilinear.S
@@ -65,23 +65,13 @@
.p2align 2
#include "pixman-private.h"
+#include "pixman-arm-asm.h"
#include "pixman-arm-neon-asm.h"
/*
* Bilinear macros from pixman-arm-neon-asm.S
*/
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
- .func fname
- .global fname
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
-fname:
-.endm
-
/*
* Bilinear scaling support code which tries to provide pixel fetching, color
* format conversion, and interpolation as separate macros which can be used
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 187197d..7e949a3 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -50,6 +50,7 @@
.p2align 2
#include "pixman-private.h"
+#include "pixman-arm-asm.h"
#include "pixman-arm-neon-asm.h"
/* Global configuration options and preferences */
@@ -2830,17 +2831,6 @@ generate_composite_function_nearest_scanline \
/******************************************************************************/
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
- .func fname
- .global fname
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
-fname:
-.endm
-
/*
* Bilinear scaling support code which tries to provide pixel fetching, color
* format conversion, and interpolation as separate macros which can be used
diff --git a/pixman/pixman-arm-simd-asm-scaled.S b/pixman/pixman-arm-simd-asm-scaled.S
index 7110995..e050292 100644
--- a/pixman/pixman-arm-simd-asm-scaled.S
+++ b/pixman/pixman-arm-simd-asm-scaled.S
@@ -37,16 +37,7 @@
.altmacro
.p2align 2
-/* Supplementary macro for setting function attributes */
-.macro pixman_asm_function fname
- .func fname
- .global fname
-#ifdef __ELF__
- .hidden fname
- .type fname, %function
-#endif
-fname:
-.endm
+#include "pixman-arm-asm.h"
/*
* Note: This code is only using armv5te instructions (not even armv6),
commit 4ee85b008333a95d4dfc27e7b04c444dcbb3a7e7
Author: Ben Avison <bavison at riscosopen.org>
Date: Fri Mar 28 11:13:21 2014 +0200
ARMv6: Add fast path for over_reverse_n_8888
Benchmark results, "before" is upstream commit
c343846 lowlevel-blt-bench: add in_reverse_8888_8888 test
and "after" is with this patch only added on top.
lowlevel-blt-bench, over_reverse_n_8888, 100 iterations:
Before After
Mean StdDev Mean StdDev Confidence Change
L1 15.1 0.1 274.5 2.3 100.00% +1718.9%
L2 12.8 0.3 181.8 0.7 100.00% +1315.5%
M 10.8 0.0 77.9 0.0 100.00% +621.2%
HT 9.7 0.0 29.4 0.2 100.00% +204.9%
VT 9.5 0.0 26.7 0.1 100.00% +179.3%
R 9.3 0.0 25.3 0.1 100.00% +173.6%
RT 6.0 0.1 11.0 0.2 100.00% +82.9%
At most 16 outliers rejected per case per set.
cairo-perf-trace with trimmed traces, 30 iterations:
Before After
Mean StdDev Mean StdDev Confidence Change
t-poppler.trace 12.9 0.1 9.7 0.0 100.00% +32.6%
t-firefox-talos-gfx.trace 33.2 0.7 32.9 0.4 95.23% +0.9% (insignificant)
t-firefox-particles.trace 27.4 0.1 27.3 0.2 99.65% +0.4%
t-firefox-canvas-alpha.trace 20.5 0.3 20.5 0.3 57.51% +0.3% (insignificant)
t-poppler-reseau.trace 22.4 0.1 22.4 0.1 95.69% +0.3% (insignificant)
t-firefox-fishtank.trace 13.2 0.0 13.2 0.0 99.84% +0.1%
t-swfdec-giant-steps.trace 14.9 0.0 14.9 0.0 87.68% +0.1% (insignificant)
t-swfdec-youtube.trace 7.8 0.0 7.8 0.0 35.22% +0.1% (insignificant)
t-firefox-planet-gnome.trace 11.5 0.0 11.5 0.0 29.37% +0.0% (insignificant)
t-firefox-fishbowl.trace 21.2 0.0 21.2 0.0 18.09% +0.0% (insignificant)
t-grads-heat-map.trace 4.4 0.0 4.4 0.0 1.84% +0.0% (insignificant)
t-firefox-paintball.trace 18.0 0.0 18.0 0.0 33.43% -0.0% (insignificant)
t-firefox-talos-svg.trace 20.5 0.0 20.5 0.1 68.56% -0.1% (insignificant)
t-midori-zoomed.trace 8.0 0.0 8.0 0.0 99.98% -0.1%
t-firefox-canvas-swscroll.trace 32.1 0.1 32.1 0.1 85.27% -0.1% (insignificant)
t-gnome-system-monitor.trace 17.2 0.0 17.2 0.0 99.97% -0.2%
t-firefox-chalkboard.trace 36.5 0.0 36.6 0.0 100.00% -0.2%
t-firefox-asteroids.trace 11.1 0.0 11.1 0.0 100.00% -0.2%
t-firefox-canvas.trace 17.9 0.0 18.0 0.0 100.00% -0.3%
t-chromium-tabs.trace 4.9 0.0 4.9 0.0 97.95% -0.3% (insignificant)
t-xfce4-terminal-a1.trace 4.8 0.0 4.8 0.0 100.00% -0.4%
t-firefox-scrolling.trace 31.1 0.1 31.2 0.1 100.00% -0.5%
t-evolution.trace 13.7 0.1 13.8 0.1 99.99% -0.6%
t-gnome-terminal-vim.trace 22.0 0.2 22.2 0.1 99.99% -0.7%
t-gvim.trace 33.2 0.2 33.5 0.2 100.00% -0.8%
At most 6 outliers rejected per case per set.
Cairo perf reports the running time, but the change is computed for
operations per second instead (inverse of running time).
Changes on the order of +/- 1% can be attributed to measurement error,
even if they are deemed to be statistically significant. This claim is
based on comparing two 30-iteration identical "before" runs using the
exact same binaries, and observing changes from -0.4% to +0.5% with
>=99% confidence.
Confidence is based on Welch's t-test.
v4, Pekka Paalanen <pekka.paalanen at collabora.co.uk> :
Rebased, re-benchmarked on Raspberry Pi, commit message.
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index c209688..dd6f788 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -611,3 +611,81 @@ generate_composite_function \
/******************************************************************************/
+.macro over_reverse_n_8888_init
+ ldr SRC, [sp, #ARGS_STACK_OFFSET]
+ ldr MASK, =0x00800080
+ /* Split source pixel into RB/AG parts */
+ uxtb16 STRIDE_S, SRC
+ uxtb16 STRIDE_M, SRC, ror #8
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+ uadd8 SCRATCH, MASK, MASK
+ line_saved_regs STRIDE_D, ORIG_W
+.endm
+
+.macro over_reverse_n_8888_newline
+ mov STRIDE_D, #0xFF
+.endm
+
+.macro over_reverse_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ pixld , numbytes, firstreg, DST, 0
+.endm
+
+.macro over_reverse_n_8888_1pixel d, is_only
+ teq WK&d, #0
+ beq 8f /* replace with source */
+ bics ORIG_W, STRIDE_D, WK&d, lsr #24
+ .if is_only == 1
+ beq 49f /* skip store */
+ .else
+ beq 9f /* write same value back */
+ .endif
+ mla SCRATCH, STRIDE_S, ORIG_W, MASK /* red/blue */
+ mla ORIG_W, STRIDE_M, ORIG_W, MASK /* alpha/green */
+ uxtab16 SCRATCH, SCRATCH, SCRATCH, ror #8
+ uxtab16 ORIG_W, ORIG_W, ORIG_W, ror #8
+ mov SCRATCH, SCRATCH, ror #8
+ sel ORIG_W, SCRATCH, ORIG_W
+ uqadd8 WK&d, WK&d, ORIG_W
+ b 9f
+8: mov WK&d, SRC
+9:
+.endm
+
+.macro over_reverse_n_8888_tail numbytes, reg1, reg2, reg3, reg4
+ .if numbytes == 4
+ over_reverse_n_8888_1pixel reg1, 1
+ .else
+ and SCRATCH, WK&reg1, WK&reg2
+ .if numbytes == 16
+ and SCRATCH, SCRATCH, WK&reg3
+ and SCRATCH, SCRATCH, WK&reg4
+ .endif
+ mvns SCRATCH, SCRATCH, asr #24
+ beq 49f /* skip store if all opaque */
+ over_reverse_n_8888_1pixel reg1, 0
+ over_reverse_n_8888_1pixel reg2, 0
+ .if numbytes == 16
+ over_reverse_n_8888_1pixel reg3, 0
+ over_reverse_n_8888_1pixel reg4, 0
+ .endif
+ .endif
+ pixst , numbytes, reg1, DST
+49:
+.endm
+
+.macro over_reverse_n_8888_process_tail cond, numbytes, firstreg
+ over_reverse_n_8888_tail numbytes, firstreg, %(firstreg+1), %(firstreg+2), %(firstreg+3)
+.endm
+
+generate_composite_function \
+ pixman_composite_over_reverse_n_8888_asm_armv6, 0, 0, 32 \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_CORRUPTS_PSR | FLAG_PROCESS_DOES_STORE | FLAG_SPILL_LINE_VARS | FLAG_PROCESS_CORRUPTS_SCRATCH, \
+ 3, /* prefetch distance */ \
+ over_reverse_n_8888_init, \
+ over_reverse_n_8888_newline, \
+ nop_macro, /* cleanup */ \
+ over_reverse_n_8888_process_head, \
+ over_reverse_n_8888_process_tail
+
+/******************************************************************************/
+
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index af062e1..8fbc439 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -47,6 +47,9 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, add_8_8,
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
+ uint32_t, 1)
+
PIXMAN_ARM_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, armv6, over_8888_n_8888,
uint32_t, 1, uint32_t, 1)
@@ -225,6 +228,9 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
+
PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, armv6_composite_add_8_8),
PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, armv6_composite_over_n_8_8888),
commit 56622140e3a8175c8ccc82c9717adf8372043364
Author: Siarhei Siamashka <siarhei.siamashka at gmail.com>
Date: Fri Mar 7 08:23:10 2014 +0200
test: Fix OpenMP clauses for the tolerance-test
Compiling with the Intel Compiler reveals a problem:
tolerance-test.c(350): error: index variable "i" of for statement following an OpenMP for pragma must be private
# pragma omp parallel for default(none) shared(i) private (result)
^
In addition to this, the 'result' variable also should not be private
(otherwise its value does not survive after the end of the loop). It
needs to be either shared or use the reduction clause to describe how
the results from multiple threads are combined together. Reduction
seems to be more appropriate here.
diff --git a/test/tolerance-test.c b/test/tolerance-test.c
index 5625630..320bb7f 100644
--- a/test/tolerance-test.c
+++ b/test/tolerance-test.c
@@ -347,12 +347,12 @@ main (int argc, const char *argv[])
else
{
#ifdef USE_OPENMP
-# pragma omp parallel for default(none) shared(i) private (result)
+# pragma omp parallel for default(none) reduction(|:result)
#endif
for (i = 0; i < N_TESTS; ++i)
{
if (!do_check (i))
- result = 1;
+ result |= 1;
}
}
commit 840912b31159aa8ac7be4ea0cee8bdef95a539a4
Author: Siarhei Siamashka <siarhei.siamashka at gmail.com>
Date: Fri Mar 7 06:39:42 2014 +0200
configure.ac: Check if the compiler supports GCC vector extensions
The Intel Compiler 14.0.0 claims compatibility with GCC 4.7.3
via the __GNUC__/__GNUC_MINOR__ macros, but does not provide the same
level of GCC vector extensions support as the original GCC compiler:
http://gcc.gnu.org/onlinedocs/gcc/Vector-Extensions.html
Which results in the following compilation failure:
In file included from ../test/utils.h(7),
from ../test/utils.c(3):
../test/utils-prng.h(138): error: expression must have integral type
uint32x4 e = x->a - ((x->b << 27) + (x->b >> (32 - 27)));
^
The problem is fixed by doing a special check in configure for
this feature.
diff --git a/configure.ac b/configure.ac
index 6327972..0339494 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1061,6 +1061,24 @@ fi
AC_MSG_RESULT($support_for_builtin_clz)
+dnl =====================================
+dnl GCC vector extensions
+
+support_for_gcc_vector_extensions=no
+
+AC_MSG_CHECKING(for GCC vector extensions)
+AC_LINK_IFELSE([AC_LANG_SOURCE([[
+unsigned int __attribute__ ((vector_size(16))) e, a, b;
+int main (void) { e = a - ((b << 27) + (b >> (32 - 27))) + 1; return e[0]; }
+]])], support_for_gcc_vector_extensions=yes)
+
+if test x$support_for_gcc_vector_extensions = xyes; then
+ AC_DEFINE([HAVE_GCC_VECTOR_EXTENSIONS], [],
+ [Whether the compiler supports GCC vector extensions])
+fi
+
+AC_MSG_RESULT($support_for_gcc_vector_extensions)
+
dnl ==================
dnl libpng
diff --git a/test/utils-prng.c b/test/utils-prng.c
index 7b32e35..c27b5be 100644
--- a/test/utils-prng.c
+++ b/test/utils-prng.c
@@ -27,7 +27,7 @@
#include "utils.h"
#include "utils-prng.h"
-#if defined(GCC_VECTOR_EXTENSIONS_SUPPORTED) && defined(__SSE2__)
+#if defined(HAVE_GCC_VECTOR_EXTENSIONS) && defined(__SSE2__)
#include <xmmintrin.h>
#endif
@@ -52,7 +52,7 @@ void smallprng_srand_r (smallprng_t *x, uint32_t seed)
*/
void prng_srand_r (prng_t *x, uint32_t seed)
{
-#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+#ifdef HAVE_GCC_VECTOR_EXTENSIONS
int i;
prng_rand_128_data_t dummy;
smallprng_srand_r (&x->p0, seed);
@@ -75,7 +75,7 @@ void prng_srand_r (prng_t *x, uint32_t seed)
static force_inline void
store_rand_128_data (void *addr, prng_rand_128_data_t *d, int aligned)
{
-#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+#ifdef HAVE_GCC_VECTOR_EXTENSIONS
if (aligned)
{
*(uint8x16 *)addr = d->vb;
@@ -120,7 +120,7 @@ randmemset_internal (prng_t *prng,
{
prng_rand_128_r (&local_prng, &t);
prng_rand_128_r (&local_prng, &randdata);
-#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+#ifdef HAVE_GCC_VECTOR_EXTENSIONS
if (flags & RANDMEMSET_MORE_FF)
{
const uint8x16 const_C0 =
@@ -199,7 +199,7 @@ randmemset_internal (prng_t *prng,
}
else
{
-#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+#ifdef HAVE_GCC_VECTOR_EXTENSIONS
const uint8x16 bswap_shufflemask =
{
3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
diff --git a/test/utils-prng.h b/test/utils-prng.h
index 564ffce..f9ae8dd 100644
--- a/test/utils-prng.h
+++ b/test/utils-prng.h
@@ -79,8 +79,7 @@
/*****************************************************************************/
-#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
-#define GCC_VECTOR_EXTENSIONS_SUPPORTED
+#ifdef HAVE_GCC_VECTOR_EXTENSIONS
typedef uint32_t uint32x4 __attribute__ ((vector_size(16)));
typedef uint8_t uint8x16 __attribute__ ((vector_size(16)));
#endif
@@ -92,7 +91,7 @@ typedef struct
typedef struct
{
-#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+#ifdef HAVE_GCC_VECTOR_EXTENSIONS
uint32x4 a, b, c, d;
#else
smallprng_t p1, p2, p3, p4;
@@ -104,7 +103,7 @@ typedef union
{
uint8_t b[16];
uint32_t w[4];
-#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+#ifdef HAVE_GCC_VECTOR_EXTENSIONS
uint8x16 vb;
uint32x4 vw;
#endif
@@ -134,7 +133,7 @@ prng_rand_r (prng_t *x)
static force_inline void
prng_rand_128_r (prng_t *x, prng_rand_128_data_t *data)
{
-#ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED
+#ifdef HAVE_GCC_VECTOR_EXTENSIONS
uint32x4 e = x->a - ((x->b << 27) + (x->b >> (32 - 27)));
x->a = x->b ^ ((x->c << 17) ^ (x->c >> (32 - 17)));
x->b = x->c + x->d;
More information about the xorg-commit
mailing list