pixman: Branch 'master' - 2 commits
Siarhei Siamashka
siamashka at kemper.freedesktop.org
Sun Oct 10 15:21:58 PDT 2010
pixman/pixman-arm-neon.c | 8 +++++--
pixman/pixman-fast-path.c | 51 +++++++++++++++++++++++++++++++++++++++++++---
2 files changed, 54 insertions(+), 5 deletions(-)
New commits:
commit 8d76c1b3391e1165aaf9e0f331749aee1394f62c
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Mon Oct 4 04:49:08 2010 +0300
ARM: restore fallback to ARMv6 implementation from NEON in the delegate chain
After fast path cache introduction, the overhead of having this fallback is
insignificant. On the other hand, some of the ARM assembly optimizations (for
example nearest neighbor scaling) do not need NEON.
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 54b5540..be5d403 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -355,9 +355,13 @@ BIND_COMBINE_U (out_reverse)
pixman_implementation_t *
_pixman_implementation_create_arm_neon (void)
{
- pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
+#ifdef USE_ARM_SIMD
+ pixman_implementation_t *fallback = _pixman_implementation_create_arm_simd ();
+#else
+ pixman_implementation_t *fallback = _pixman_implementation_create_fast_path ();
+#endif
pixman_implementation_t *imp =
- _pixman_implementation_create (general, arm_neon_fast_paths);
+ _pixman_implementation_create (fallback, arm_neon_fast_paths);
imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
commit c748650d700c2f18f1587f06ada3b58d6ddc18d3
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Wed Sep 8 09:30:23 2010 +0300
Use more unrolling for scaled src_0565_0565 with nearest filter
Benchmark from Intel Core i7 860:
== before ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=1335.29 MPix/s
== after ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=1550.96 MPix/s
== performance of nonscaled src_0565_0565 operation as a reference ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=2401.31 MPix/s
Benchmark from ARM Cortex-A8:
== before ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=81.79 MPix/s
== after ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=89.55 MPix/s
== performance of nonscaled src_0565_0565 operation as a reference ==
op=1, src_fmt=10020565, dst_fmt=10020565, speed=197.44 MPix/s
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index c210919..5d5fa95 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1399,15 +1399,60 @@ FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER);
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE);
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL);
-FAST_NEAREST (565_565_cover, 0565, 0565, uint16_t, uint16_t, SRC, COVER);
-FAST_NEAREST (565_565_none, 0565, 0565, uint16_t, uint16_t, SRC, NONE);
-FAST_NEAREST (565_565_pad, 0565, 0565, uint16_t, uint16_t, SRC, PAD);
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL);
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER);
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE);
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD);
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL);
+/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
+static force_inline void
+scaled_nearest_scanline_565_565_SRC (uint16_t * dst,
+ uint16_t * src,
+ int32_t w,
+ pixman_fixed_t vx,
+ pixman_fixed_t unit_x,
+ pixman_fixed_t max_vx)
+{
+ uint16_t tmp1, tmp2, tmp3, tmp4;
+ while ((w -= 4) >= 0)
+ {
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp3 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp4 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ *dst++ = tmp1;
+ *dst++ = tmp2;
+ *dst++ = tmp3;
+ *dst++ = tmp4;
+ }
+ if (w & 2)
+ {
+ tmp1 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ tmp2 = src[pixman_fixed_to_int (vx)];
+ vx += unit_x;
+ *dst++ = tmp1;
+ *dst++ = tmp2;
+ }
+ if (w & 1)
+ *dst++ = src[pixman_fixed_to_int (vx)];
+}
+
+FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, COVER);
+FAST_NEAREST_MAINLOOP (565_565_none_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, NONE);
+FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
+ scaled_nearest_scanline_565_565_SRC,
+ uint16_t, uint16_t, PAD);
+
static force_inline uint32_t
fetch_nearest (pixman_repeat_t src_repeat,
pixman_format_code_t format,
More information about the xorg-commit
mailing list