pixman: Branch 'master' - 2 commits

Siarhei Siamashka siamashka at kemper.freedesktop.org
Sun Oct 10 15:21:58 PDT 2010


 pixman/pixman-arm-neon.c  |    8 +++++--
 pixman/pixman-fast-path.c |   51 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 54 insertions(+), 5 deletions(-)

New commits:
commit 8d76c1b3391e1165aaf9e0f331749aee1394f62c
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Mon Oct 4 04:49:08 2010 +0300

    ARM: restore fallback to ARMv6 implementation from NEON in the delegate chain
    
    After the introduction of the fast path cache, the overhead of having this
    fallback is insignificant. On the other hand, some of the ARM assembly
    optimizations (for example, nearest neighbor scaling) do not need NEON, so
    restoring the fallback keeps them reachable from the NEON implementation.

diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 54b5540..be5d403 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -355,9 +355,13 @@ BIND_COMBINE_U (out_reverse)
 pixman_implementation_t *
 _pixman_implementation_create_arm_neon (void)
 {
-    pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
+#ifdef USE_ARM_SIMD
+    pixman_implementation_t *fallback = _pixman_implementation_create_arm_simd ();
+#else
+    pixman_implementation_t *fallback = _pixman_implementation_create_fast_path ();
+#endif
     pixman_implementation_t *imp =
-	_pixman_implementation_create (general, arm_neon_fast_paths);
+	_pixman_implementation_create (fallback, arm_neon_fast_paths);
 
     imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
     imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
commit c748650d700c2f18f1587f06ada3b58d6ddc18d3
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Wed Sep 8 09:30:23 2010 +0300

    Use more unrolling for scaled src_0565_0565 with nearest filter
    
    Benchmark from Intel Core i7 860:
    
        == before ==
        op=1, src_fmt=10020565, dst_fmt=10020565, speed=1335.29 MPix/s
    
        == after ==
        op=1, src_fmt=10020565, dst_fmt=10020565, speed=1550.96 MPix/s
    
        == performance of nonscaled src_0565_0565 operation as a reference ==
        op=1, src_fmt=10020565, dst_fmt=10020565, speed=2401.31 MPix/s
    
    Benchmark from ARM Cortex-A8:
    
        == before ==
        op=1, src_fmt=10020565, dst_fmt=10020565, speed=81.79 MPix/s
    
        == after ==
        op=1, src_fmt=10020565, dst_fmt=10020565, speed=89.55 MPix/s
    
        == performance of nonscaled src_0565_0565 operation as a reference ==
        op=1, src_fmt=10020565, dst_fmt=10020565, speed=197.44 MPix/s

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index c210919..5d5fa95 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1399,15 +1399,60 @@ FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER);
 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE);
 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD);
 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL);
-FAST_NEAREST (565_565_cover, 0565, 0565, uint16_t, uint16_t, SRC, COVER);
-FAST_NEAREST (565_565_none, 0565, 0565, uint16_t, uint16_t, SRC, NONE);
-FAST_NEAREST (565_565_pad, 0565, 0565, uint16_t, uint16_t, SRC, PAD);
 FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL);
 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER);
 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE);
 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD);
 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL);
 
+/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
+static force_inline void
+scaled_nearest_scanline_565_565_SRC (uint16_t *      dst,
+				     uint16_t *      src,
+				     int32_t         w,
+				     pixman_fixed_t  vx,
+				     pixman_fixed_t  unit_x,
+				     pixman_fixed_t  max_vx)
+{
+    uint16_t tmp1, tmp2, tmp3, tmp4;
+    while ((w -= 4) >= 0)
+    {
+	tmp1 = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+	tmp2 = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+	tmp3 = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+	tmp4 = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+	*dst++ = tmp1;
+	*dst++ = tmp2;
+	*dst++ = tmp3;
+	*dst++ = tmp4;
+    }
+    if (w & 2)
+    {
+	tmp1 = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+	tmp2 = src[pixman_fixed_to_int (vx)];
+	vx += unit_x;
+	*dst++ = tmp1;
+	*dst++ = tmp2;
+    }
+    if (w & 1)
+	*dst++ = src[pixman_fixed_to_int (vx)];
+}
+
+FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
+		       scaled_nearest_scanline_565_565_SRC,
+		       uint16_t, uint16_t, COVER);
+FAST_NEAREST_MAINLOOP (565_565_none_SRC,
+		       scaled_nearest_scanline_565_565_SRC,
+		       uint16_t, uint16_t, NONE);
+FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
+		       scaled_nearest_scanline_565_565_SRC,
+		       uint16_t, uint16_t, PAD);
+
 static force_inline uint32_t
 fetch_nearest (pixman_repeat_t src_repeat,
 	       pixman_format_code_t format,


More information about the xorg-commit mailing list