pixman: Branch 'master'

Oded Gabbay gabbayo at kemper.freedesktop.org
Tue Sep 29 04:23:50 PDT 2015


 pixman/pixman-vmx.c |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

New commits:
commit 90e62c086766afffd289a321c7de8ea4b5cac87d
Author: Siarhei Siamashka <siarhei.siamashka at gmail.com>
Date:   Fri Sep 4 15:39:00 2015 +0300

    vmx: implement fast path vmx_composite_over_n_8888
    
    Running "lowlevel-blt-bench over_n_8888" on Playstation3 3.2GHz,
    Gentoo ppc (32-bit userland) gave the following results:
    
    before:  over_n_8888 =  L1: 147.47  L2: 205.86  M:121.07
    after:   over_n_8888 =  L1: 287.27  L2: 261.09  M:133.48
    
    Cairo non-trimmed benchmarks on POWER8, 3.4GHz 8 Cores:
    
    ocitysmap          659.69  -> 611.71   :  1.08x speedup
    xfce4-terminal-a1  2725.22 -> 2547.47  :  1.07x speedup
    
    Signed-off-by: Siarhei Siamashka <siarhei.siamashka at gmail.com>
    Signed-off-by: Oded Gabbay <oded.gabbay at gmail.com>

diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 3eaa866..41efdcf 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -2628,6 +2628,58 @@ vmx_composite_src_x888_8888 (pixman_implementation_t *imp,
 }
 
 static void
+vmx_composite_over_n_8888 (pixman_implementation_t *imp,
+                           pixman_composite_info_t *info)
+{
+    PIXMAN_COMPOSITE_ARGS (info);
+    uint32_t *dst_line, *dst;
+    uint32_t src, ia;
+    int      i, w, dst_stride;
+    vector unsigned int vdst, vsrc, via;
+
+    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
+
+    if (src == 0)
+	return;
+
+    PIXMAN_IMAGE_GET_LINE (
+	dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+
+    vsrc = (vector unsigned int){src, src, src, src};
+    via = negate (splat_alpha (vsrc));
+    ia = ALPHA_8 (~src);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	w = width;
+
+	while (w && ((uintptr_t)dst & 15))
+	{
+	    uint32_t d = *dst;
+	    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
+	    *dst++ = d;
+	    w--;
+	}
+
+	for (i = w / 4; i > 0; i--)
+	{
+	    vdst = pix_multiply (load_128_aligned (dst), via);
+	    save_128_aligned (dst, pix_add (vsrc, vdst));
+	    dst += 4;
+	}
+
+	for (i = w % 4; --i >= 0;)
+	{
+	    uint32_t d = dst[i];
+	    UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, src);
+	    dst[i] = d;
+	}
+    }
+}
+
+static void
 vmx_composite_over_8888_8888 (pixman_implementation_t *imp,
                                pixman_composite_info_t *info)
 {
@@ -2936,6 +2988,8 @@ FAST_NEAREST_MAINLOOP (vmx_8888_8888_normal_OVER,
 
 static const pixman_fast_path_t vmx_fast_paths[] =
 {
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null, a8r8g8b8, vmx_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null, x8r8g8b8, vmx_composite_over_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, vmx_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, vmx_composite_over_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, vmx_composite_over_8888_8888),


More information about the xorg-commit mailing list