pixman: Branch 'master' - 2 commits

Pekka Paalanen pq at kemper.freedesktop.org
Thu Sep 17 07:03:55 PDT 2015


 pixman/pixman-arm-simd-asm.S |   41 +++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-simd.c     |    6 ++++++
 2 files changed, 47 insertions(+)

New commits:
commit 73e586efb3ee149f76f15d9e549bffa15d8e30ec
Author: Pekka Paalanen <pekka.paalanen at collabora.co.uk>
Date:   Mon Sep 7 14:40:49 2015 +0300

    armv6: enable over_n_8888
    
    Enable the fast path added in the previous patch by moving the lookup
    table entries to their proper locations.
    
    lowlevel-blt-bench benchmark statistics over 30 iterations, showing the
    effect of applying this one patch on top of
    "armv6: Add over_n_8888 fast path (disabled)", which was itself applied
    on top of commit fd595692941f3d9ddea8934462bd1d18aed07c65.
    
           Before          After
          Mean StdDev     Mean StdDev   Confidence   Change
    L1    12.5   0.04     45.2   0.10    100.00%    +263.1%
    L2    11.1   0.02     43.2   0.03    100.00%    +289.3%
    M      9.4   0.00     42.4   0.02    100.00%    +351.7%
    HT     8.5   0.02     25.4   0.10    100.00%    +198.8%
    VT     8.4   0.02     22.3   0.07    100.00%    +167.0%
    R      8.2   0.02     23.1   0.09    100.00%    +183.6%
    RT     5.4   0.05     11.4   0.21    100.00%    +110.3%
    
    At most 3 outliers rejected per test per set.
    
    "Iterations" here means that lowlevel-blt-bench was executed 30 times;
    the statistics above were computed from the output of those runs.
    
    Signed-off-by: Pekka Paalanen <pekka.paalanen at collabora.co.uk>

diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 62c0f41..f0d1454 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -242,6 +242,10 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
 
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888),
     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
 
@@ -273,11 +277,6 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
 
     { PIXMAN_OP_NONE },
-
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
-    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
 };
 
 pixman_implementation_t *
commit 9eb6889b15a180cc94aad8ac97189af5b3a68b96
Author: Ben Avison <bavison at riscosopen.org>
Date:   Mon Sep 7 14:40:48 2015 +0300

    armv6: Add over_n_8888 fast path (disabled)
    
    This new fast path is initially disabled by placing its entries in the
    lookup table after the sentinel. The compiler cannot tell that the new
    code is unused, so it cannot eliminate it, and the lookup table size
    already includes the new entries. When the follow-up patch then enables
    the new fast path, the binary layout (alignments, size, etc.) will stay
    the same compared to the disabled case.
    
    Keeping the binary layout identical is important for benchmarking on
    Raspberry Pi 1. The addresses at which functions are loaded will have a
    significant impact on benchmark results, causing unexpected performance
    changes. Keeping all function addresses the same across the patch
    enabling a new fast path improves the reliability of benchmarks.
    
    Benchmark results are included in the patch enabling this fast path.
    
    [Pekka: disabled the fast path, commit message]
    Signed-off-by: Pekka Paalanen <pekka.paalanen at collabora.co.uk>

diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 7b0727b..a74a0a8 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -1136,3 +1136,44 @@ generate_composite_function \
     in_reverse_8888_8888_process_tail
 
 /******************************************************************************/
+
+.macro over_n_8888_init
+        ldr     SRC, [sp, #ARGS_STACK_OFFSET]
+        /* Hold loop invariant in MASK */
+        ldr     MASK, =0x00800080
+        /* Hold multiplier for destination in STRIDE_M */
+        mov     STRIDE_M, #255
+        sub     STRIDE_M, STRIDE_M, SRC, lsr #24
+        /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+        uadd8   SCRATCH, MASK, MASK
+.endm
+
+.macro over_n_8888_process_head  cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+        pixld   , numbytes, firstreg, DST, 0
+.endm
+
+.macro over_n_8888_1pixel dst
+        mul_8888_8  WK&dst, STRIDE_M, SCRATCH, MASK
+        uqadd8  WK&dst, WK&dst, SRC
+.endm
+
+.macro over_n_8888_process_tail  cond, numbytes, firstreg
+ .set PROCESS_REG, firstreg
+ .rept numbytes / 4
+        over_n_8888_1pixel %(PROCESS_REG)
+  .set PROCESS_REG, PROCESS_REG+1
+ .endr
+        pixst   , numbytes, firstreg, DST
+.endm
+
+generate_composite_function \
+    pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 \
+    FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \
+    2, /* prefetch distance */ \
+    over_n_8888_init, \
+    nop_macro, /* newline */ \
+    nop_macro, /* cleanup */ \
+    over_n_8888_process_head, \
+    over_n_8888_process_tail
+
+/******************************************************************************/
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index f40ff36..62c0f41 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -51,6 +51,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
 PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
                                    uint32_t, 1, uint32_t, 1)
 
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
+                                 uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
                                  uint32_t, 1)
 
@@ -271,6 +273,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
     SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
 
     { PIXMAN_OP_NONE },
+
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
 };
 
 pixman_implementation_t *


More information about the xorg-commit mailing list