pixman: Branch 'master' - 2 commits
Pekka Paalanen
pq at kemper.freedesktop.org
Thu Sep 17 07:03:55 PDT 2015
pixman/pixman-arm-simd-asm.S | 41 +++++++++++++++++++++++++++++++++++++++++
pixman/pixman-arm-simd.c | 6 ++++++
2 files changed, 47 insertions(+)
New commits:
commit 73e586efb3ee149f76f15d9e549bffa15d8e30ec
Author: Pekka Paalanen <pekka.paalanen at collabora.co.uk>
Date: Mon Sep 7 14:40:49 2015 +0300
armv6: enable over_n_8888
Enable the fast path added in the previous patch by moving the lookup
table entries to their proper locations.
Lowlevel-blt-bench benchmark statistics with 30 iterations, showing the
effect of adding this one patch on top of
"armv6: Add over_n_8888 fast path (disabled)", which was applied on
fd595692941f3d9ddea8934462bd1d18aed07c65.
Before After
Mean StdDev Mean StdDev Confidence Change
L1 12.5 0.04 45.2 0.10 100.00% +263.1%
L2 11.1 0.02 43.2 0.03 100.00% +289.3%
M 9.4 0.00 42.4 0.02 100.00% +351.7%
HT 8.5 0.02 25.4 0.10 100.00% +198.8%
VT 8.4 0.02 22.3 0.07 100.00% +167.0%
R 8.2 0.02 23.1 0.09 100.00% +183.6%
RT 5.4 0.05 11.4 0.21 100.00% +110.3%
At most 3 outliers rejected per test per set.
Iterating here means that lowlevel-blt-bench was executed 30 times, and
the statistics above were computed from the output.
Signed-off-by: Pekka Paalanen <pekka.paalanen at collabora.co.uk>
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 62c0f41..f0d1454 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -242,6 +242,10 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, a8b8g8r8, armv6_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, solid, x8b8g8r8, armv6_composite_over_8888_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, armv6_composite_over_reverse_n_8888),
PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, armv6_composite_over_reverse_n_8888),
@@ -273,11 +277,6 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
{ PIXMAN_OP_NONE },
-
- PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
- PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
};
pixman_implementation_t *
commit 9eb6889b15a180cc94aad8ac97189af5b3a68b96
Author: Ben Avison <bavison at riscosopen.org>
Date: Mon Sep 7 14:40:48 2015 +0300
armv6: Add over_n_8888 fast path (disabled)
This new fast path is initially disabled by putting the entries in the
lookup table after the sentinel. The compiler cannot tell the new code
is not used, so it cannot eliminate the code. Also the lookup table size
will include the new fast path. When the follow-up patch then enables
the new fast path, the binary layout (alignments, size, etc.) will stay
the same compared to the disabled case.
Keeping the binary layout identical is important for benchmarking on
Raspberry Pi 1. The addresses at which functions are loaded will have a
significant impact on benchmark results, causing unexpected performance
changes. Keeping all function addresses the same across the patch
enabling a new fast path improves the reliability of benchmarks.
Benchmark results are included in the patch enabling this fast path.
[Pekka: disabled the fast path, commit message]
Signed-off-by: Pekka Paalanen <pekka.paalanen at collabora.co.uk>
diff --git a/pixman/pixman-arm-simd-asm.S b/pixman/pixman-arm-simd-asm.S
index 7b0727b..a74a0a8 100644
--- a/pixman/pixman-arm-simd-asm.S
+++ b/pixman/pixman-arm-simd-asm.S
@@ -1136,3 +1136,44 @@ generate_composite_function \
in_reverse_8888_8888_process_tail
/******************************************************************************/
+
+.macro over_n_8888_init
+ ldr SRC, [sp, #ARGS_STACK_OFFSET]
+ /* Hold loop invariant in MASK */
+ ldr MASK, =0x00800080
+ /* Hold multiplier for destination in STRIDE_M */
+ mov STRIDE_M, #255
+ sub STRIDE_M, STRIDE_M, SRC, lsr #24
+ /* Set GE[3:0] to 0101 so SEL instructions do what we want */
+ uadd8 SCRATCH, MASK, MASK
+.endm
+
+.macro over_n_8888_process_head cond, numbytes, firstreg, unaligned_src, unaligned_mask, preload
+ pixld , numbytes, firstreg, DST, 0
+.endm
+
+.macro over_n_8888_1pixel dst
+ mul_8888_8 WK&dst, STRIDE_M, SCRATCH, MASK
+ uqadd8 WK&dst, WK&dst, SRC
+.endm
+
+.macro over_n_8888_process_tail cond, numbytes, firstreg
+ .set PROCESS_REG, firstreg
+ .rept numbytes / 4
+ over_n_8888_1pixel %(PROCESS_REG)
+ .set PROCESS_REG, PROCESS_REG+1
+ .endr
+ pixst , numbytes, firstreg, DST
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8888_asm_armv6, 0, 0, 32 \
+ FLAG_DST_READWRITE | FLAG_BRANCH_OVER | FLAG_PROCESS_DOES_STORE \
+ 2, /* prefetch distance */ \
+ over_n_8888_init, \
+ nop_macro, /* newline */ \
+ nop_macro, /* cleanup */ \
+ over_n_8888_process_head, \
+ over_n_8888_process_tail
+
+/******************************************************************************/
diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index f40ff36..62c0f41 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -51,6 +51,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, over_8888_8888,
PIXMAN_ARM_BIND_FAST_PATH_SRC_DST (armv6, in_reverse_8888_8888,
uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, armv6, over_n_8888,
+ uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_N_DST (0, armv6, over_reverse_n_8888,
uint32_t, 1)
@@ -271,6 +273,11 @@ static const pixman_fast_path_t arm_simd_fast_paths[] =
SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, armv6_8888_8888),
{ PIXMAN_OP_NONE },
+
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8r8g8b8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8r8g8b8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, a8b8g8r8, armv6_composite_over_n_8888),
+ PIXMAN_STD_FAST_PATH (OVER, solid, null, x8b8g8r8, armv6_composite_over_n_8888),
};
pixman_implementation_t *
More information about the xorg-commit
mailing list