pixman: Branch 'master' - 4 commits
Siarhei Siamashka
siamashka at kemper.freedesktop.org
Mon Sep 13 09:25:10 PDT 2010
pixman/pixman-arm-neon-asm.S | 114 +++++++++++++++++++++++++++----------------
pixman/pixman-arm-neon-asm.h | 59 ++++++++++++++++++++++
pixman/pixman-arm-neon.c | 4 +
test/blitters-test.c | 3 -
test/scaling-test.c | 3 +
test/utils.h | 36 +++++++++++++
6 files changed, 176 insertions(+), 43 deletions(-)
New commits:
commit ba6c98fc4b8f0ee02b846fd31c7e93e18e92d0af
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Thu Sep 9 12:02:59 2010 +0300
test: detection of possible floating point registers corruption
Added a pair of macros which can help to detect corruption
of floating point registers after a function call. This may
happen if _mm_empty() call is forgotten in MMX/SSE2 fast
path code, or ARM NEON assembly optimized function
forgets to save/restore d8-d15 registers before use.
diff --git a/test/blitters-test.c b/test/blitters-test.c
index fb2c4b9..9dd9163 100644
--- a/test/blitters-test.c
+++ b/test/blitters-test.c
@@ -267,6 +267,7 @@ test_composite (int testnum, int verbose)
uint32_t *dstbuf, *srcbuf, *maskbuf;
uint32_t crc32;
int max_width, max_height, max_extra_stride;
+ FLOAT_REGS_CORRUPTION_DETECTOR_START ();
max_width = max_height = 24 + testnum / 10000;
max_extra_stride = 4 + testnum / 1000000;
@@ -410,7 +411,7 @@ test_composite (int testnum, int verbose)
free_random_image (0, mask_img, -1);
}
-
+ FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
return crc32;
}
diff --git a/test/scaling-test.c b/test/scaling-test.c
index 6aef823..b90584b 100644
--- a/test/scaling-test.c
+++ b/test/scaling-test.c
@@ -46,6 +46,7 @@ test_composite (int testnum,
uint32_t * srcbuf;
uint32_t * dstbuf;
uint32_t crc32;
+ FLOAT_REGS_CORRUPTION_DETECTOR_START ();
lcg_srand (testnum);
@@ -234,6 +235,8 @@ test_composite (int testnum,
crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height);
free (srcbuf);
free (dstbuf);
+
+ FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
return crc32;
}
diff --git a/test/utils.h b/test/utils.h
index bfb76a5..a39af02 100644
--- a/test/utils.h
+++ b/test/utils.h
@@ -1,5 +1,6 @@
#include <stdlib.h>
#include <config.h>
+#include <assert.h>
#include "pixman-private.h" /* For 'inline' definition */
/* A primitive pseudorandom number generator,
@@ -65,3 +66,38 @@ fuzzer_test_main (const char *test_name,
void
fail_after (int seconds, const char *msg);
+
+/* A pair of macros which can help to detect corruption of
+ * floating point registers after a function call. This may
+ * happen if _mm_empty() call is forgotten in MMX/SSE2 fast
+ * path code, or ARM NEON assembly optimized function forgets
+ * to save/restore d8-d15 registers before use.
+ */
+
+#define FLOAT_REGS_CORRUPTION_DETECTOR_START() \
+ static volatile double frcd_volatile_constant1 = 123451; \
+ static volatile double frcd_volatile_constant2 = 123452; \
+ static volatile double frcd_volatile_constant3 = 123453; \
+ static volatile double frcd_volatile_constant4 = 123454; \
+ static volatile double frcd_volatile_constant5 = 123455; \
+ static volatile double frcd_volatile_constant6 = 123456; \
+ static volatile double frcd_volatile_constant7 = 123457; \
+ static volatile double frcd_volatile_constant8 = 123458; \
+ double frcd_canary_variable1 = frcd_volatile_constant1; \
+ double frcd_canary_variable2 = frcd_volatile_constant2; \
+ double frcd_canary_variable3 = frcd_volatile_constant3; \
+ double frcd_canary_variable4 = frcd_volatile_constant4; \
+ double frcd_canary_variable5 = frcd_volatile_constant5; \
+ double frcd_canary_variable6 = frcd_volatile_constant6; \
+ double frcd_canary_variable7 = frcd_volatile_constant7; \
+ double frcd_canary_variable8 = frcd_volatile_constant8;
+
+#define FLOAT_REGS_CORRUPTION_DETECTOR_FINISH() \
+ assert (frcd_canary_variable1 == frcd_volatile_constant1); \
+ assert (frcd_canary_variable2 == frcd_volatile_constant2); \
+ assert (frcd_canary_variable3 == frcd_volatile_constant3); \
+ assert (frcd_canary_variable4 == frcd_volatile_constant4); \
+ assert (frcd_canary_variable5 == frcd_volatile_constant5); \
+ assert (frcd_canary_variable6 == frcd_volatile_constant6); \
+ assert (frcd_canary_variable7 == frcd_volatile_constant7); \
+ assert (frcd_canary_variable8 == frcd_volatile_constant8);
commit e470c0dc5bcbf1e153bf035a823a7bdf629e6e25
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Tue Sep 7 01:15:57 2010 +0300
ARM: added 'neon_composite_over_0565_8_0565' fast path
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index f979f31..9f6568f 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1808,3 +1808,65 @@ generate_composite_function \
0, /* dst_r_basereg */ \
0, /* src_basereg */ \
0 /* mask_basereg */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_0565_8_0565_process_pixblock_head
+ /* mask is in d15 */
+ convert_0565_to_x888 q4, d2, d1, d0
+ convert_0565_to_x888 q5, d6, d5, d4
+ /* source pixel data is in {d0, d1, d2, XX} */
+ /* destination pixel data is in {d4, d5, d6, XX} */
+ vmvn.8 d7, d15
+ vmull.u8 q6, d15, d2
+ vmull.u8 q5, d15, d1
+ vmull.u8 q4, d15, d0
+ vmull.u8 q8, d7, d4
+ vmull.u8 q9, d7, d5
+ vmull.u8 q13, d7, d6
+ vrshr.u16 q12, q6, #8
+ vrshr.u16 q11, q5, #8
+ vrshr.u16 q10, q4, #8
+ vraddhn.u16 d2, q6, q12
+ vraddhn.u16 d1, q5, q11
+ vraddhn.u16 d0, q4, q10
+.endm
+
+.macro pixman_composite_over_0565_8_0565_process_pixblock_tail
+ vrshr.u16 q14, q8, #8
+ vrshr.u16 q15, q9, #8
+ vrshr.u16 q12, q13, #8
+ vraddhn.u16 d28, q14, q8
+ vraddhn.u16 d29, q15, q9
+ vraddhn.u16 d30, q12, q13
+ vqadd.u8 q0, q0, q14
+ vqadd.u8 q1, q1, q15
+ /* 32bpp result is in {d0, d1, d2, XX} */
+ convert_8888_to_0565 d2, d1, d0, q14, q15, q3
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
+ vld1.8 {d15}, [MASK]!
+ pixman_composite_over_0565_8_0565_process_pixblock_tail
+ vld1.16 {d8, d9}, [SRC]!
+ vld1.16 {d10, d11}, [DST_R, :128]!
+ cache_preload 8, 8
+ pixman_composite_over_0565_8_0565_process_pixblock_head
+ vst1.16 {d28, d29}, [DST_W, :128]!
+.endm
+
+generate_composite_function \
+ pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \
+ FLAG_DST_READWRITE, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
+ pixman_composite_over_0565_8_0565_process_pixblock_head, \
+ pixman_composite_over_0565_8_0565_process_pixblock_tail, \
+ pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
+ 28, /* dst_w_basereg */ \
+ 10, /* dst_r_basereg */ \
+ 8, /* src_basereg */ \
+ 15 /* mask_basereg */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index dc88f50..ece6054 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -90,6 +90,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
uint32_t, 1, uint32_t, 1, uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
uint32_t, 1, uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
+ uint16_t, 1, uint8_t, 1, uint16_t, 1)
void
pixman_composite_src_n_8_asm_neon (int32_t w,
@@ -241,6 +243,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, x8b8g8r8, neon_composite_over_8888_8_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8, r5g6b5, neon_composite_over_8888_8_0565),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8, b5g6r5, neon_composite_over_8888_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, r5g6b5, a8, r5g6b5, neon_composite_over_0565_8_0565),
+ PIXMAN_STD_FAST_PATH (OVER, b5g6r5, a8, b5g6r5, neon_composite_over_0565_8_0565),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, neon_composite_over_8888_0565),
PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, neon_composite_over_8888_0565),
commit a5bf7c3b1a103c6b676c864df009b1f0ad3f8195
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Tue Sep 7 01:10:43 2010 +0300
ARM: helper macros for conversion between 8888/x888/0565 formats
diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
index 4a0290f..dec73d7 100644
--- a/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman-arm-neon-asm.h
@@ -920,3 +920,46 @@ fname:
.macro default_cleanup_need_all_regs
vpop {d8-d15}
.endm
+
+/******************************************************************************/
+
+/*
+ * Conversion of 8 r5g6b5 pixels packed in 128-bit register (in)
+ * into a planar a8r8g8b8 format (with a, r, g, b color components
+ * stored into 64-bit registers out_a, out_r, out_g, out_b respectively).
+ *
+ * Warning: the conversion is destructive and the original
+ * value (in) is lost.
+ */
+.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b
+ vshrn.u16 out_r, in, #8
+ vshrn.u16 out_g, in, #3
+ vsli.u16 in, in, #5
+ vmov.u8 out_a, #255
+ vsri.u8 out_r, out_r, #5
+ vsri.u8 out_g, out_g, #6
+ vshrn.u16 out_b, in, #2
+.endm
+
+.macro convert_0565_to_x888 in, out_r, out_g, out_b
+ vshrn.u16 out_r, in, #8
+ vshrn.u16 out_g, in, #3
+ vsli.u16 in, in, #5
+ vsri.u8 out_r, out_r, #5
+ vsri.u8 out_g, out_g, #6
+ vshrn.u16 out_b, in, #2
+.endm
+
+/*
+ * Conversion from planar a8r8g8b8 format (only the r, g, b color components
+ * are used, from 64-bit registers in_r, in_g, in_b respectively) into 8 r5g6b5
+ * pixels packed in 128-bit register (out). Requires two temporary 128-bit
+ * registers (tmp1, tmp2)
+ */
+.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2
+ vshll.u8 tmp1, in_g, #8
+ vshll.u8 out, in_r, #8
+ vshll.u8 tmp2, in_b, #8
+ vsri.u16 out, tmp1, #5
+ vsri.u16 out, tmp2, #11
+.endm
commit 8e299702f315fc1f0f97ab93d905ed5d9c41410e
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date: Tue Sep 7 01:05:44 2010 +0300
ARM: common init/cleanup macro for saving/restoring NEON registers
This is a typical prologue/epilogue for many NEON fast path functions, so
it makes sense to provide common reusable macros for it in the header file.
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 325f6e7..f979f31 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -892,21 +892,13 @@ generate_composite_function \
vst1.16 {d28, d29}, [DST_W, :128]!
.endm
-.macro pixman_composite_over_8888_8_0565_init
- vpush {d8-d15}
-.endm
-
-.macro pixman_composite_over_8888_8_0565_cleanup
- vpop {d8-d15}
-.endm
-
generate_composite_function \
pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
8, /* number of pixels, processed in a single block */ \
5, /* prefetch distance */ \
- pixman_composite_over_8888_8_0565_init, \
- pixman_composite_over_8888_8_0565_cleanup, \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
pixman_composite_over_n_8_0565_process_pixblock_head, \
pixman_composite_over_n_8_0565_process_pixblock_tail, \
pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
@@ -1519,14 +1511,6 @@ generate_composite_function_single_scanline \
vraddhn.u16 d31, q13, q11
.endm
-.macro pixman_composite_out_reverse_8888_8888_8888_init
- vpush {d8-d15}
-.endm
-
-.macro pixman_composite_out_reverse_8888_8888_8888_cleanup
- vpop {d8-d15}
-.endm
-
/* TODO: expand macros and do better instructions scheduling */
.macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head
vld4.8 {d4, d5, d6, d7}, [DST_R, :128]!
@@ -1542,8 +1526,8 @@ generate_composite_function_single_scanline \
pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \
FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
8, /* number of pixels, processed in a single block */ \
- pixman_composite_out_reverse_8888_8888_8888_init, \
- pixman_composite_out_reverse_8888_8888_8888_cleanup, \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \
pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \
pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \
@@ -1609,21 +1593,13 @@ generate_composite_function \
vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
.endm
-.macro pixman_composite_over_8888_8888_8888_init
- vpush {d8-d15}
-.endm
-
-.macro pixman_composite_over_8888_8888_8888_cleanup
- vpop {d8-d15}
-.endm
-
generate_composite_function \
pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
8, /* number of pixels, processed in a single block */ \
5, /* prefetch distance */ \
- pixman_composite_over_8888_8888_8888_init, \
- pixman_composite_over_8888_8888_8888_cleanup, \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
pixman_composite_over_8888_n_8888_process_pixblock_head, \
pixman_composite_over_8888_n_8888_process_pixblock_tail, \
pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
@@ -1636,8 +1612,8 @@ generate_composite_function_single_scanline \
pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \
FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
8, /* number of pixels, processed in a single block */ \
- pixman_composite_over_8888_8888_8888_init, \
- pixman_composite_over_8888_8888_8888_cleanup, \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
pixman_composite_over_8888_n_8888_process_pixblock_head, \
pixman_composite_over_8888_n_8888_process_pixblock_tail, \
pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
@@ -1659,21 +1635,13 @@ generate_composite_function_single_scanline \
vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
.endm
-.macro pixman_composite_over_8888_8_8888_init
- vpush {d8-d15}
-.endm
-
-.macro pixman_composite_over_8888_8_8888_cleanup
- vpop {d8-d15}
-.endm
-
generate_composite_function \
pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
8, /* number of pixels, processed in a single block */ \
5, /* prefetch distance */ \
- pixman_composite_over_8888_8_8888_init, \
- pixman_composite_over_8888_8_8888_cleanup, \
+ default_init_need_all_regs, \
+ default_cleanup_need_all_regs, \
pixman_composite_over_8888_n_8888_process_pixblock_head, \
pixman_composite_over_8888_n_8888_process_pixblock_tail, \
pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
index 56c3fae..4a0290f 100644
--- a/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman-arm-neon-asm.h
@@ -899,8 +899,24 @@ fname:
.endfunc
.endm
+/* Default prologue/epilogue, nothing special needs to be done */
+
.macro default_init
.endm
.macro default_cleanup
.endm
+
+/*
+ * Prologue/epilogue variant which additionally saves/restores d8-d15
+ * registers (they need to be saved/restored by callee according to ABI).
+ * This is required if the code needs to use all the NEON registers.
+ */
+
+.macro default_init_need_all_regs
+ vpush {d8-d15}
+.endm
+
+.macro default_cleanup_need_all_regs
+ vpop {d8-d15}
+.endm
More information about the xorg-commit
mailing list