pixman: Branch 'master' - 4 commits

Siarhei Siamashka siamashka at kemper.freedesktop.org
Mon Sep 13 09:25:10 PDT 2010


 pixman/pixman-arm-neon-asm.S |  114 +++++++++++++++++++++++++++----------------
 pixman/pixman-arm-neon-asm.h |   59 ++++++++++++++++++++++
 pixman/pixman-arm-neon.c     |    4 +
 test/blitters-test.c         |    3 -
 test/scaling-test.c          |    3 +
 test/utils.h                 |   36 +++++++++++++
 6 files changed, 176 insertions(+), 43 deletions(-)

New commits:
commit ba6c98fc4b8f0ee02b846fd31c7e93e18e92d0af
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Thu Sep 9 12:02:59 2010 +0300

    test: detection of possible floating point registers corruption
    
    Added a pair of macros which can help to detect corruption
    of floating point registers after a function call. This may
    happen if _mm_empty() call is forgotten in MMX/SSE2 fast
    path code, or ARM NEON assembly optimized function
    forgets to save/restore d8-d15 registers before use.

diff --git a/test/blitters-test.c b/test/blitters-test.c
index fb2c4b9..9dd9163 100644
--- a/test/blitters-test.c
+++ b/test/blitters-test.c
@@ -267,6 +267,7 @@ test_composite (int testnum, int verbose)
     uint32_t *dstbuf, *srcbuf, *maskbuf;
     uint32_t crc32;
     int max_width, max_height, max_extra_stride;
+    FLOAT_REGS_CORRUPTION_DETECTOR_START ();
 
     max_width = max_height = 24 + testnum / 10000;
     max_extra_stride = 4 + testnum / 1000000;
@@ -410,7 +411,7 @@ test_composite (int testnum, int verbose)
 	    free_random_image (0, mask_img, -1);
     }
 
-
+    FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
     return crc32;
 }
 
diff --git a/test/scaling-test.c b/test/scaling-test.c
index 6aef823..b90584b 100644
--- a/test/scaling-test.c
+++ b/test/scaling-test.c
@@ -46,6 +46,7 @@ test_composite (int      testnum,
     uint32_t *         srcbuf;
     uint32_t *         dstbuf;
     uint32_t           crc32;
+    FLOAT_REGS_CORRUPTION_DETECTOR_START ();
 
     lcg_srand (testnum);
 
@@ -234,6 +235,8 @@ test_composite (int      testnum,
     crc32 = compute_crc32 (0, dstbuf, dst_stride * dst_height);
     free (srcbuf);
     free (dstbuf);
+
+    FLOAT_REGS_CORRUPTION_DETECTOR_FINISH ();
     return crc32;
 }
 
diff --git a/test/utils.h b/test/utils.h
index bfb76a5..a39af02 100644
--- a/test/utils.h
+++ b/test/utils.h
@@ -1,5 +1,6 @@
 #include <stdlib.h>
 #include <config.h>
+#include <assert.h>
 #include "pixman-private.h" /* For 'inline' definition */
 
 /* A primitive pseudorandom number generator,
@@ -65,3 +66,38 @@ fuzzer_test_main (const char *test_name,
 
 void
 fail_after (int seconds, const char *msg);
+
+/* A pair of macros which can help to detect corruption of
+ * floating point registers after a function call. This may
+ * happen if _mm_empty() call is forgotten in MMX/SSE2 fast
+ * path code, or ARM NEON assembly optimized function forgets
+ * to save/restore d8-d15 registers before use.
+ */
+
+#define FLOAT_REGS_CORRUPTION_DETECTOR_START()                 \
+    static volatile double frcd_volatile_constant1 = 123451;   \
+    static volatile double frcd_volatile_constant2 = 123452;   \
+    static volatile double frcd_volatile_constant3 = 123453;   \
+    static volatile double frcd_volatile_constant4 = 123454;   \
+    static volatile double frcd_volatile_constant5 = 123455;   \
+    static volatile double frcd_volatile_constant6 = 123456;   \
+    static volatile double frcd_volatile_constant7 = 123457;   \
+    static volatile double frcd_volatile_constant8 = 123458;   \
+    double frcd_canary_variable1 = frcd_volatile_constant1;    \
+    double frcd_canary_variable2 = frcd_volatile_constant2;    \
+    double frcd_canary_variable3 = frcd_volatile_constant3;    \
+    double frcd_canary_variable4 = frcd_volatile_constant4;    \
+    double frcd_canary_variable5 = frcd_volatile_constant5;    \
+    double frcd_canary_variable6 = frcd_volatile_constant6;    \
+    double frcd_canary_variable7 = frcd_volatile_constant7;    \
+    double frcd_canary_variable8 = frcd_volatile_constant8;
+
+#define FLOAT_REGS_CORRUPTION_DETECTOR_FINISH()                \
+    assert (frcd_canary_variable1 == frcd_volatile_constant1); \
+    assert (frcd_canary_variable2 == frcd_volatile_constant2); \
+    assert (frcd_canary_variable3 == frcd_volatile_constant3); \
+    assert (frcd_canary_variable4 == frcd_volatile_constant4); \
+    assert (frcd_canary_variable5 == frcd_volatile_constant5); \
+    assert (frcd_canary_variable6 == frcd_volatile_constant6); \
+    assert (frcd_canary_variable7 == frcd_volatile_constant7); \
+    assert (frcd_canary_variable8 == frcd_volatile_constant8);
commit e470c0dc5bcbf1e153bf035a823a7bdf629e6e25
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Tue Sep 7 01:15:57 2010 +0300

    ARM: added 'neon_composite_over_0565_8_0565' fast path

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index f979f31..9f6568f 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1808,3 +1808,65 @@ generate_composite_function \
     0, /* dst_r_basereg */ \
     0, /* src_basereg   */ \
     0  /* mask_basereg  */
+
+/******************************************************************************/
+
+.macro pixman_composite_over_0565_8_0565_process_pixblock_head
+    /* mask is in d15 */
+    convert_0565_to_x888 q4, d2, d1, d0
+    convert_0565_to_x888 q5, d6, d5, d4
+    /* source pixel data is in      {d0, d1, d2, XX} */
+    /* destination pixel data is in {d4, d5, d6, XX} */
+    vmvn.8      d7,  d15
+    vmull.u8    q6,  d15, d2
+    vmull.u8    q5,  d15, d1
+    vmull.u8    q4,  d15, d0
+    vmull.u8    q8,  d7,  d4
+    vmull.u8    q9,  d7,  d5
+    vmull.u8    q13, d7,  d6
+    vrshr.u16   q12, q6,  #8
+    vrshr.u16   q11, q5,  #8
+    vrshr.u16   q10, q4,  #8
+    vraddhn.u16 d2,  q6,  q12
+    vraddhn.u16 d1,  q5,  q11
+    vraddhn.u16 d0,  q4,  q10
+.endm
+
+.macro pixman_composite_over_0565_8_0565_process_pixblock_tail
+    vrshr.u16   q14, q8,  #8
+    vrshr.u16   q15, q9,  #8
+    vrshr.u16   q12, q13, #8
+    vraddhn.u16 d28, q14, q8
+    vraddhn.u16 d29, q15, q9
+    vraddhn.u16 d30, q12, q13
+    vqadd.u8    q0,  q0,  q14
+    vqadd.u8    q1,  q1,  q15
+    /* 32bpp result is in {d0, d1, d2, XX} */
+    convert_8888_to_0565 d2, d1, d0, q14, q15, q3
+.endm
+
+/* TODO: expand macros and do better instructions scheduling */
+.macro pixman_composite_over_0565_8_0565_process_pixblock_tail_head
+    vld1.8     {d15}, [MASK]!
+    pixman_composite_over_0565_8_0565_process_pixblock_tail
+    vld1.16    {d8, d9}, [SRC]!
+    vld1.16    {d10, d11}, [DST_R, :128]!
+    cache_preload 8, 8
+    pixman_composite_over_0565_8_0565_process_pixblock_head
+    vst1.16    {d28, d29}, [DST_W, :128]!
+.endm
+
+generate_composite_function \
+    pixman_composite_over_0565_8_0565_asm_neon, 16, 8, 16, \
+    FLAG_DST_READWRITE, \
+    8, /* number of pixels, processed in a single block */ \
+    5, /* prefetch distance */ \
+    default_init_need_all_regs, \
+    default_cleanup_need_all_regs, \
+    pixman_composite_over_0565_8_0565_process_pixblock_head, \
+    pixman_composite_over_0565_8_0565_process_pixblock_tail, \
+    pixman_composite_over_0565_8_0565_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    10,  /* dst_r_basereg */ \
+    8,  /* src_basereg   */ \
+    15  /* mask_basereg  */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index dc88f50..ece6054 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -90,6 +90,8 @@ PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8888_8888,
                                         uint32_t, 1, uint32_t, 1, uint32_t, 1)
 PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_8888_8_0565,
                                         uint32_t, 1, uint8_t, 1, uint16_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_SRC_MASK_DST (neon, over_0565_8_0565,
+                                        uint16_t, 1, uint8_t, 1, uint16_t, 1)
 
 void
 pixman_composite_src_n_8_asm_neon (int32_t   w,
@@ -241,6 +243,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       x8b8g8r8, neon_composite_over_8888_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8,       r5g6b5,   neon_composite_over_8888_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, a8,       b5g6r5,   neon_composite_over_8888_8_0565),
+    PIXMAN_STD_FAST_PATH (OVER, r5g6b5,   a8,       r5g6b5,   neon_composite_over_0565_8_0565),
+    PIXMAN_STD_FAST_PATH (OVER, b5g6r5,   a8,       b5g6r5,   neon_composite_over_0565_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, a8r8g8b8, neon_composite_over_8888_8888_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null,     r5g6b5,   neon_composite_over_8888_0565),
     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null,     b5g6r5,   neon_composite_over_8888_0565),
commit a5bf7c3b1a103c6b676c864df009b1f0ad3f8195
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Tue Sep 7 01:10:43 2010 +0300

    ARM: helper macros for conversion between 8888/x888/0565 formats

diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
index 4a0290f..dec73d7 100644
--- a/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman-arm-neon-asm.h
@@ -920,3 +920,46 @@ fname:
 .macro default_cleanup_need_all_regs
     vpop        {d8-d15}
 .endm
+
+/******************************************************************************/
+
+/*
+ * Conversion of 8 r5g6b5 pixels packed in 128-bit register (in)
+ * into a planar a8r8g8b8 format (with a, r, g, b color components
+ * stored into 64-bit registers out_a, out_r, out_g, out_b respectively).
+ *
+ * Warning: the conversion is destructive and the original
+ *          value (in) is lost.
+ */
+.macro convert_0565_to_8888 in, out_a, out_r, out_g, out_b
+    vshrn.u16   out_r, in,    #8
+    vshrn.u16   out_g, in,    #3
+    vsli.u16    in,    in,    #5
+    vmov.u8     out_a, #255
+    vsri.u8     out_r, out_r, #5
+    vsri.u8     out_g, out_g, #6
+    vshrn.u16   out_b, in,    #2
+.endm
+
+.macro convert_0565_to_x888 in, out_r, out_g, out_b
+    vshrn.u16   out_r, in,    #8
+    vshrn.u16   out_g, in,    #3
+    vsli.u16    in,    in,    #5
+    vsri.u8     out_r, out_r, #5
+    vsri.u8     out_g, out_g, #6
+    vshrn.u16   out_b, in,    #2
+.endm
+
+/*
+ * Conversion from planar a8r8g8b8 format (with r, g, b color components
+ * in 64-bit registers in_r, in_g, in_b respectively; alpha is not used)
+ * into 8 r5g6b5 pixels packed in 128-bit register (out). Requires two
+ * temporary 128-bit registers (tmp1, tmp2).
+ */
+.macro convert_8888_to_0565 in_r, in_g, in_b, out, tmp1, tmp2
+    vshll.u8    tmp1, in_g, #8
+    vshll.u8    out, in_r, #8
+    vshll.u8    tmp2, in_b, #8
+    vsri.u16    out, tmp1, #5
+    vsri.u16    out, tmp2, #11
+.endm
commit 8e299702f315fc1f0f97ab93d905ed5d9c41410e
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Tue Sep 7 01:05:44 2010 +0300

    ARM: common init/cleanup macro for saving/restoring NEON registers
    
    This is a typical prologue/epilogue for many NEON fast path functions, so
    it makes sense to provide common reusable macros for it in the header file.

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 325f6e7..f979f31 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -892,21 +892,13 @@ generate_composite_function \
     vst1.16     {d28, d29}, [DST_W, :128]!
 .endm
 
-.macro pixman_composite_over_8888_8_0565_init
-    vpush       {d8-d15}
-.endm
-
-.macro pixman_composite_over_8888_8_0565_cleanup
-    vpop        {d8-d15}
-.endm
-
 generate_composite_function \
     pixman_composite_over_8888_8_0565_asm_neon, 32, 8, 16, \
     FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
     8, /* number of pixels, processed in a single block */ \
     5, /* prefetch distance */ \
-    pixman_composite_over_8888_8_0565_init, \
-    pixman_composite_over_8888_8_0565_cleanup, \
+    default_init_need_all_regs, \
+    default_cleanup_need_all_regs, \
     pixman_composite_over_n_8_0565_process_pixblock_head, \
     pixman_composite_over_n_8_0565_process_pixblock_tail, \
     pixman_composite_over_8888_8_0565_process_pixblock_tail_head, \
@@ -1519,14 +1511,6 @@ generate_composite_function_single_scanline \
     vraddhn.u16 d31, q13, q11
 .endm
 
-.macro pixman_composite_out_reverse_8888_8888_8888_init
-    vpush       {d8-d15}
-.endm
-
-.macro pixman_composite_out_reverse_8888_8888_8888_cleanup
-    vpop        {d8-d15}
-.endm
-
 /* TODO: expand macros and do better instructions scheduling */
 .macro pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head
     vld4.8     {d4, d5, d6, d7}, [DST_R, :128]!
@@ -1542,8 +1526,8 @@ generate_composite_function_single_scanline \
     pixman_composite_scanline_out_reverse_mask_asm_neon, 32, 32, 32, \
     FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
     8, /* number of pixels, processed in a single block */ \
-    pixman_composite_out_reverse_8888_8888_8888_init, \
-    pixman_composite_out_reverse_8888_8888_8888_cleanup, \
+    default_init_need_all_regs, \
+    default_cleanup_need_all_regs, \
     pixman_composite_out_reverse_8888_n_8888_process_pixblock_head, \
     pixman_composite_out_reverse_8888_n_8888_process_pixblock_tail, \
     pixman_composite_out_reverse_8888_8888_8888_process_pixblock_tail_head \
@@ -1609,21 +1593,13 @@ generate_composite_function \
     vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
 .endm
 
-.macro pixman_composite_over_8888_8888_8888_init
-    vpush       {d8-d15}
-.endm
-
-.macro pixman_composite_over_8888_8888_8888_cleanup
-    vpop        {d8-d15}
-.endm
-
 generate_composite_function \
     pixman_composite_over_8888_8888_8888_asm_neon, 32, 32, 32, \
     FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
     8, /* number of pixels, processed in a single block */ \
     5, /* prefetch distance */ \
-    pixman_composite_over_8888_8888_8888_init, \
-    pixman_composite_over_8888_8888_8888_cleanup, \
+    default_init_need_all_regs, \
+    default_cleanup_need_all_regs, \
     pixman_composite_over_8888_n_8888_process_pixblock_head, \
     pixman_composite_over_8888_n_8888_process_pixblock_tail, \
     pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
@@ -1636,8 +1612,8 @@ generate_composite_function_single_scanline \
     pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \
     FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
     8, /* number of pixels, processed in a single block */ \
-    pixman_composite_over_8888_8888_8888_init, \
-    pixman_composite_over_8888_8888_8888_cleanup, \
+    default_init_need_all_regs, \
+    default_cleanup_need_all_regs, \
     pixman_composite_over_8888_n_8888_process_pixblock_head, \
     pixman_composite_over_8888_n_8888_process_pixblock_tail, \
     pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
@@ -1659,21 +1635,13 @@ generate_composite_function_single_scanline \
     vst4.8     {d28, d29, d30, d31}, [DST_W, :128]!
 .endm
 
-.macro pixman_composite_over_8888_8_8888_init
-    vpush       {d8-d15}
-.endm
-
-.macro pixman_composite_over_8888_8_8888_cleanup
-    vpop        {d8-d15}
-.endm
-
 generate_composite_function \
     pixman_composite_over_8888_8_8888_asm_neon, 32, 8, 32, \
     FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
     8, /* number of pixels, processed in a single block */ \
     5, /* prefetch distance */ \
-    pixman_composite_over_8888_8_8888_init, \
-    pixman_composite_over_8888_8_8888_cleanup, \
+    default_init_need_all_regs, \
+    default_cleanup_need_all_regs, \
     pixman_composite_over_8888_n_8888_process_pixblock_head, \
     pixman_composite_over_8888_n_8888_process_pixblock_tail, \
     pixman_composite_over_8888_8_8888_process_pixblock_tail_head \
diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
index 56c3fae..4a0290f 100644
--- a/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman-arm-neon-asm.h
@@ -899,8 +899,24 @@ fname:
     .endfunc
 .endm
 
+/* Default prologue/epilogue, nothing special needs to be done */
+
 .macro default_init
 .endm
 
 .macro default_cleanup
 .endm
+
+/*
+ * Prologue/epilogue variant which additionally saves/restores d8-d15
+ * registers (they need to be saved/restored by callee according to ABI).
+ * This is required if the code needs to use all the NEON registers.
+ */
+
+.macro default_init_need_all_regs
+    vpush       {d8-d15}
+.endm
+
+.macro default_cleanup_need_all_regs
+    vpop        {d8-d15}
+.endm


More information about the xorg-commit mailing list