pixman: Branch 'master' - 3 commits

Siarhei Siamashka siamashka at kemper.freedesktop.org
Wed Dec 9 05:54:28 PST 2009


 pixman/pixman-arm-neon-asm.S |  130 +++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-neon.c     |    6 +
 2 files changed, 136 insertions(+)

New commits:
commit ce78288d7783a27700223c39e23880f4f425f70b
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Fri Nov 6 02:25:47 2009 +0200

    ARM: added 'neon_composite_src_pixbuf_8888' fast path
    
    This is ARM NEON optimized conversion of native RGBA format used by
    GTK/GDK into native 32bpp RGBA format used by cairo/pixman.

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 8010e80..db1833d 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1408,3 +1408,60 @@ generate_composite_function \
     0, /* dst_r_basereg */ \
     0, /* src_basereg   */ \
     0  /* mask_basereg  */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_head
+    vmull.u8    q8, d3, d0
+    vmull.u8    q9, d3, d1
+    vmull.u8    q10, d3, d2
+.endm
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail
+    vrshr.u16   q11, q8, #8
+    vswp        d3, d31
+    vrshr.u16   q12, q9, #8
+    vrshr.u16   q13, q10, #8
+    vraddhn.u16 d30, q11, q8
+    vraddhn.u16 d29, q12, q9
+    vraddhn.u16 d28, q13, q10
+.endm
+
+.macro pixman_composite_src_pixbuf_8888_process_pixblock_tail_head
+        vrshr.u16   q11, q8, #8
+        vswp        d3, d31
+        vrshr.u16   q12, q9, #8
+        vrshr.u16   q13, q10, #8
+    vld4.8 {d0, d1, d2, d3}, [SRC]!
+        vraddhn.u16 d30, q11, q8
+                                    PF add PF_X, PF_X, #8
+                                    PF tst PF_CTL, #0xF
+                                    PF addne PF_X, PF_X, #8
+                                    PF subne PF_CTL, PF_CTL, #1
+        vraddhn.u16 d29, q12, q9
+        vraddhn.u16 d28, q13, q10
+    vmull.u8    q8, d3, d0
+    vmull.u8    q9, d3, d1
+    vmull.u8    q10, d3, d2
+        vst4.8 {d28, d29, d30, d31}, [DST_W, :128]!
+                                    PF cmp PF_X, ORIG_W
+                                    PF pld, [PF_SRC, PF_X, lsl #src_bpp_shift]
+                                    PF subge PF_X, PF_X, ORIG_W
+                                    PF subges PF_CTL, PF_CTL, #0x10
+                                    PF ldrgeb DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
+.endm
+
+generate_composite_function \
+    pixman_composite_src_pixbuf_8888_asm_neon, 32, 0, 32, \
+    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    10, /* prefetch distance */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_src_pixbuf_8888_process_pixblock_head, \
+    pixman_composite_src_pixbuf_8888_process_pixblock_tail, \
+    pixman_composite_src_pixbuf_8888_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    0, /* dst_r_basereg */ \
+    0, /* src_basereg   */ \
+    0  /* mask_basereg  */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 21ed436..00237da 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -257,6 +257,7 @@ BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
 BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
 BIND_SRC_NULL_DST(src_0888_8888_rev, uint8_t, 3, uint32_t, 1)
 BIND_SRC_NULL_DST(src_0888_0565_rev, uint8_t, 3, uint16_t, 1)
+BIND_SRC_NULL_DST(src_pixbuf_8888, uint32_t, 1, uint32_t, 1)
 BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
 BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
 
@@ -401,6 +402,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
     { PIXMAN_OP_SRC,  PIXMAN_r8g8b8,   PIXMAN_null,     PIXMAN_r8g8b8,   neon_composite_src_0888_0888    },
     { PIXMAN_OP_SRC,  PIXMAN_b8g8r8,   PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_src_0888_8888_rev },
     { PIXMAN_OP_SRC,  PIXMAN_b8g8r8,   PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_0888_0565_rev },
+    { PIXMAN_OP_SRC,  PIXMAN_pixbuf,   PIXMAN_pixbuf,   PIXMAN_a8r8g8b8, neon_composite_src_pixbuf_8888  },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   neon_composite_over_n_8_0565    },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   neon_composite_over_n_8_0565    },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888    },
commit a732d3baeb0697b91a713fd6b51b68ee7ca68e03
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Thu Nov 5 20:27:38 2009 +0200

    ARM: added 'neon_composite_src_0888_0565_rev' fast path
    
    This is ARM NEON optimized conversion of native RGB format used by
    GTK/GDK into r5g6b5 format.

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index bb68be6..8010e80 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1370,3 +1370,41 @@ generate_composite_function \
     0, /* dst_r_basereg */ \
     0, /* src_basereg   */ \
     0  /* mask_basereg  */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_head
+    vshll.u8    q8, d1, #8
+    vshll.u8    q9, d2, #8
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail
+    vshll.u8    q14, d0, #8
+    vsri.u16    q14, q8, #5
+    vsri.u16    q14, q9, #11
+.endm
+
+.macro pixman_composite_src_0888_0565_rev_process_pixblock_tail_head
+        vshll.u8    q14, d0, #8
+    vld3.8 {d0, d1, d2}, [SRC]!
+        vsri.u16    q14, q8, #5
+        vsri.u16    q14, q9, #11
+    vshll.u8    q8, d1, #8
+        vst1.16 {d28, d29}, [DST_W, :128]!
+    vshll.u8    q9, d2, #8
+.endm
+
+generate_composite_function \
+    pixman_composite_src_0888_0565_rev_asm_neon, 24, 0, 16, \
+    FLAG_DST_WRITEONLY, \
+    8, /* number of pixels, processed in a single block */ \
+    10, /* prefetch distance */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_src_0888_0565_rev_process_pixblock_head, \
+    pixman_composite_src_0888_0565_rev_process_pixblock_tail, \
+    pixman_composite_src_0888_0565_rev_process_pixblock_tail_head, \
+    28, /* dst_w_basereg */ \
+    0, /* dst_r_basereg */ \
+    0, /* src_basereg   */ \
+    0  /* mask_basereg  */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index ac0c558..21ed436 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -256,6 +256,7 @@ BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
 BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
 BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
 BIND_SRC_NULL_DST(src_0888_8888_rev, uint8_t, 3, uint32_t, 1)
+BIND_SRC_NULL_DST(src_0888_0565_rev, uint8_t, 3, uint16_t, 1)
 BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
 BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
 
@@ -399,6 +400,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
     { PIXMAN_OP_SRC,  PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_src_8888_8888    },
     { PIXMAN_OP_SRC,  PIXMAN_r8g8b8,   PIXMAN_null,     PIXMAN_r8g8b8,   neon_composite_src_0888_0888    },
     { PIXMAN_OP_SRC,  PIXMAN_b8g8r8,   PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_src_0888_8888_rev },
+    { PIXMAN_OP_SRC,  PIXMAN_b8g8r8,   PIXMAN_null,     PIXMAN_r5g6b5,   neon_composite_src_0888_0565_rev },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   neon_composite_over_n_8_0565    },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   neon_composite_over_n_8_0565    },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888    },
commit a1386a1ceb0c50d2e23cf30be30ea165d2d2ea7c
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Thu Nov 5 19:43:09 2009 +0200

    ARM: added 'neon_src_0888_8888_rev' fast path
    
    This is ARM NEON optimized conversion of native RGB format used by
    GTK/GDK into native 32bpp RGB format used by cairo/pixman.

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 691a194..bb68be6 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1335,3 +1335,38 @@ generate_composite_function \
     0, /* dst_r_basereg */ \
     0, /* src_basereg   */ \
     0  /* mask_basereg  */
+
+/******************************************************************************/
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_head
+    vswp   d0, d2
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_process_pixblock_tail_head
+    vst4.8 {d0, d1, d2, d3}, [DST_W]!
+    vld3.8 {d0, d1, d2}, [SRC]!
+    vswp   d0, d2
+    cache_preload 8, 8
+.endm
+
+.macro pixman_composite_src_0888_8888_rev_init
+    veor   d3, d3, d3
+.endm
+
+generate_composite_function \
+    pixman_composite_src_0888_8888_rev_asm_neon, 24, 0, 32, \
+    FLAG_DST_WRITEONLY | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    10, /* prefetch distance */ \
+    pixman_composite_src_0888_8888_rev_init, \
+    default_cleanup, \
+    pixman_composite_src_0888_8888_rev_process_pixblock_head, \
+    pixman_composite_src_0888_8888_rev_process_pixblock_tail, \
+    pixman_composite_src_0888_8888_rev_process_pixblock_tail_head, \
+    0, /* dst_w_basereg */ \
+    0, /* dst_r_basereg */ \
+    0, /* src_basereg   */ \
+    0  /* mask_basereg  */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index fef98a1..ac0c558 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -255,6 +255,7 @@ BIND_SRC_NULL_DST(src_0565_0565, uint16_t, 1, uint16_t, 1)
 BIND_SRC_NULL_DST(src_0888_0888, uint8_t, 3, uint8_t, 3)
 BIND_SRC_NULL_DST(src_8888_0565, uint32_t, 1, uint16_t, 1)
 BIND_SRC_NULL_DST(src_0565_8888, uint16_t, 1, uint32_t, 1)
+BIND_SRC_NULL_DST(src_0888_8888_rev, uint8_t, 3, uint32_t, 1)
 BIND_SRC_NULL_DST(add_8000_8000, uint8_t, 1, uint8_t, 1)
 BIND_SRC_NULL_DST(add_8888_8888, uint32_t, 1, uint32_t, 1)
 
@@ -397,6 +398,7 @@ static const pixman_fast_path_t arm_neon_fast_path_array[] =
     { PIXMAN_OP_SRC,  PIXMAN_a8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_src_8888_8888    },
     { PIXMAN_OP_SRC,  PIXMAN_x8b8g8r8, PIXMAN_null,     PIXMAN_x8b8g8r8, neon_composite_src_8888_8888    },
     { PIXMAN_OP_SRC,  PIXMAN_r8g8b8,   PIXMAN_null,     PIXMAN_r8g8b8,   neon_composite_src_0888_0888    },
+    { PIXMAN_OP_SRC,  PIXMAN_b8g8r8,   PIXMAN_null,     PIXMAN_x8r8g8b8, neon_composite_src_0888_8888_rev },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_r5g6b5,   neon_composite_over_n_8_0565    },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_b5g6r5,   neon_composite_over_n_8_0565    },
     { PIXMAN_OP_OVER, PIXMAN_solid,    PIXMAN_a8,       PIXMAN_a8r8g8b8, neon_composite_over_n_8_8888    },


More information about the xorg-commit mailing list