pixman: Branch 'master' - 3 commits

Siarhei Siamashka siamashka at kemper.freedesktop.org
Wed Dec 16 10:58:43 PST 2009


 pixman/pixman-arm-neon-asm.S |   44 +++++++++++++++
 pixman/pixman-arm-neon-asm.h |  119 +++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-arm-neon.c     |   33 +++++++++++
 3 files changed, 196 insertions(+)

New commits:
commit 44768320709183a341d219f97c03c5b592a69355
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Thu Dec 10 00:25:58 2009 +0200

    ARM: added 'neon_combine_add_u' function

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index d2c5fc7..2986884 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -561,6 +561,16 @@ generate_composite_function \
     pixman_composite_add_8000_8000_process_pixblock_tail, \
     pixman_composite_add_8888_8888_process_pixblock_tail_head
 
+generate_composite_function_single_scanline \
+    pixman_composite_scanline_add_asm_neon, 32, 0, 32, \
+    FLAG_DST_READWRITE, \
+    8, /* number of pixels, processed in a single block */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_add_8000_8000_process_pixblock_head, \
+    pixman_composite_add_8000_8000_process_pixblock_tail, \
+    pixman_composite_add_8888_8888_process_pixblock_tail_head
+
 /******************************************************************************/
 
 .macro pixman_composite_over_8888_8888_process_pixblock_head
@@ -1172,6 +1182,16 @@ generate_composite_function \
     pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
     pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
 
+generate_composite_function_single_scanline \
+    pixman_composite_scanline_add_mask_asm_neon, 32, 32, 32, \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_head, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_tail, \
+    pixman_composite_add_8888_8888_8888_process_pixblock_tail_head
+
 /******************************************************************************/
 
 .macro pixman_composite_over_8888_n_8888_process_pixblock_head
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 00f75eb..efeabeb 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -544,6 +544,7 @@ neon_combine_##name##_u (pixman_implementation_t *imp,                   \
 }
 
 BIND_COMBINE_U (over)
+BIND_COMBINE_U (add)
 
 pixman_implementation_t *
 _pixman_implementation_create_arm_neon (void)
@@ -552,6 +553,7 @@ _pixman_implementation_create_arm_neon (void)
     pixman_implementation_t *imp = _pixman_implementation_create (general);
 
     imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
+    imp->combine_32[PIXMAN_OP_ADD] = neon_combine_add_u;
 
     imp->composite = arm_neon_composite;
     imp->blt = arm_neon_blt;
commit f2c7a04c41440b15a5ce1db7ab87dd5bd8c088da
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Thu Dec 10 00:22:12 2009 +0200

    ARM: added 'neon_combine_over_u' function

diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 43920be..d2c5fc7 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -628,6 +628,16 @@ generate_composite_function \
     pixman_composite_over_8888_8888_process_pixblock_tail, \
     pixman_composite_over_8888_8888_process_pixblock_tail_head
 
+generate_composite_function_single_scanline \
+    pixman_composite_scanline_over_asm_neon, 32, 0, 32, \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    default_init, \
+    default_cleanup, \
+    pixman_composite_over_8888_8888_process_pixblock_head, \
+    pixman_composite_over_8888_8888_process_pixblock_tail, \
+    pixman_composite_over_8888_8888_process_pixblock_tail_head
+
 /******************************************************************************/
 
 /* TODO: expand macros and do better instructions scheduling */
@@ -1271,6 +1281,20 @@ generate_composite_function \
     0,  /* src_basereg   */ \
     12  /* mask_basereg  */
 
+generate_composite_function_single_scanline \
+    pixman_composite_scanline_over_mask_asm_neon, 32, 32, 32, \
+    FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+    8, /* number of pixels, processed in a single block */ \
+    pixman_composite_over_8888_8888_8888_init, \
+    pixman_composite_over_8888_8888_8888_cleanup, \
+    pixman_composite_over_8888_n_8888_process_pixblock_head, \
+    pixman_composite_over_8888_n_8888_process_pixblock_tail, \
+    pixman_composite_over_8888_8888_8888_process_pixblock_tail_head \
+    28, /* dst_w_basereg */ \
+    4,  /* dst_r_basereg */ \
+    0,  /* src_basereg   */ \
+    12  /* mask_basereg  */
+
 /******************************************************************************/
 
 /* TODO: expand macros and do better instructions scheduling */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 00237da..00f75eb 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -516,12 +516,43 @@ arm_neon_fill (pixman_implementation_t *imp,
 	imp->delegate, bits, stride, bpp, x, y, width, height, xor);
 }
 
+#define BIND_COMBINE_U(name)                                             \
+void                                                                     \
+pixman_composite_scanline_##name##_mask_asm_neon (int32_t         w,     \
+                                                  const uint32_t *dst,   \
+                                                  const uint32_t *src,   \
+                                                  const uint32_t *mask); \
+                                                                         \
+void                                                                     \
+pixman_composite_scanline_##name##_asm_neon (int32_t         w,          \
+                                             const uint32_t *dst,        \
+                                             const uint32_t *src);       \
+                                                                         \
+static void                                                              \
+neon_combine_##name##_u (pixman_implementation_t *imp,                   \
+                         pixman_op_t              op,                    \
+                         uint32_t *               dest,                  \
+                         const uint32_t *         src,                   \
+                         const uint32_t *         mask,                  \
+                         int                      width)                 \
+{                                                                        \
+    if (mask)                                                            \
+	pixman_composite_scanline_##name##_mask_asm_neon (width, dest,   \
+	                                                  src, mask);    \
+    else                                                                 \
+	pixman_composite_scanline_##name##_asm_neon (width, dest, src);  \
+}
+
+BIND_COMBINE_U (over)
+
 pixman_implementation_t *
 _pixman_implementation_create_arm_neon (void)
 {
     pixman_implementation_t *general = _pixman_implementation_create_fast_path ();
     pixman_implementation_t *imp = _pixman_implementation_create (general);
 
+    imp->combine_32[PIXMAN_OP_OVER] = neon_combine_over_u;
+
     imp->composite = arm_neon_composite;
     imp->blt = arm_neon_blt;
     imp->fill = arm_neon_fill;
commit 24cd286af6f4507eb9937ced6d9998d296c77a0a
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Wed Dec 9 23:49:04 2009 +0200

    ARM: macro template for single scanline compositing functions
    
    Existing template already supports 2D images processing,
    but pixman also needs some NEON optimized functions for
    improving performance when compositing is decoupled
    into "fetch -> process -> store" stages and done via
    temporary scanline buffer. That's why a new simplified
    template which deals only with the generation of single
    scanline processing functions is handy.

diff --git a/pixman/pixman-arm-neon-asm.h b/pixman/pixman-arm-neon-asm.h
index c7a5f14..56c3fae 100644
--- a/pixman/pixman-arm-neon-asm.h
+++ b/pixman/pixman-arm-neon-asm.h
@@ -780,6 +780,125 @@ fname:
     .endfunc
 .endm
 
+/*
+ * A simplified variant of function generation template for a single
+ * scanline processing (for implementing pixman combine functions)
+ */
+.macro generate_composite_function_single_scanline fname, \
+                                                   src_bpp_, \
+                                                   mask_bpp_, \
+                                                   dst_w_bpp_, \
+                                                   flags, \
+                                                   pixblock_size_, \
+                                                   init, \
+                                                   cleanup, \
+                                                   process_pixblock_head, \
+                                                   process_pixblock_tail, \
+                                                   process_pixblock_tail_head, \
+                                                   dst_w_basereg_ = 28, \
+                                                   dst_r_basereg_ = 4, \
+                                                   src_basereg_   = 0, \
+                                                   mask_basereg_  = 24
+
+    .func fname
+    .global fname
+    /* For ELF format also set function visibility to hidden */
+#ifdef __ELF__
+    .hidden fname
+    .type fname, %function
+#endif
+fname:
+    .set PREFETCH_TYPE_CURRENT, PREFETCH_TYPE_NONE
+/*
+ * Make some macro arguments globally visible and accessible
+ * from other macros
+ */
+    .set src_bpp, src_bpp_
+    .set mask_bpp, mask_bpp_
+    .set dst_w_bpp, dst_w_bpp_
+    .set pixblock_size, pixblock_size_
+    .set dst_w_basereg, dst_w_basereg_
+    .set dst_r_basereg, dst_r_basereg_
+    .set src_basereg, src_basereg_
+    .set mask_basereg, mask_basereg_
+/*
+ * Assign symbolic names to registers
+ */
+    W           .req        r0      /* width (is updated during processing) */
+    DST_W       .req        r1      /* destination buffer pointer for writes */
+    SRC         .req        r2      /* source buffer pointer */
+    DST_R       .req        ip      /* destination buffer pointer for reads */
+    MASK        .req        r3      /* mask pointer */
+
+.if (((flags) & FLAG_DST_READWRITE) != 0)
+    .set dst_r_bpp, dst_w_bpp
+.else
+    .set dst_r_bpp, 0
+.endif
+.if (((flags) & FLAG_DEINTERLEAVE_32BPP) != 0)
+    .set DEINTERLEAVE_32BPP_ENABLED, 1
+.else
+    .set DEINTERLEAVE_32BPP_ENABLED, 0
+.endif
+
+    init
+    mov         DST_R, DST_W
+
+    cmp         W, #pixblock_size
+    blt         8f
+
+    ensure_destination_ptr_alignment process_pixblock_head, \
+                                     process_pixblock_tail, \
+                                     process_pixblock_tail_head
+
+    subs        W, W, #pixblock_size
+    blt         7f
+
+    /* Implement "head (tail_head) ... (tail_head) tail" loop pattern */
+    pixld_a     pixblock_size, dst_r_bpp, \
+                (dst_r_basereg - pixblock_size * dst_r_bpp / 64), DST_R
+    pixld       pixblock_size, src_bpp, \
+                (src_basereg - pixblock_size * src_bpp / 64), SRC
+    pixld       pixblock_size, mask_bpp, \
+                (mask_basereg - pixblock_size * mask_bpp / 64), MASK
+    process_pixblock_head
+    subs        W, W, #pixblock_size
+    blt         2f
+1:
+    process_pixblock_tail_head
+    subs        W, W, #pixblock_size
+    bge         1b
+2:
+    process_pixblock_tail
+    pixst_a     pixblock_size, dst_w_bpp, \
+                (dst_w_basereg - pixblock_size * dst_w_bpp / 64), DST_W
+7:
+    /* Process the remaining trailing pixels in the scanline (dst aligned) */
+    process_trailing_pixels 0, 1, \
+                            process_pixblock_head, \
+                            process_pixblock_tail, \
+                            process_pixblock_tail_head
+
+    cleanup
+    bx         lr  /* exit */
+8:
+    /* Process the remaining trailing pixels in the scanline (dst unaligned) */
+    process_trailing_pixels 0, 0, \
+                            process_pixblock_head, \
+                            process_pixblock_tail, \
+                            process_pixblock_tail_head
+
+    cleanup
+    bx          lr  /* exit */
+
+    .unreq      SRC
+    .unreq      MASK
+    .unreq      DST_R
+    .unreq      DST_W
+    .unreq      W
+    .endfunc
+.endm
+
 .macro default_init
 .endm
 


More information about the xorg-commit mailing list