pixman: Branch 'master' - 2 commits

Søren Sandmann Pedersen sandmann at kemper.freedesktop.org
Wed May 23 11:10:50 PDT 2012


 pixman/pixman-mips-dspr2-asm.S |  123 ++++++++++++++++++++++++++++++-----------
 pixman/pixman-mips-dspr2-asm.h |   56 ++++++++++++++++++
 pixman/pixman-mips-dspr2.c     |    6 ++
 pixman/pixman-mips-dspr2.h     |   52 +++++++++++++++++
 4 files changed, 205 insertions(+), 32 deletions(-)

New commits:
commit 30816e3068bccf7c78c78f916b54971d24873bdc
Author: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Date:   Thu May 3 00:03:43 2012 +0200

    MIPS: DSPr2: Added bilinear over_8888_8_8888 fast path.
    
    Performance numbers before/after on MIPS-74kc @ 1GHz
    
    Referent (before):
    
    cairo-perf-trace:
    [ # ]  backend                         test   min(s) median(s) stddev. count
    [ # ]    image: pixman 0.25.3
    [  0]    image             firefox-fishtank 2289.180 2290.567   0.05%    5/6
    
    Optimized:
    
    cairo-perf-trace:
    [ # ]  backend                         test   min(s) median(s) stddev. count
    [ # ]    image: pixman 0.25.3
    [  0]    image             firefox-fishtank 1700.925 1708.314   0.22%    5/6

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index f3908da..87558f0 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -747,3 +747,66 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
      nop
 
 END(pixman_composite_over_n_8_0565_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
+/*
+ * a0     - dst        (a8r8g8b8)
+ * a1     - mask       (a8)
+ * a2     - src_top    (a8r8g8b8)
+ * a3     - src_bottom (a8r8g8b8)
+ * 16(sp) - wt
+ * 20(sp) - wb
+ * 24(sp) - vx
+ * 28(sp) - unit_x
+ * 32(sp) - w
+ */
+
+    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+    lw       v1, 60(sp)        /* v1 = w(sp + 32 + 28 save regs stack offset)*/
+    beqz     v1, 1f
+     nop
+
+    lw       s0, 44(sp)        /* s0 = wt */
+    lw       s1, 48(sp)        /* s1 = wb */
+    lw       s2, 52(sp)        /* s2 = vx */
+    lw       s3, 56(sp)        /* s3 = unit_x */
+    li       v0, 256
+    li       s8, 0x00ff00ff
+0:
+    andi     t4, s2, 0xffff    /* t4 = (short)vx */
+    srl      t4, t4, 8         /* t4 = vx >> 8 */
+    subu     t5, v0, t4        /* t5 = ( 256 - (vx>>8)) */
+
+    mul      s4, s0, t5        /* s4 = wt*(256-(vx>>8)) */
+    mul      s5, s0, t4        /* s5 = wt*(vx>>8) */
+    mul      s6, s1, t5        /* s6 = wb*(256-(vx>>8)) */
+    mul      s7, s1, t4        /* s7 = wb*(vx>>8) */
+
+    sra      t9, s2, 16
+    sll      t9, t9, 2
+    addiu    t8, t9, 4
+    lwx      t0, t9(a2)        /* t0 = tl */
+    lwx      t1, t8(a2)        /* t1 = tr */
+    addiu    v1, v1, -1
+    lwx      t2, t9(a3)        /* t2 = bl */
+    lwx      t3, t8(a3)        /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, \
+                                      t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    lbu      t1, 0(a1)         /* t1 = mask */
+    lw       t2, 0(a0)         /* t2 = dst */
+    addiu    a1, a1, 1
+    OVER_8888_8_8888 t0, t1, t2, t0, s8, t3, t4, t5, t6
+
+    addu     s2, s2, s3        /* vx += unit_x; */
+    sw       t0, 0(a0)
+    bnez     v1, 0b
+     addiu   a0, a0, 4
+
+1:
+    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+    j        ra
+     nop
+
+END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 8383060..7cf3281 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -566,4 +566,60 @@ LEAF_MIPS32R2(symbol)                                   \
     addu_s.qb              \out2_8888, \d2_8888,  \scratch2
 .endm
 
+.macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br,         \
+                                         scratch1, scratch2,     \
+                                         alpha, red, green, blue \
+                                         wt1, wt2, wb1, wb2
+    andi            \scratch1, \tl,  0xff
+    andi            \scratch2, \tr,  0xff
+    andi            \alpha,    \bl,  0xff
+    andi            \red,      \br,  0xff
+
+    multu           $ac0,      \wt1, \scratch1
+    maddu           $ac0,      \wt2, \scratch2
+    maddu           $ac0,      \wb1, \alpha
+    maddu           $ac0,      \wb2, \red
+
+    ext             \scratch1, \tl,  8, 8
+    ext             \scratch2, \tr,  8, 8
+    ext             \alpha,    \bl,  8, 8
+    ext             \red,      \br,  8, 8
+
+    multu           $ac1,      \wt1, \scratch1
+    maddu           $ac1,      \wt2, \scratch2
+    maddu           $ac1,      \wb1, \alpha
+    maddu           $ac1,      \wb2, \red
+
+    ext             \scratch1, \tl,  16, 8
+    ext             \scratch2, \tr,  16, 8
+    ext             \alpha,    \bl,  16, 8
+    ext             \red,      \br,  16, 8
+
+    mflo            \blue,     $ac0
+
+    multu           $ac2,      \wt1, \scratch1
+    maddu           $ac2,      \wt2, \scratch2
+    maddu           $ac2,      \wb1, \alpha
+    maddu           $ac2,      \wb2, \red
+
+    ext             \scratch1, \tl,  24, 8
+    ext             \scratch2, \tr,  24, 8
+    ext             \alpha,    \bl,  24, 8
+    ext             \red,      \br,  24, 8
+
+    mflo            \green,    $ac1
+
+    multu           $ac3,      \wt1, \scratch1
+    maddu           $ac3,      \wt2, \scratch2
+    maddu           $ac3,      \wb1, \alpha
+    maddu           $ac3,      \wb2, \red
+
+    mflo            \red,      $ac2
+    mflo            \alpha,    $ac3
+
+    precr.qb.ph     \alpha,    \alpha, \red
+    precr.qb.ph     \scratch1, \green, \blue
+    precrq.qb.ph    \tl,       \alpha, \scratch1
+.endm
+
 #endif //PIXMAN_MIPS_DSPR2_ASM_H
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 7081734..06d4335 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -58,6 +58,9 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
                                        uint8_t, 1, uint16_t, 1)
 
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, OVER,
+                                             uint32_t, uint32_t)
+
 static pixman_bool_t
 pixman_fill_mips (uint32_t *bits,
                   int       stride,
@@ -206,6 +209,9 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   mips_composite_over_n_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   mips_composite_over_n_8_0565),
 
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
+
     { PIXMAN_OP_NONE },
 };
 
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index cc35d02..5036938 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -127,4 +127,56 @@ mips_composite_##name (pixman_implementation_t *imp,                \
     }                                                               \
 }
 
+/*****************************************************************************/
+
+#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op,          \
+                                                src_type, dst_type)           \
+void                                                                          \
+pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (                    \
+                                             dst_type *       dst,            \
+                                             const uint8_t *  mask,           \
+                                             const src_type * top,            \
+                                             const src_type * bottom,         \
+                                             int              wt,             \
+                                             int              wb,             \
+                                             pixman_fixed_t   x,              \
+                                             pixman_fixed_t   ux,             \
+                                             int              width);         \
+                                                                              \
+static force_inline void                                                      \
+scaled_bilinear_scanline_mips_##name##_##op (dst_type *       dst,            \
+                                             const uint8_t *  mask,           \
+                                             const src_type * src_top,        \
+                                             const src_type * src_bottom,     \
+                                             int32_t          w,              \
+                                             int              wt,             \
+                                             int              wb,             \
+                                             pixman_fixed_t   vx,             \
+                                             pixman_fixed_t   unit_x,         \
+                                             pixman_fixed_t   max_vx,         \
+                                             pixman_bool_t    zero_src)       \
+{                                                                             \
+    if ((flags & SKIP_ZERO_SRC) && zero_src)                                  \
+        return;                                                               \
+    pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (                \
+                      dst, mask, src_top, src_bottom, wt, wb, vx, unit_x, w); \
+}                                                                             \
+                                                                              \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op,                      \
+                       scaled_bilinear_scanline_mips_##name##_##op,           \
+                       src_type, uint8_t, dst_type, COVER,                    \
+                       FLAG_HAVE_NON_SOLID_MASK)                              \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op,                       \
+                       scaled_bilinear_scanline_mips_##name##_##op,           \
+                       src_type, uint8_t, dst_type, NONE,                     \
+                       FLAG_HAVE_NON_SOLID_MASK)                              \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op,                        \
+                       scaled_bilinear_scanline_mips_##name##_##op,           \
+                       src_type, uint8_t, dst_type, PAD,                      \
+                       FLAG_HAVE_NON_SOLID_MASK)                              \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op,                     \
+                       scaled_bilinear_scanline_mips_##name##_##op,           \
+                       src_type, uint8_t, dst_type, NORMAL,                   \
+                       FLAG_HAVE_NON_SOLID_MASK)
+
 #endif //PIXMAN_MIPS_DSPR2_H
commit aea0522f6f1a51b97a673cfe4dc157e501008580
Author: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Date:   Wed May 23 18:53:43 2012 +0200

    MIPS: DSPr2: Fix bug in over_n_8888_8888_ca/over_n_8888_0565_ca routines
    
    In main loop (unrolled by factor 2), instead of negating multiplied
    mask values by srca, values of srca was negated, and passed as alpha
    argument for
    
        UN8x4_MUL_UN8x4_ADD_UN8x4 macro.
    
    Instead of:
    
        ma = ~ma;
        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
    
    Code was doing this:
    
        ma = ~srca;
        UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
    
    Key is in substituting registers s0/s1 (containing srca value), with
    t0/t1 containing mask values multiplied by srca.  Register usage is
    also improved (less registers are saved on stack, for
    over_n_8888_8888_ca routine).
    
    The bug was introduced in commit d2ee5631 and revealed by composite test.

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 68ad33f..f3908da 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -317,7 +317,7 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
  * a3 - w
  */
 
-    SAVE_REGS_ON_STACK 16, s0, s1, s2, s3, s4, s5, s6, s7
+    SAVE_REGS_ON_STACK 8, s0, s1, s2, s3, s4, s5
     beqz         a3, 4f
      nop
     li           t6, 0xff
@@ -337,23 +337,21 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
     beqz         t2, 12f      /* if (t0 == 0) && (t1 == 0) */
      addiu       a2, a2, 8
     and          t3, t0, t1
-    move         s0, t8       /* s0 = srca */
-    move         s1, t8       /* s1 = srca */
     move         t4, a1       /* t4 = src */
     move         t5, a1       /* t5 = src */
     lw           t2, 0(a0)    /* t2 = dst */
     beq          t3, t7, 11f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
-     lw          t3, 4(a0)    /* t0 = dst */
+     lw          t3, 4(a0)    /* t3 = dst */
     MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
-    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, s0, s1, t9, s2, s3, s4, s5, s6, s7
+    MIPS_2xUN8x4_MUL_2xUN8   t0, t1, t8, t8, t0, t1, t9, s0, s1, s2, s3, s4, s5
 11:
-    not          s0, s0
-    not          s1, s1
-    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, s0, s1, s2, s3, t9, t0, t1, s4, s5, s6, s7
-    addu_s.qb    t0, t4, s2
-    addu_s.qb    t1, t5, s3
-    sw           t0, 0(a0)
-    sw           t1, 4(a0)
+    not          t0, t0
+    not          t1, t1
+    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
+    addu_s.qb    t2, t4, t2
+    addu_s.qb    t3, t5, t3
+    sw           t2, 0(a0)
+    sw           t3, 4(a0)
 12:
     addiu        a3, a3, -2
     addiu        t1, a3, -1
@@ -369,20 +367,20 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
     beqz         t2, 22f      /* if (t0 == 0) & (t1 == 0) */
      addiu       a2, a2, 8
     and          t2, t0, t1
-    move         s0, a1
+    move         t4, a1
     beq          t2, t7, 21f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
-     move        s1, a1
+     move        t5, a1
     lw           t2, 0(a0)    /* t2 = dst */
     lw           t3, 4(a0)    /* t3 = dst */
     MIPS_2xUN8x4_MUL_2xUN8x4 a1, a1, t0, t1, t4, t5, t9, s0, s1, s2, s3, s4, s5
     not          t0, t0
     not          t1, t1
-    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, s0, s1, t9, s2, s3, s4, s5, s6, s7
-    addu_s.qb    s0, t4, s0
-    addu_s.qb    s1, t5, s1
+    MIPS_2xUN8x4_MUL_2xUN8x4 t2, t3, t0, t1, t2, t3, t9, s0, s1, s2, s3, s4, s5
+    addu_s.qb    t4, t4, t2
+    addu_s.qb    t5, t5, t3
 21:
-    sw           s0, 0(a0)
-    sw           s1, 4(a0)
+    sw           t4, 0(a0)
+    sw           t5, 4(a0)
 22:
     addiu        a3, a3, -2
     addiu        t1, a3, -1
@@ -395,20 +393,19 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
     lw           t1, 0(a2)    /* t1 = mask */
     beqz         t1, 4f
      nop
-    move         s0, t8       /* s0 = srca */
     move         t2, a1       /* t2 = src */
     beq          t1, t7, 31f
      lw          t0, 0(a0)    /* t0 = dst */
 
     MIPS_UN8x4_MUL_UN8x4  a1, t1, t2, t9, t3, t4, t5, t6
-    MIPS_UN8x4_MUL_UN8    t1, t8, s0, t9, t3, t4, t5
+    MIPS_UN8x4_MUL_UN8    t1, t8, t1, t9, t3, t4, t5
 31:
-    not          s0, s0
-    MIPS_UN8x4_MUL_UN8x4  t0, s0, t3, t9, t4, t5, t6, t1
-    addu_s.qb    t0, t2, t3
+    not          t1, t1
+    MIPS_UN8x4_MUL_UN8x4  t0, t1, t0, t9, t3, t4, t5, t6
+    addu_s.qb    t0, t2, t0
     sw           t0, 0(a0)
 4:
-    RESTORE_REGS_FROM_STACK 16, s0, s1, s2, s3, s4, s5, s6, s7
+    RESTORE_REGS_FROM_STACK 8, s0, s1, s2, s3, s4, s5
     j            ra
      nop
 
@@ -447,8 +444,8 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
     beqz         t2, 12f      /* if (t0 == 0) && (t1 == 0) */
      addiu       a2, a2, 8
     and          t3, t0, t1
-    move         t0, t8
-    move         t1, a1
+    move         s2, a1       /* s2 = src */
+    move         s3, a1       /* s3 = src */
     lhu          t2, 0(a0)    /* t2 = dst */
     beq          t3, s1, 11f  /* if (t0 == 0xffffffff) && (t1 == 0xffffffff) */
      lhu         t3, 2(a0)    /* t3 = dst */
@@ -461,7 +458,7 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
     MIPS_2xUN8x4_MUL_2xUN8x4 s4, s5, t0, t1, s4, s5, t9, t4, s6, s7, s8, t0, t1
     addu_s.qb    s2, s2, s4
     addu_s.qb    s3, s3, s5
-    CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s1, s2
+    CONVERT_2x8888_TO_2x0565 s2, s3, t2, t3, t5, t6, t7, s4, s5
     sh           t2, 0(a0)
     sh           t3, 2(a0)
 12:
@@ -507,17 +504,16 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_0565_ca_asm_mips)
     lw           t1, 0(a2)    /* t1 = mask */
     beqz         t1, 4f
      nop
-    move         s0, t8       /* s0 = srca */
     move         t2, a1       /* t2 = src */
     beq          t1, t7, 31f
      lhu         t0, 0(a0)    /* t0 = dst */
 
     MIPS_UN8x4_MUL_UN8x4     a1, t1, t2, t9, t3, t4, t5, t6
-    MIPS_UN8x4_MUL_UN8       t1, t8, s0, t9, t3, t4, t5
+    MIPS_UN8x4_MUL_UN8       t1, t8, t1, t9, t3, t4, t5
 31:
-    not          s0, s0
+    not          t1, t1
     CONVERT_1x0565_TO_1x8888 t0, s1, s2, s3
-    MIPS_UN8x4_MUL_UN8x4     s1, s0, t3, t9, t4, t5, t6, t1
+    MIPS_UN8x4_MUL_UN8x4     s1, t1, t3, t9, t4, t5, t6, t7
     addu_s.qb    t0, t2, t3
     CONVERT_1x8888_TO_1x0565 t0, s1, s2, s3
     sh           s1, 0(a0)


More information about the xorg-commit mailing list