pixman: Branch 'master' - 2 commits

Siarhei Siamashka siamashka at kemper.freedesktop.org
Sun Jul 8 11:42:21 PDT 2012


 pixman/pixman-mips-dspr2-asm.S |  704 +++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2-asm.h |   34 +
 pixman/pixman-mips-dspr2.c     |   51 ++
 pixman/pixman-mips-dspr2.h     |   49 ++
 4 files changed, 838 insertions(+)

New commits:
commit 86ad09b548b45a5a5074d9d83970d5e7e7f89d31
Author: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Date:   Mon Jul 2 20:54:20 2012 +0200

    MIPS: DSPr2: Added more bilinear fast paths (without mask)
    
    Performance numbers before/after on MIPS-74kc @ 1GHz:
    
    lowlevel-blt-bench -b
    
    Reference (before):
      src_8888_8888 =  L1:   8.18  L2:   7.79  M:  6.32 ( 33.51%)  HT:  5.78  VT:  5.70  R:  5.61  RT:  3.79 (  29Kops/s)
      src_8888_0565 =  L1:   6.90  L2:   7.14  M:  6.47 ( 25.75%)  HT:  5.54  VT:  5.51  R:  5.46  RT:  3.53 (  28Kops/s)
      src_0565_x888 =  L1:   3.76  L2:   3.71  M:  3.37 ( 13.41%)  HT:  3.26  VT:  3.22  R:  3.20  RT:  2.58 (  23Kops/s)
      src_0565_0565 =  L1:   3.59  L2:   3.56  M:  3.47 (  9.19%)  HT:  3.19  VT:  3.18  R:  3.16  RT:  2.46 (  22Kops/s)
     over_8888_8888 =  L1:   5.99  L2:   5.66  M:  4.95 ( 26.28%)  HT:  4.40  VT:  4.38  R:  4.31  RT:  3.02 (  26Kops/s)
      add_8888_8888 =  L1:   6.84  L2:   6.39  M:  5.48 ( 29.09%)  HT:  4.80  VT:  4.79  R:  4.70  RT:  3.20 (  27Kops/s)
    
    Optimized:
      src_8888_8888 =  L1:  18.27  L2:  16.69  M: 12.87 ( 68.25%)  HT: 11.80  VT: 11.61  R: 10.60  RT:  7.05 (  41Kops/s)
      src_8888_0565 =  L1:  15.18  L2:  14.10  M: 11.75 ( 46.71%)  HT: 10.64  VT: 10.50  R: 10.03  RT:  7.15 (  41Kops/s)
      src_0565_x888 =  L1:  10.45  L2:   9.96  M:  9.23 ( 36.72%)  HT:  8.39  VT:  8.29  R:  8.02  RT:  5.75 (  37Kops/s)
      src_0565_0565 =  L1:   9.37  L2:   8.98  M:  8.50 ( 22.53%)  HT:  7.71  VT:  7.66  R:  7.52  RT:  5.59 (  37Kops/s)
     over_8888_8888 =  L1:  12.21  L2:  11.01  M:  8.56 ( 45.36%)  HT:  7.71  VT:  7.64  R:  7.43  RT:  5.51 (  36Kops/s)
      add_8888_8888 =  L1:  17.72  L2:  15.16  M: 10.78 ( 57.13%)  HT:  9.46  VT:  9.30  R:  9.00  RT:  6.03 (  38Kops/s)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index ac56746..a8fccd5 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -749,6 +749,373 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
 
 END(pixman_composite_over_n_8_0565_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
+/* Bilinear scanline, SRC: 32-bit source rows, one 32-bit word stored per pixel.
+ * a0     - *dst (advanced by 4 bytes per pixel)
+ * a1     - *src_top (32-bit pixels, indexed by vx >> 16)
+ * a2     - *src_bottom (32-bit pixels, indexed by vx >> 16)
+ * a3     - w (pixel count; the routine returns immediately when it is 0)
+ * 16(sp) - wt (feeds the s4 and s5 products)
+ * 20(sp) - wb (feeds the s6 and s7 products)
+ * 24(sp) - vx (upper bits select the pixel, low 16 bits give t4/t5)
+ * 28(sp) - unit_x (added to vx after every pixel)
+ */
+
+    beqz     a3, 1f         /* w == 0: nothing to do */
+     nop                    /* delay slot */
+
+    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+
+    lw       s0, 36(sp)     /* s0 = wt (stack arguments now sit 20 bytes higher) */
+    lw       s1, 40(sp)     /* s1 = wb */
+    lw       s2, 44(sp)     /* s2 = vx */
+    lw       s3, 48(sp)     /* s3 = unit_x */
+    li       v0, BILINEAR_INTERPOLATION_RANGE /* v0 = RANGE, used to derive t5 */
+
+    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* shift is 0 when BITS == 8 */
+    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* same scaling for wb */
+0:                          /* one iteration per destination pixel */
+    andi     t4, s2, 0xffff /* t4 = low 16 bits of vx (zero-extended) */
+    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* keep the top BITS bits of them */
+    subu     t5, v0, t4     /* t5 = RANGE - t4 */
+
+    mul      s4, s0, t5     /* s4 = wt * t5 */
+    mul      s5, s0, t4     /* s5 = wt * t4 */
+    mul      s6, s1, t5     /* s6 = wb * t5 */
+    mul      s7, s1, t4     /* s7 = wb * t4 */
+
+    sra      t9, s2, 16     /* t9 = vx >> 16, the source pixel index */
+    sll      t9, t9, 2      /* t9 = byte offset (4 bytes per pixel) */
+    addiu    t8, t9, 4      /* t8 = byte offset of the following pixel */
+    lwx      t0, t9(a1)     /* t0 = tl */
+    lwx      t1, t8(a1)     /* t1 = tr */
+    addiu    a3, a3, -1     /* one pixel fewer remaining */
+    lwx      t2, t9(a2)     /* t2 = bl */
+    lwx      t3, t8(a2)     /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+
+    addu     s2, s2, s3     /* vx += unit_x; */
+    sw       t0, 0(a0)      /* store t0 to dst */
+    bnez     a3, 0b         /* loop while pixels remain */
+     addiu   a0, a0, 4      /* delay slot: advance dst by one 32-bit pixel */
+
+    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+1:
+    j        ra
+     nop                    /* delay slot */
+
+END(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
+/* Bilinear scanline, SRC: 32-bit source rows, one 16-bit halfword stored per pixel.
+ * a0     - *dst (advanced by 2 bytes per pixel)
+ * a1     - *src_top (32-bit pixels, indexed by vx >> 16)
+ * a2     - *src_bottom (32-bit pixels, indexed by vx >> 16)
+ * a3     - w (pixel count; the routine returns immediately when it is 0)
+ * 16(sp) - wt (feeds the s4 and s5 products)
+ * 20(sp) - wb (feeds the s6 and s7 products)
+ * 24(sp) - vx (upper bits select the pixel, low 16 bits give t4/t5)
+ * 28(sp) - unit_x (added to vx after every pixel)
+ */
+
+    beqz     a3, 1f         /* w == 0: nothing to do */
+     nop                    /* delay slot */
+
+    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+
+    lw       s0, 36(sp)     /* s0 = wt (stack arguments now sit 20 bytes higher) */
+    lw       s1, 40(sp)     /* s1 = wb */
+    lw       s2, 44(sp)     /* s2 = vx */
+    lw       s3, 48(sp)     /* s3 = unit_x */
+    li       v0, BILINEAR_INTERPOLATION_RANGE /* v0 = RANGE, used to derive t5 */
+
+    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* shift is 0 when BITS == 8 */
+    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* same scaling for wb */
+0:                          /* one iteration per destination pixel */
+    andi     t4, s2, 0xffff /* t4 = low 16 bits of vx (zero-extended) */
+    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* keep the top BITS bits of them */
+    subu     t5, v0, t4     /* t5 = RANGE - t4 */
+
+    mul      s4, s0, t5     /* s4 = wt * t5 */
+    mul      s5, s0, t4     /* s5 = wt * t4 */
+    mul      s6, s1, t5     /* s6 = wb * t5 */
+    mul      s7, s1, t4     /* s7 = wb * t4 */
+
+    sra      t9, s2, 16     /* t9 = vx >> 16, the source pixel index */
+    sll      t9, t9, 2      /* t9 = byte offset (4 bytes per pixel) */
+    addiu    t8, t9, 4      /* t8 = byte offset of the following pixel */
+    lwx      t0, t9(a1)     /* t0 = tl */
+    lwx      t1, t8(a1)     /* t1 = tr */
+    addiu    a3, a3, -1     /* one pixel fewer remaining */
+    lwx      t2, t9(a2)     /* t2 = bl */
+    lwx      t3, t8(a2)     /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
+
+    addu     s2, s2, s3     /* vx += unit_x; */
+    sh       t1, 0(a0)      /* store the low halfword of t1 to dst */
+    bnez     a3, 0b         /* loop while pixels remain */
+     addiu   a0, a0, 2      /* delay slot: advance dst by one 16-bit pixel */
+
+    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+1:
+    j        ra
+     nop                    /* delay slot */
+
+END(pixman_scaled_bilinear_scanline_8888_0565_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
+/* Bilinear scanline, SRC: 16-bit source rows, one 32-bit word stored per pixel.
+ * a0     - *dst (advanced by 4 bytes per pixel)
+ * a1     - *src_top (16-bit pixels, indexed by vx >> 16)
+ * a2     - *src_bottom (16-bit pixels, indexed by vx >> 16)
+ * a3     - w (pixel count; the routine returns immediately when it is 0)
+ * 16(sp) - wt (feeds the s4 and s5 products)
+ * 20(sp) - wb (feeds the s6 and s7 products)
+ * 24(sp) - vx (upper bits select the pixel, low 16 bits give t4/t5)
+ * 28(sp) - unit_x (added to vx after every pixel)
+ */
+
+    beqz     a3, 1f         /* w == 0: nothing to do */
+     nop                    /* delay slot */
+
+    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+    lw       s0, 44(sp)     /* s0 = wt (stack arguments now sit 28 bytes higher) */
+    lw       s1, 48(sp)     /* s1 = wb */
+    lw       s2, 52(sp)     /* s2 = vx */
+    lw       s3, 56(sp)     /* s3 = unit_x */
+    li       v0, BILINEAR_INTERPOLATION_RANGE /* v0 = RANGE, used to derive t5 */
+    li       v1, 0x07e007e0 /* v1 and s8: masks passed to CONVERT_2x0565_TO_2x8888 */
+    li       s8, 0x001f001f
+
+    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* shift is 0 when BITS == 8 */
+    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* same scaling for wb */
+0:                          /* one iteration per destination pixel */
+    andi     t4, s2, 0xffff /* t4 = low 16 bits of vx (zero-extended) */
+    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* keep the top BITS bits of them */
+    subu     t5, v0, t4     /* t5 = RANGE - t4 */
+
+    mul      s4, s0, t5     /* s4 = wt * t5 */
+    mul      s5, s0, t4     /* s5 = wt * t4 */
+    mul      s6, s1, t5     /* s6 = wb * t5 */
+    mul      s7, s1, t4     /* s7 = wb * t4 */
+
+    sra      t9, s2, 16     /* t9 = vx >> 16, the source pixel index */
+    sll      t9, t9, 1      /* t9 = byte offset (2 bytes per pixel) */
+    addiu    t8, t9, 2      /* t8 = byte offset of the following pixel */
+    lhx      t0, t9(a1)     /* t0 = tl */
+    lhx      t1, t8(a1)     /* t1 = tr */
+    andi     t1, t1, 0xffff /* drop the sign extension done by lhx */
+    addiu    a3, a3, -1     /* one pixel fewer remaining */
+    lhx      t2, t9(a2)     /* t2 = bl */
+    lhx      t3, t8(a2)     /* t3 = br */
+    andi     t3, t3, 0xffff /* NOTE(review): only tr/br are masked; presumably the macro below needs just its 2nd input zero-extended - confirm */
+
+    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
+    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+
+    addu     s2, s2, s3     /* vx += unit_x; */
+    sw       t0, 0(a0)      /* store t0 to dst */
+    bnez     a3, 0b         /* loop while pixels remain */
+     addiu   a0, a0, 4      /* delay slot: advance dst by one 32-bit pixel */
+
+    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+    j        ra
+     nop                    /* delay slot */
+
+END(pixman_scaled_bilinear_scanline_0565_8888_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
+/* Bilinear scanline, SRC: 16-bit source rows, one 16-bit halfword stored per pixel.
+ * a0     - *dst (advanced by 2 bytes per pixel)
+ * a1     - *src_top (16-bit pixels, indexed by vx >> 16)
+ * a2     - *src_bottom (16-bit pixels, indexed by vx >> 16)
+ * a3     - w (pixel count; the routine returns immediately when it is 0)
+ * 16(sp) - wt (feeds the s4 and s5 products)
+ * 20(sp) - wb (feeds the s6 and s7 products)
+ * 24(sp) - vx (upper bits select the pixel, low 16 bits give t4/t5)
+ * 28(sp) - unit_x (added to vx after every pixel)
+ */
+
+    beqz     a3, 1f         /* w == 0: nothing to do */
+     nop                    /* delay slot */
+
+    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+    lw       s0, 44(sp)     /* s0 = wt (stack arguments now sit 28 bytes higher) */
+    lw       s1, 48(sp)     /* s1 = wb */
+    lw       s2, 52(sp)     /* s2 = vx */
+    lw       s3, 56(sp)     /* s3 = unit_x */
+    li       v0, BILINEAR_INTERPOLATION_RANGE /* v0 = RANGE, used to derive t5 */
+    li       v1, 0x07e007e0 /* v1 and s8: masks passed to CONVERT_2x0565_TO_2x8888 */
+    li       s8, 0x001f001f
+
+    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* shift is 0 when BITS == 8 */
+    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* same scaling for wb */
+0:                          /* one iteration per destination pixel */
+    andi     t4, s2, 0xffff /* t4 = low 16 bits of vx (zero-extended) */
+    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* keep the top BITS bits of them */
+    subu     t5, v0, t4     /* t5 = RANGE - t4 */
+
+    mul      s4, s0, t5     /* s4 = wt * t5 */
+    mul      s5, s0, t4     /* s5 = wt * t4 */
+    mul      s6, s1, t5     /* s6 = wb * t5 */
+    mul      s7, s1, t4     /* s7 = wb * t4 */
+
+    sra      t9, s2, 16     /* t9 = vx >> 16, the source pixel index */
+    sll      t9, t9, 1      /* t9 = byte offset (2 bytes per pixel) */
+    addiu    t8, t9, 2      /* t8 = byte offset of the following pixel */
+    lhx      t0, t9(a1)     /* t0 = tl */
+    lhx      t1, t8(a1)     /* t1 = tr */
+    andi     t1, t1, 0xffff /* drop the sign extension done by lhx */
+    addiu    a3, a3, -1     /* one pixel fewer remaining */
+    lhx      t2, t9(a2)     /* t2 = bl */
+    lhx      t3, t8(a2)     /* t3 = br */
+    andi     t3, t3, 0xffff /* NOTE(review): only tr/br are masked; presumably the macro below needs just its 2nd input zero-extended - confirm */
+
+    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
+    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
+
+    addu     s2, s2, s3     /* vx += unit_x; */
+    sh       t1, 0(a0)      /* store the low halfword of t1 to dst */
+    bnez     a3, 0b         /* loop while pixels remain */
+     addiu   a0, a0, 2      /* delay slot: advance dst by one 16-bit pixel */
+
+    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+    j        ra
+     nop                    /* delay slot */
+
+END(pixman_scaled_bilinear_scanline_0565_0565_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
+/* Bilinear scanline, OVER: interpolated 32-bit pixel is combined with the dst word.
+ * a0     - *dst (read and rewritten; advanced by 4 bytes per pixel)
+ * a1     - *src_top (32-bit pixels, indexed by vx >> 16)
+ * a2     - *src_bottom (32-bit pixels, indexed by vx >> 16)
+ * a3     - w (pixel count; the routine returns immediately when it is 0)
+ * 16(sp) - wt (feeds the s4 and s5 products)
+ * 20(sp) - wb (feeds the s6 and s7 products)
+ * 24(sp) - vx (upper bits select the pixel, low 16 bits give t4/t5)
+ * 28(sp) - unit_x (added to vx after every pixel)
+ */
+
+    beqz     a3, 1f         /* w == 0: nothing to do */
+     nop                    /* delay slot */
+
+    SAVE_REGS_ON_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+    lw       s0, 40(sp)     /* s0 = wt (stack arguments now sit 24 bytes higher) */
+    lw       s1, 44(sp)     /* s1 = wb */
+    lw       s2, 48(sp)     /* s2 = vx */
+    lw       s3, 52(sp)     /* s3 = unit_x */
+    li       v0, BILINEAR_INTERPOLATION_RANGE /* v0 = RANGE, used to derive t5 */
+    li       s8, 0x00ff00ff /* s8 = maskLSR argument of OVER_8888_8888 */
+
+    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* shift is 0 when BITS == 8 */
+    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* same scaling for wb */
+0:                          /* one iteration per destination pixel */
+    andi     t4, s2, 0xffff /* t4 = low 16 bits of vx (zero-extended) */
+    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* keep the top BITS bits of them */
+    subu     t5, v0, t4     /* t5 = RANGE - t4 */
+
+    mul      s4, s0, t5     /* s4 = wt * t5 */
+    mul      s5, s0, t4     /* s5 = wt * t4 */
+    mul      s6, s1, t5     /* s6 = wb * t5 */
+    mul      s7, s1, t4     /* s7 = wb * t4 */
+
+    sra      t9, s2, 16     /* t9 = vx >> 16, the source pixel index */
+    sll      t9, t9, 2      /* t9 = byte offset (4 bytes per pixel) */
+    addiu    t8, t9, 4      /* t8 = byte offset of the following pixel */
+    lwx      t0, t9(a1)     /* t0 = tl */
+    lwx      t1, t8(a1)     /* t1 = tr */
+    addiu    a3, a3, -1     /* one pixel fewer remaining */
+    lwx      t2, t9(a2)     /* t2 = bl */
+    lwx      t3, t8(a2)     /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    lw       t1, 0(a0)      /* t1 = dest */
+    OVER_8888_8888 t0, t1, t2, s8, t3, t4, t5, t6
+
+    addu     s2, s2, s3     /* vx += unit_x; */
+    sw       t2, 0(a0)      /* store the OVER result (t2) back to dst */
+    bnez     a3, 0b         /* loop while pixels remain */
+     addiu   a0, a0, 4      /* delay slot: advance dst by one 32-bit pixel */
+
+    RESTORE_REGS_FROM_STACK 24, v0, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+    j        ra
+     nop                    /* delay slot */
+
+END(pixman_scaled_bilinear_scanline_8888_8888_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
+/* Bilinear scanline, ADD: interpolated 32-bit pixel is added to the dst word.
+ * a0     - *dst (read and rewritten; advanced by 4 bytes per pixel)
+ * a1     - *src_top (32-bit pixels, indexed by vx >> 16)
+ * a2     - *src_bottom (32-bit pixels, indexed by vx >> 16)
+ * a3     - w (pixel count; the routine returns immediately when it is 0)
+ * 16(sp) - wt (feeds the s4 and s5 products)
+ * 20(sp) - wb (feeds the s6 and s7 products)
+ * 24(sp) - vx (upper bits select the pixel, low 16 bits give t4/t5)
+ * 28(sp) - unit_x (added to vx after every pixel)
+ */
+
+    beqz         a3, 1f         /* w == 0: nothing to do */
+     nop                        /* delay slot */
+
+    SAVE_REGS_ON_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+
+    lw           s0, 36(sp)     /* s0 = wt (stack arguments now sit 20 bytes higher) */
+    lw           s1, 40(sp)     /* s1 = wb */
+    lw           s2, 44(sp)     /* s2 = vx */
+    lw           s3, 48(sp)     /* s3 = unit_x */
+    li           v0, BILINEAR_INTERPOLATION_RANGE /* v0 = RANGE, used to derive t5 */
+
+    sll          s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* shift is 0 when BITS == 8 */
+    sll          s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* same scaling for wb */
+0:                              /* one iteration per destination pixel */
+    andi         t4, s2, 0xffff /* t4 = low 16 bits of vx (zero-extended) */
+    srl          t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* keep the top BITS bits of them */
+    subu         t5, v0, t4     /* t5 = RANGE - t4 */
+
+    mul          s4, s0, t5     /* s4 = wt * t5 */
+    mul          s5, s0, t4     /* s5 = wt * t4 */
+    mul          s6, s1, t5     /* s6 = wb * t5 */
+    mul          s7, s1, t4     /* s7 = wb * t4 */
+
+    sra          t9, s2, 16     /* t9 = vx >> 16, the source pixel index */
+    sll          t9, t9, 2      /* t9 = byte offset (4 bytes per pixel) */
+    addiu        t8, t9, 4      /* t8 = byte offset of the following pixel */
+    lwx          t0, t9(a1)     /* t0 = tl */
+    lwx          t1, t8(a1)     /* t1 = tr */
+    addiu        a3, a3, -1     /* one pixel fewer remaining */
+    lwx          t2, t9(a2)     /* t2 = bl */
+    lwx          t3, t8(a2)     /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    lw           t1, 0(a0)      /* t1 = current dst word */
+    addu_s.qb    t2, t0, t1     /* t2 = t0 + t1, saturating per byte */
+
+    addu         s2, s2, s3     /* vx += unit_x; */
+    sw           t2, 0(a0)      /* store the sum back to dst */
+    bnez         a3, 0b         /* loop while pixels remain */
+     addiu       a0, a0, 4      /* delay slot: advance dst by one 32-bit pixel */
+
+    RESTORE_REGS_FROM_STACK 20, v0, s0, s1, s2, s3, s4, s5, s6, s7
+1:
+    j            ra
+     nop                        /* delay slot */
+
+END(pixman_scaled_bilinear_scanline_8888_8888_ADD_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
 /*
  * a0     - *dst
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 24b049e..7327dc6 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -566,6 +566,27 @@ LEAF_MIPS32R2(symbol)                                   \
     addu_s.qb              \out2_8888, \d2_8888,  \scratch2
 .endm
 
+/*
+ * OVER operation on a single a8r8g8b8 source pixel (s_8888) and a single
+ * a8r8g8b8 destination pixel (d_8888). The result is left in out_8888, which
+ * must not be the same register as s_8888 (s_8888 is read again at the end).
+ * maskLSR is needed by the rounding step and must be preloaded with:
+ *   li       maskLSR, 0x00ff00ff
+ */
+.macro OVER_8888_8888 s_8888,   \
+                      d_8888,   \
+                      out_8888, \
+                      maskLSR,  \
+                      scratch1, scratch2, scratch3, scratch4
+    not                \scratch1, \s_8888              /* scratch1 = ~s */
+    srl                \scratch1, \scratch1, 24        /* scratch1 = 255 - (s >> 24) */
+
+    MIPS_UN8x4_MUL_UN8 \d_8888,   \scratch1, \
+                       \out_8888, \maskLSR, \
+                       \scratch2, \scratch3, \scratch4
+
+    addu_s.qb          \out_8888, \out_8888, \s_8888   /* out += s, saturating per byte */
+.endm
+
 .macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888,   \
                                     m_8,      \
                                     d_8888,   \
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 66c0e5d..63a0225 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -58,6 +58,19 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
                                        uint8_t, 1, uint16_t, 1)
 
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_8888, SRC,
+                                          uint32_t, uint32_t) /* src_type, dst_type */
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 8888_0565, SRC,
+                                          uint32_t, uint16_t) /* src_type, dst_type */
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_8888, SRC,
+                                          uint16_t, uint32_t) /* src_type, dst_type */
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (0, 0565_0565, SRC,
+                                          uint16_t, uint16_t) /* src_type, dst_type */
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER,
+                                          uint32_t, uint32_t) /* wrapper returns early when zero_src is set */
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD,
+                                          uint32_t, uint32_t) /* wrapper returns early when zero_src is set */
+
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC,
                                              uint32_t, uint32_t)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC,
@@ -219,6 +232,22 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   mips_composite_over_n_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   mips_composite_over_n_8_0565),
 
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8888),
+
+    SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_0565),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_0565),
+
+    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8888),
+    SIMPLE_BILINEAR_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_0565),
+
+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+
+    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8888),
+    SIMPLE_BILINEAR_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8888),
+
     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8_8888),
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index 5036938..a3d774f 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -127,6 +127,55 @@ mips_composite_##name (pixman_implementation_t *imp,                \
     }                                                               \
 }
 
+/****************************************************************************/
+/* Declares the asm scanline routine, a C wrapper for it, and four main loops. */
+#define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op,            \
+                                                 src_type, dst_type)         \
+void /* prototype of the assembly scanline routine */                        \
+pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips(                    \
+                                             dst_type *       dst,           \
+                                             const src_type * src_top,       \
+                                             const src_type * src_bottom,    \
+                                             int32_t          w,             \
+                                             int              wt,            \
+                                             int              wb,            \
+                                             pixman_fixed_t   vx,            \
+                                             pixman_fixed_t   unit_x);       \
+static force_inline void /* C wrapper passed to the main loops below */      \
+scaled_bilinear_scanline_mips_##name##_##op (dst_type *       dst,           \
+                                             const uint32_t * mask,          \
+                                             const src_type * src_top,       \
+                                             const src_type * src_bottom,    \
+                                             int32_t          w,             \
+                                             int              wt,            \
+                                             int              wb,            \
+                                             pixman_fixed_t   vx,            \
+                                             pixman_fixed_t   unit_x,        \
+                                             pixman_fixed_t   max_vx,        \
+                                             pixman_bool_t    zero_src)      \
+{ /* mask and max_vx are not forwarded to the assembly routine */            \
+    if ((flags & SKIP_ZERO_SRC) && zero_src)                                 \
+        return; /* the assembly routine is not called */                     \
+    pixman_scaled_bilinear_scanline_##name##_##op##_asm_mips (dst, src_top,  \
+                                                              src_bottom, w, \
+                                                              wt, wb,        \
+                                                              vx, unit_x);   \
+}                                                                            \
+                                                                             \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_cover_##op,                     \
+                       scaled_bilinear_scanline_mips_##name##_##op,          \
+                       src_type, uint32_t, dst_type, COVER, FLAG_NONE)       \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_none_##op,                      \
+                       scaled_bilinear_scanline_mips_##name##_##op,          \
+                       src_type, uint32_t, dst_type, NONE, FLAG_NONE)        \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_pad_##op,                       \
+                       scaled_bilinear_scanline_mips_##name##_##op,          \
+                       src_type, uint32_t, dst_type, PAD, FLAG_NONE)         \
+FAST_BILINEAR_MAINLOOP_COMMON (mips_##name##_normal_##op,                    \
+                       scaled_bilinear_scanline_mips_##name##_##op,          \
+                       src_type, uint32_t, dst_type, NORMAL,                 \
+                       FLAG_NONE)
+
 /*****************************************************************************/
 
 #define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST(flags, name, op,          \
commit 707a8be11280c4d395e662e869d4a98d75bb5571
Author: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Date:   Mon Jul 2 20:54:19 2012 +0200

    MIPS: DSPr2: Added several bilinear fast paths with a8 mask
    
    Performance numbers before/after on MIPS-74kc @ 1GHz:
    
    lowlevel-blt-bench -b
    
    Reference (before):
    
      src_8888_8_8888 =  L1:   6.37  L2:   6.08  M:  5.46 ( 32.57%)  HT:  4.64  VT:  4.61  R:  4.52  RT:  2.85 (  23Kops/s)
      src_8888_8_0565 =  L1:   5.89  L2:   5.66  M:  5.11 ( 23.71%)  HT:  4.36  VT:  4.34  R:  4.26  RT:  2.71 (  22Kops/s)
      src_0565_8_x888 =  L1:   3.32  L2:   3.27  M:  3.17 ( 14.71%)  HT:  2.86  VT:  2.84  R:  2.81  RT:  2.07 (  19Kops/s)
      src_0565_8_0565 =  L1:   3.19  L2:   3.15  M:  3.05 ( 10.11%)  HT:  2.75  VT:  2.74  R:  2.71  RT:  2.00 (  18Kops/s)
     over_8888_8_8888 =  L1:   4.99  L2:   4.71  M:  4.11 ( 27.22%)  HT:  3.59  VT:  3.58  R:  3.50  RT:  2.36 (  21Kops/s)
      add_8888_8_8888 =  L1:   5.60  L2:   5.26  M:  4.52 ( 29.95%)  HT:  3.92  VT:  3.89  R:  3.80  RT:  2.49 (  21Kops/s)
    
    Optimized:
    
      src_8888_8_8888 =  L1:  13.19  L2:  12.13  M:  9.75 ( 58.22%)  HT:  8.60  VT:  8.44  R:  7.90  RT:  5.06 (  33Kops/s)
      src_8888_8_0565 =  L1:  11.64  L2:  10.81  M:  9.18 ( 42.63%)  HT:  8.04  VT:  7.90  R:  7.57  RT:  5.02 (  32Kops/s)
      src_0565_8_x888 =  L1:   8.34  L2:   7.95  M:  7.29 ( 33.85%)  HT:  6.55  VT:  6.48  R:  6.25  RT:  4.35 (  30Kops/s)
      src_0565_8_0565 =  L1:   7.71  L2:   7.35  M:  6.90 ( 22.90%)  HT:  6.14  VT:  6.10  R:  5.94  RT:  4.07 (  29Kops/s)
     over_8888_8_8888 =  L1:   9.73  L2:   8.99  M:  7.15 ( 47.41%)  HT:  6.40  VT:  6.30  R:  6.11  RT:  4.28 (  30Kops/s)
      add_8888_8_8888 =  L1:  13.01  L2:  11.72  M:  8.70 ( 57.68%)  HT:  7.59  VT:  7.46  R:  7.20  RT:  4.74 (  32Kops/s)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 48f108e..ac56746 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -749,6 +749,278 @@ LEAF_MIPS_DSPR2(pixman_composite_over_n_8_0565_asm_mips)
 
 END(pixman_composite_over_n_8_0565_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
+/* Bilinear scanline, SRC with 8-bit mask: 32-bit source rows, 32-bit word stored per pixel.
+ * a0     - *dst (advanced by 4 bytes per pixel)
+ * a1     - *mask (one byte read per pixel)
+ * a2     - *src_top (32-bit pixels, indexed by vx >> 16)
+ * a3     - *src_bottom (32-bit pixels, indexed by vx >> 16)
+ * 16(sp) - wt (feeds the s4 and s5 products)
+ * 20(sp) - wb (feeds the s6 and s7 products)
+ * 24(sp) - vx (upper bits select the pixel, low 16 bits give t4/t5)
+ * 28(sp) - unit_x (added to vx after every pixel)
+ * 32(sp) - w (pixel count; the routine returns immediately when it is 0)
+ */
+
+    lw       v1, 32(sp)        /* v1 = w, the loop counter */
+    beqz     v1, 1f            /* w == 0: nothing to do */
+     nop                       /* delay slot */
+
+    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+    lw       s0, 44(sp)        /* s0 = wt (stack arguments now sit 28 bytes higher) */
+    lw       s1, 48(sp)        /* s1 = wb */
+    lw       s2, 52(sp)        /* s2 = vx */
+    lw       s3, 56(sp)        /* s3 = unit_x */
+    li       v0, BILINEAR_INTERPOLATION_RANGE /* v0 = RANGE, used to derive t5 */
+    li       s8, 0x00ff00ff    /* s8 = mask constant passed to MIPS_UN8x4_MUL_UN8 */
+
+    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* shift is 0 when BITS == 8 */
+    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* same scaling for wb */
+0:                             /* one iteration per destination pixel */
+    andi     t4, s2, 0xffff    /* t4 = low 16 bits of vx (zero-extended) */
+    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* keep the top BITS bits of them */
+    subu     t5, v0, t4        /* t5 = RANGE - t4 */
+
+    mul      s4, s0, t5        /* s4 = wt * t5 */
+    mul      s5, s0, t4        /* s5 = wt * t4 */
+    mul      s6, s1, t5        /* s6 = wb * t5 */
+    mul      s7, s1, t4        /* s7 = wb * t4 */
+
+    sra      t9, s2, 16        /* t9 = vx >> 16, the source pixel index */
+    sll      t9, t9, 2         /* t9 = byte offset (4 bytes per pixel) */
+    addiu    t8, t9, 4         /* t8 = byte offset of the following pixel */
+    lwx      t0, t9(a2)        /* t0 = tl */
+    lwx      t1, t8(a2)        /* t1 = tr */
+    addiu    v1, v1, -1        /* one pixel fewer remaining */
+    lwx      t2, t9(a3)        /* t2 = bl */
+    lwx      t3, t8(a3)        /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    lbu      t1, 0(a1)         /* t1 = mask */
+    addiu    a1, a1, 1         /* advance to the next mask byte */
+    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
+
+    addu     s2, s2, s3        /* vx += unit_x; */
+    sw       t0, 0(a0)         /* store t0 to dst */
+    bnez     v1, 0b            /* loop while pixels remain */
+     addiu   a0, a0, 4         /* delay slot: advance dst by one 32-bit pixel */
+
+    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+    j        ra
+     nop                       /* delay slot */
+
+END(pixman_scaled_bilinear_scanline_8888_8_8888_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
+/* Bilinear scanline, SRC with 8-bit mask: 32-bit source rows, 16-bit halfword stored per pixel.
+ * a0     - *dst (advanced by 2 bytes per pixel)
+ * a1     - *mask (one byte read per pixel)
+ * a2     - *src_top (32-bit pixels, indexed by vx >> 16)
+ * a3     - *src_bottom (32-bit pixels, indexed by vx >> 16)
+ * 16(sp) - wt (feeds the s4 and s5 products)
+ * 20(sp) - wb (feeds the s6 and s7 products)
+ * 24(sp) - vx (upper bits select the pixel, low 16 bits give t4/t5)
+ * 28(sp) - unit_x (added to vx after every pixel)
+ * 32(sp) - w (pixel count; the routine returns immediately when it is 0)
+ */
+
+    lw       v1, 32(sp)        /* v1 = w, the loop counter */
+    beqz     v1, 1f            /* w == 0: nothing to do */
+     nop                       /* delay slot */
+
+    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+
+    lw       s0, 44(sp)        /* s0 = wt (stack arguments now sit 28 bytes higher) */
+    lw       s1, 48(sp)        /* s1 = wb */
+    lw       s2, 52(sp)        /* s2 = vx */
+    lw       s3, 56(sp)        /* s3 = unit_x */
+    li       v0, BILINEAR_INTERPOLATION_RANGE /* v0 = RANGE, used to derive t5 */
+    li       s8, 0x00ff00ff    /* s8 = mask constant passed to MIPS_UN8x4_MUL_UN8 */
+
+    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* shift is 0 when BITS == 8 */
+    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* same scaling for wb */
+0:                             /* one iteration per destination pixel */
+    andi     t4, s2, 0xffff    /* t4 = low 16 bits of vx (zero-extended) */
+    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* keep the top BITS bits of them */
+    subu     t5, v0, t4        /* t5 = RANGE - t4 */
+
+    mul      s4, s0, t5        /* s4 = wt * t5 */
+    mul      s5, s0, t4        /* s5 = wt * t4 */
+    mul      s6, s1, t5        /* s6 = wb * t5 */
+    mul      s7, s1, t4        /* s7 = wb * t4 */
+
+    sra      t9, s2, 16        /* t9 = vx >> 16, the source pixel index */
+    sll      t9, t9, 2         /* t9 = byte offset (4 bytes per pixel) */
+    addiu    t8, t9, 4         /* t8 = byte offset of the following pixel */
+    lwx      t0, t9(a2)        /* t0 = tl */
+    lwx      t1, t8(a2)        /* t1 = tr */
+    addiu    v1, v1, -1        /* one pixel fewer remaining */
+    lwx      t2, t9(a3)        /* t2 = bl */
+    lwx      t3, t8(a3)        /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    lbu      t1, 0(a1)         /* t1 = mask */
+    addiu    a1, a1, 1         /* advance to the next mask byte */
+    MIPS_UN8x4_MUL_UN8 t0, t1, t0, s8, t2, t3, t4
+    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
+
+    addu     s2, s2, s3        /* vx += unit_x; */
+    sh       t1, 0(a0)         /* store the low halfword of t1 to dst */
+    bnez     v1, 0b            /* loop while pixels remain */
+     addiu   a0, a0, 2         /* delay slot: advance dst by one 16-bit pixel */
+
+    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8
+1:
+    j        ra
+     nop                       /* delay slot */
+
+END(pixman_scaled_bilinear_scanline_8888_8_0565_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
+/* Bilinear scanline, SRC with 8-bit mask: 16-bit source rows, 32-bit word stored per pixel.
+ * a0     - *dst (advanced by 4 bytes per pixel)
+ * a1     - *mask (one byte read per pixel)
+ * a2     - *src_top (16-bit pixels, indexed by vx >> 16)
+ * a3     - *src_bottom (16-bit pixels, indexed by vx >> 16)
+ * 16(sp) - wt (feeds the s4 and s5 products)
+ * 20(sp) - wb (feeds the s6 and s7 products)
+ * 24(sp) - vx (upper bits select the pixel, low 16 bits give t4/t5)
+ * 28(sp) - unit_x (added to vx after every pixel)
+ * 32(sp) - w (pixel count; the routine returns immediately when it is 0)
+ */
+
+    lw       t0, 32(sp)        /* t0 = w, only tested for zero here */
+    beqz     t0, 1f            /* w == 0: nothing to do */
+     nop                       /* delay slot */
+
+    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra /* ra is saved because it becomes the loop counter */
+
+    lw       s0, 48(sp)        /* s0 = wt (stack arguments now sit 32 bytes higher) */
+    lw       s1, 52(sp)        /* s1 = wb */
+    lw       s2, 56(sp)        /* s2 = vx */
+    lw       s3, 60(sp)        /* s3 = unit_x */
+    lw       ra, 64(sp)        /* ra = w */
+    li       v0, 0x00ff00ff    /* v0 = mask constant passed to MIPS_UN8x4_MUL_UN8 */
+    li       v1, 0x07e007e0    /* v1 and s8: masks passed to CONVERT_2x0565_TO_2x8888 */
+    li       s8, 0x001f001f
+
+    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* shift is 0 when BITS == 8 */
+    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* same scaling for wb */
+0:                             /* one iteration per destination pixel */
+    andi     t4, s2, 0xffff    /* t4 = low 16 bits of vx (zero-extended) */
+    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* keep the top BITS bits of them */
+    li       t5, BILINEAR_INTERPOLATION_RANGE /* reloaded each pass: v0 holds a mask here */
+    subu     t5, t5, t4        /* t5 = RANGE - t4 */
+
+    mul      s4, s0, t5        /* s4 = wt * t5 */
+    mul      s5, s0, t4        /* s5 = wt * t4 */
+    mul      s6, s1, t5        /* s6 = wb * t5 */
+    mul      s7, s1, t4        /* s7 = wb * t4 */
+
+    sra      t9, s2, 16        /* t9 = vx >> 16, the source pixel index */
+    sll      t9, t9, 1         /* t9 = byte offset (2 bytes per pixel) */
+    addiu    t8, t9, 2         /* t8 = byte offset of the following pixel */
+    lhx      t0, t9(a2)        /* t0 = tl */
+    lhx      t1, t8(a2)        /* t1 = tr */
+    andi     t1, t1, 0xffff    /* drop the sign extension done by lhx */
+    addiu    ra, ra, -1        /* one pixel fewer remaining */
+    lhx      t2, t9(a3)        /* t2 = bl */
+    lhx      t3, t8(a3)        /* t3 = br */
+    andi     t3, t3, 0xffff    /* NOTE(review): only tr/br are masked; presumably the macro below needs just its 2nd input zero-extended - confirm */
+
+    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
+    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    lbu      t1, 0(a1)         /* t1 = mask */
+    addiu    a1, a1, 1         /* advance to the next mask byte */
+    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
+
+    addu     s2, s2, s3        /* vx += unit_x; */
+    sw       t0, 0(a0)         /* store t0 to dst */
+    bnez     ra, 0b            /* loop while pixels remain */
+     addiu   a0, a0, 4         /* delay slot: advance dst by one 32-bit pixel */
+
+    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra /* reloads the return address into ra */
+1:
+    j        ra
+     nop                       /* delay slot */
+
+END(pixman_scaled_bilinear_scanline_0565_8_x888_SRC_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
+/* Bilinear scanline, SRC with 8-bit mask: 16-bit source rows, 16-bit halfword stored per pixel.
+ * a0     - *dst (advanced by 2 bytes per pixel)
+ * a1     - *mask (one byte read per pixel)
+ * a2     - *src_top (16-bit pixels, indexed by vx >> 16)
+ * a3     - *src_bottom (16-bit pixels, indexed by vx >> 16)
+ * 16(sp) - wt (feeds the s4 and s5 products)
+ * 20(sp) - wb (feeds the s6 and s7 products)
+ * 24(sp) - vx (upper bits select the pixel, low 16 bits give t4/t5)
+ * 28(sp) - unit_x (added to vx after every pixel)
+ * 32(sp) - w (pixel count; the routine returns immediately when it is 0)
+ */
+
+    lw       t0, 32(sp)        /* t0 = w, only tested for zero here */
+    beqz     t0, 1f            /* w == 0: nothing to do */
+     nop                       /* delay slot */
+
+    SAVE_REGS_ON_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra /* ra is saved because it becomes the loop counter */
+
+    lw       s0, 48(sp)        /* s0 = wt (stack arguments now sit 32 bytes higher) */
+    lw       s1, 52(sp)        /* s1 = wb */
+    lw       s2, 56(sp)        /* s2 = vx */
+    lw       s3, 60(sp)        /* s3 = unit_x */
+    lw       ra, 64(sp)        /* ra = w */
+    li       v0, 0x00ff00ff    /* v0 = mask constant passed to MIPS_UN8x4_MUL_UN8 */
+    li       v1, 0x07e007e0    /* v1 and s8: masks passed to CONVERT_2x0565_TO_2x8888 */
+    li       s8, 0x001f001f
+
+    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* shift is 0 when BITS == 8 */
+    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* same scaling for wb */
+0:                             /* one iteration per destination pixel */
+    andi     t4, s2, 0xffff    /* t4 = low 16 bits of vx (zero-extended) */
+    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* keep the top BITS bits of them */
+    li       t5, BILINEAR_INTERPOLATION_RANGE /* reloaded each pass: v0 holds a mask here */
+    subu     t5, t5, t4        /* t5 = RANGE - t4 */
+
+    mul      s4, s0, t5        /* s4 = wt * t5 */
+    mul      s5, s0, t4        /* s5 = wt * t4 */
+    mul      s6, s1, t5        /* s6 = wb * t5 */
+    mul      s7, s1, t4        /* s7 = wb * t4 */
+
+    sra      t9, s2, 16        /* t9 = vx >> 16, the source pixel index */
+    sll      t9, t9, 1         /* t9 = byte offset (2 bytes per pixel) */
+    addiu    t8, t9, 2         /* t8 = byte offset of the following pixel */
+    lhx      t0, t9(a2)        /* t0 = tl */
+    lhx      t1, t8(a2)        /* t1 = tr */
+    andi     t1, t1, 0xffff    /* drop the sign extension done by lhx */
+    addiu    ra, ra, -1        /* one pixel fewer remaining */
+    lhx      t2, t9(a3)        /* t2 = bl */
+    lhx      t3, t8(a3)        /* t3 = br */
+    andi     t3, t3, 0xffff    /* NOTE(review): only tr/br are masked; presumably the macro below needs just its 2nd input zero-extended - confirm */
+
+    CONVERT_2x0565_TO_2x8888 t0, t1, t0, t1, v1, s8, t4, t5, t6, t7
+    CONVERT_2x0565_TO_2x8888 t2, t3, t2, t3, v1, s8, t4, t5, t6, t7
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7
+    lbu      t1, 0(a1)         /* t1 = mask */
+    addiu    a1, a1, 1         /* advance to the next mask byte */
+    MIPS_UN8x4_MUL_UN8 t0, t1, t0, v0, t2, t3, t4
+    CONVERT_1x8888_TO_1x0565 t0, t1, t2, t3
+
+    addu     s2, s2, s3        /* vx += unit_x; */
+    sh       t1, 0(a0)         /* store the low halfword of t1 to dst */
+    bnez     ra, 0b            /* loop while pixels remain */
+     addiu   a0, a0, 2         /* delay slot: advance dst by one 16-bit pixel */
+
+    RESTORE_REGS_FROM_STACK 32, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8, ra /* reloads the return address into ra */
+1:
+    j        ra
+     nop                       /* delay slot */
+
+END(pixman_scaled_bilinear_scanline_0565_8_0565_SRC_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
 /*
  * a0     - dst        (a8r8g8b8)
@@ -815,3 +1087,68 @@ LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
      nop
 
 END(pixman_scaled_bilinear_scanline_8888_8_8888_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
+/* One scanline: dst = saturating_add (dst, bilinear (src) scaled by mask).
+ * a0     - *dst        (32-bit pixels, read and written back in place)
+ * a1     - *mask       (one byte per destination pixel)
+ * a2     - *src_top    (32-bit pixels, indexed by the integer part of vx)
+ * a3     - *src_bottom (32-bit pixels, indexed by the integer part of vx)
+ * 16(sp) - wt          (multiplies the src_top pair)
+ * 20(sp) - wb          (multiplies the src_bottom pair)
+ * 24(sp) - vx          (source x: pixel index in bits 31..16, fraction below)
+ * 28(sp) - unit_x      (added to vx after every pixel)
+ * 32(sp) - w           (pixel count; 0 returns without touching anything)
+ */
+
+    lw       v1, 32(sp)        /* v1 = w, used as the loop counter */
+    beqz     v1, 1f            /* w == 0: skip the save/restore too */
+     nop                       /* branch delay slot */
+
+    SAVE_REGS_ON_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 /* stack args are read 28 bytes higher below */
+
+    lw       s0, 44(sp)        /* s0 = wt */
+    lw       s1, 48(sp)        /* s1 = wb */
+    lw       s2, 52(sp)        /* s2 = vx */
+    lw       s3, 56(sp)        /* s3 = unit_x */
+    li       v0, BILINEAR_INTERPOLATION_RANGE /* loop-invariant, used by subu below */
+    li       s8, 0x00ff00ff    /* passed as maskLSR to MIPS_UN8x4_MUL_UN8_ADD_UN8x4 */
+
+    sll      s0, s0, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* shift is 0 when BITS == 8 */
+    sll      s1, s1, (2 * (8 - BILINEAR_INTERPOLATION_BITS)) /* same pre-scale for wb */
+0:                             /* per-pixel loop */
+    andi     t4, s2, 0xffff    /* t4 = vx & 0xffff (fraction, zero-extended) */
+    srl      t4, t4, (16 - BILINEAR_INTERPOLATION_BITS) /* t4 = top BILINEAR_INTERPOLATION_BITS bits of the fraction */
+    subu     t5, v0, t4        /* t5 = BILINEAR_INTERPOLATION_RANGE - t4 */
+
+    mul      s4, s0, t5        /* s4 = wt * (RANGE - t4) */
+    mul      s5, s0, t4        /* s5 = wt * t4 */
+    mul      s6, s1, t5        /* s6 = wb * (RANGE - t4) */
+    mul      s7, s1, t4        /* s7 = wb * t4 */
+
+    sra      t9, s2, 16        /* t9 = integer part of vx (pixel index) */
+    sll      t9, t9, 2         /* t9 = byte offset of that pixel (4 bytes each) */
+    addiu    t8, t9, 4         /* t8 = byte offset of the next pixel */
+    lwx      t0, t9(a2)        /* t0 = tl */
+    lwx      t1, t8(a2)        /* t1 = tr */
+    addiu    v1, v1, -1        /* --w, placed between the loads */
+    lwx      t2, t9(a3)        /* t2 = bl */
+    lwx      t3, t8(a3)        /* t3 = br */
+
+    BILINEAR_INTERPOLATE_SINGLE_PIXEL t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, s4, s5, s6, s7 /* result is taken from t0 below */
+    lbu      t1, 0(a1)         /* t1 = mask */
+    lw       t2, 0(a0)         /* t2 = dst */
+    addiu    a1, a1, 1         /* ++mask */
+    MIPS_UN8x4_MUL_UN8_ADD_UN8x4 t0, t1, t2, t0, s8, t3, t4, t5 /* t0 = (t0 scaled by t1) + t2, saturating per byte */
+
+    addu     s2, s2, s3        /* vx += unit_x; */
+    sw       t0, 0(a0)         /* *dst = t0 */
+    bnez     v1, 0b            /* loop until w reaches 0 */
+     addiu   a0, a0, 4         /* delay slot: ++dst (runs on every iteration) */
+
+    RESTORE_REGS_FROM_STACK 28, v0, v1, s0, s1, s2, s3, s4, s5, s6, s7, s8 /* mirrors SAVE_REGS_ON_STACK above */
+1:                             /* common exit; w == 0 jumps straight here */
+    j        ra
+     nop                       /* jump delay slot */
+
+END(pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips)
diff --git a/pixman/pixman-mips-dspr2-asm.h b/pixman/pixman-mips-dspr2-asm.h
index 7cf3281..24b049e 100644
--- a/pixman/pixman-mips-dspr2-asm.h
+++ b/pixman/pixman-mips-dspr2-asm.h
@@ -566,6 +566,19 @@ LEAF_MIPS32R2(symbol)                                   \
     addu_s.qb              \out2_8888, \d2_8888,  \scratch2
 .endm
 
+.macro MIPS_UN8x4_MUL_UN8_ADD_UN8x4 s_8888,   \
+                                    m_8,      \
+                                    d_8888,   \
+                                    out_8888, \
+                                    maskLSR,  \
+                                    scratch1, scratch2, scratch3 /* out_8888 = MIPS_UN8x4_MUL_UN8 (s_8888, m_8), then saturating add of d_8888 */
+    MIPS_UN8x4_MUL_UN8 \s_8888, \m_8, \
+                       \out_8888, \maskLSR, \
+                       \scratch1, \scratch2, \scratch3 /* out_8888 now holds the masked source */
+    /* d_8888 is first read below, after out_8888 was written: the two must be different registers */
+    addu_s.qb          \out_8888, \out_8888, \d_8888 /* unsigned saturating add on each of the four bytes */
+.endm
+
 .macro BILINEAR_INTERPOLATE_SINGLE_PIXEL tl, tr, bl, br,         \
                                          scratch1, scratch2,     \
                                          alpha, red, green, blue \
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 06d4335..66c0e5d 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -58,8 +58,18 @@ PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_8888,
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8_0565,
                                        uint8_t, 1, uint16_t, 1)
 
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC, /* presumably: flags, name, op */
+                                             uint32_t, uint32_t) /* presumably: source and destination pixel types */
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC,
+                                             uint32_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_x888, SRC,
+                                             uint16_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 0565_8_0565, SRC, /* NOTE(review): all SRC bindings pass 0, not SKIP_ZERO_SRC - looks intentional, confirm */
+                                             uint16_t, uint16_t)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, OVER,
                                              uint32_t, uint32_t)
+PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_8888, ADD, /* presumably binds pixman_scaled_bilinear_scanline_8888_8_8888_ADD_asm_mips */
+                                             uint32_t, uint32_t)
 
 static pixman_bool_t
 pixman_fill_mips (uint32_t *bits,
@@ -209,9 +219,21 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   mips_composite_over_n_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   mips_composite_over_n_8_0565),
 
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8_8888),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, a8r8g8b8, r5g6b5, mips_8888_8_0565),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, x8r8g8b8, r5g6b5, mips_8888_8_0565),
+
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, x8r8g8b8, mips_0565_8_x888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (SRC, r5g6b5, r5g6b5, mips_0565_8_0565),
+
     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
     SIMPLE_BILINEAR_A8_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
 
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, a8r8g8b8, mips_8888_8_8888),
+    SIMPLE_BILINEAR_A8_MASK_FAST_PATH (ADD, a8r8g8b8, x8r8g8b8, mips_8888_8_8888),
     { PIXMAN_OP_NONE },
 };
 


More information about the xorg-commit mailing list