pixman: Branch 'master' - 2 commits

Matt Turner mattst88 at kemper.freedesktop.org
Sun Sep 21 11:10:37 PDT 2014


 pixman/pixman-mmx.c |  119 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 119 insertions(+)

New commits:
commit f078727f392bc9f235df916e75634ed87177b9b4
Author: Matt Turner <mattst88 at gmail.com>
Date:   Wed Jan 2 11:16:12 2013 -0800

    mmx: Add nearest over_8888_8888
    
    lowlevel-blt-bench -n, over_8888_8888, 15 iterations on Loongson 2f:
    
               Before          After
              Mean StdDev     Mean StdDev   Change
        L1    15.8   0.02     24.0   0.06   +52.0%
        L2    14.8   0.15     23.3   0.13   +56.9%
        M     10.3   0.01     13.8   0.03   +33.6%
        HT    10.0   0.02     14.5   0.05   +44.7%
        VT     9.7   0.02     13.5   0.04   +39.2%
        R      9.1   0.01     12.2   0.04   +34.4%
        RT     7.1   0.06      8.9   0.09   +25.2%

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 63f4cdf..c7fd503 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -3556,6 +3556,46 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
 }
 
 static force_inline void
+scaled_nearest_scanline_mmx_8888_8888_OVER (uint32_t*       pd,
+                                            const uint32_t* ps,
+                                            int32_t         w,
+                                            pixman_fixed_t  vx,
+                                            pixman_fixed_t  unit_x,
+                                            pixman_fixed_t  src_width_fixed,
+                                            pixman_bool_t   fully_transparent_src)
+{
+    if (fully_transparent_src)
+	return;
+
+    while (w)
+    {
+	__m64 d = load (pd);
+	__m64 s = load (ps + pixman_fixed_to_int (vx));
+	vx += unit_x;
+	while (vx >= 0)
+	    vx -= src_width_fixed;
+
+	store8888 (pd, core_combine_over_u_pixel_mmx (s, d));
+	pd++;
+
+	w--;
+    }
+}
+
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_cover_OVER,
+		       scaled_nearest_scanline_mmx_8888_8888_OVER,
+		       uint32_t, uint32_t, COVER)
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_none_OVER,
+		       scaled_nearest_scanline_mmx_8888_8888_OVER,
+		       uint32_t, uint32_t, NONE)
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_pad_OVER,
+		       scaled_nearest_scanline_mmx_8888_8888_OVER,
+		       uint32_t, uint32_t, PAD)
+FAST_NEAREST_MAINLOOP (mmx_8888_8888_normal_OVER,
+		       scaled_nearest_scanline_mmx_8888_8888_OVER,
+		       uint32_t, uint32_t, NORMAL)
+
+static force_inline void
 scaled_nearest_scanline_mmx_8888_n_8888_OVER (const uint32_t * mask,
 					      uint32_t *       dst,
 					      const uint32_t * src,
@@ -4048,6 +4088,23 @@ static const pixman_fast_path_t mmx_fast_paths[] =
     PIXMAN_STD_FAST_PATH    (IN,   a8,       null,     a8,       mmx_composite_in_8_8              ),
     PIXMAN_STD_FAST_PATH    (IN,   solid,    a8,       a8,       mmx_composite_in_n_8_8            ),
 
+    SIMPLE_NEAREST_FAST_PATH_COVER  (OVER,   a8r8g8b8, x8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_COVER  (OVER,   a8b8g8r8, x8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_COVER  (OVER,   a8r8g8b8, a8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_COVER  (OVER,   a8b8g8r8, a8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NONE   (OVER,   a8r8g8b8, x8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NONE   (OVER,   a8b8g8r8, x8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NONE   (OVER,   a8r8g8b8, a8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NONE   (OVER,   a8b8g8r8, a8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_PAD    (OVER,   a8r8g8b8, x8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_PAD    (OVER,   a8b8g8r8, x8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_PAD    (OVER,   a8r8g8b8, a8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_PAD    (OVER,   a8b8g8r8, a8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER,   a8r8g8b8, x8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER,   a8b8g8r8, x8b8g8r8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER,   a8r8g8b8, a8r8g8b8, mmx_8888_8888                     ),
+    SIMPLE_NEAREST_FAST_PATH_NORMAL (OVER,   a8b8g8r8, a8b8g8r8, mmx_8888_8888                     ),
+
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888                 ),
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888                 ),
     SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888                 ),
commit f868ff5e342cfe43d75a53529895e0c2496098d4
Author: Matt Turner <mattst88 at gmail.com>
Date:   Tue Jan 1 21:18:09 2013 -0800

    mmx: Add nearest over_8888_n_8888
    
    lowlevel-blt-bench -n, over_8888_n_8888, 15 iterations on Loongson 2f:
    
               Before          After
              Mean StdDev     Mean StdDev   Change
        L1     9.7   0.01     19.2   0.02   +98.2%
        L2     9.6   0.11     19.2   0.16   +99.5%
        M      7.3   0.02     12.5   0.01   +72.0%
        HT     6.6   0.01     13.4   0.02  +103.2%
        VT     6.4   0.01     12.6   0.03   +96.1%
        R      6.3   0.01     11.2   0.01   +76.5%
        RT     4.4   0.01      8.1   0.03   +82.6%

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index f9a92ce..63f4cdf 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -3555,6 +3555,59 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
     _mm_empty ();
 }
 
+static force_inline void
+scaled_nearest_scanline_mmx_8888_n_8888_OVER (const uint32_t * mask,
+					      uint32_t *       dst,
+					      const uint32_t * src,
+					      int32_t          w,
+					      pixman_fixed_t   vx,
+					      pixman_fixed_t   unit_x,
+					      pixman_fixed_t   src_width_fixed,
+					      pixman_bool_t    zero_src)
+{
+    __m64 mm_mask;
+
+    if (zero_src || (*mask >> 24) == 0)
+	return;
+
+    mm_mask = expand_alpha (load8888 (mask));
+
+    while (w)
+    {
+	uint32_t s = *(src + pixman_fixed_to_int (vx));
+	vx += unit_x;
+	while (vx >= 0)
+	    vx -= src_width_fixed;
+
+	if (s)
+	{
+	    __m64 ms = load8888 (&s);
+	    __m64 alpha = expand_alpha (ms);
+	    __m64 dest  = load8888 (dst);
+
+	    store8888 (dst, (in_over (ms, alpha, mm_mask, dest)));
+	}
+
+	dst++;
+	w--;
+    }
+
+    _mm_empty ();
+}
+
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_cover_OVER,
+			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+			      uint32_t, uint32_t, uint32_t, COVER, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_pad_OVER,
+			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+			      uint32_t, uint32_t, uint32_t, PAD, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_none_OVER,
+			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+			      uint32_t, uint32_t, uint32_t, NONE, TRUE, TRUE)
+FAST_NEAREST_MAINLOOP_COMMON (mmx_8888_n_8888_normal_OVER,
+			      scaled_nearest_scanline_mmx_8888_n_8888_OVER,
+			      uint32_t, uint32_t, uint32_t, NORMAL, TRUE, TRUE)
+
 #define BSHIFT ((1 << BILINEAR_INTERPOLATION_BITS))
 #define BMSK (BSHIFT - 1)
 
@@ -3995,6 +4048,15 @@ static const pixman_fast_path_t mmx_fast_paths[] =
     PIXMAN_STD_FAST_PATH    (IN,   a8,       null,     a8,       mmx_composite_in_8_8              ),
     PIXMAN_STD_FAST_PATH    (IN,   solid,    a8,       a8,       mmx_composite_in_n_8_8            ),
 
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888                 ),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888                 ),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888                 ),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_n_8888                 ),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, a8r8g8b8, mmx_8888_n_8888          ),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, a8b8g8r8, mmx_8888_n_8888          ),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8r8g8b8, x8r8g8b8, mmx_8888_n_8888          ),
+    SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL (OVER, a8b8g8r8, x8b8g8r8, mmx_8888_n_8888          ),
+
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8,          a8r8g8b8, mmx_8888_8888                     ),
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8,          x8r8g8b8, mmx_8888_8888                     ),
     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8,          x8r8g8b8, mmx_8888_8888                     ),


More information about the xorg-commit mailing list