pixman: Branch 'master' - 4 commits

Søren Sandmann Pedersen sandmann at kemper.freedesktop.org
Thu Jul 22 05:00:38 PDT 2010


 pixman/pixman-fast-path.c |   38 ++++++
 pixman/pixman-sse2.c      |  291 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 328 insertions(+), 1 deletion(-)

New commits:
commit bf125fbbb701788d5d9ed9ff368bb4fe9d9c895e
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Sun Apr 25 20:25:50 2010 -0400

    [sse2] Add sse2_composite_add_n_8()
    
    This shows up when epiphany displays the "ImageTest" on
    glimr.rubyforge.org/cake/canvas.html

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 822fffe..3dd7967 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5386,6 +5386,103 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
     _mm_empty ();
 }
 
+/* -------------------------------------------------------------------------
+ * composite_add_n_8: ADD a solid source onto an a8 destination (no mask)
+ */
+
+static void
+sse2_composite_add_n_8 (pixman_implementation_t *imp,
+			pixman_op_t              op,
+			pixman_image_t *         src_image,
+			pixman_image_t *         mask_image,
+			pixman_image_t *         dst_image,
+			int32_t                  src_x,
+			int32_t                  src_y,
+			int32_t                  mask_x,
+			int32_t                  mask_y,
+			int32_t                  dest_x,
+			int32_t                  dest_y,
+			int32_t                  width,
+			int32_t                  height)
+{
+    uint8_t     *dst_line, *dst;
+    int dst_stride;
+    int32_t w;
+    uint32_t src;
+
+    __m128i xmm_src;
+
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+
+    src >>= 24; /* keep only the top byte of the solid color */
+
+    if (src == 0x00) /* adding zero leaves every destination byte unchanged */
+	return;
+
+    if (src == 0xff) /* 0xff + anything saturates to 0xff, so a plain fill suffices */
+    {
+	pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
+		     8, dest_x, dest_y, width, height, 0xff);
+
+	return;
+    }
+
+    src = (src << 24) | (src << 16) | (src << 8) | src; /* replicate the byte into all 4 bytes of the word */
+    xmm_src = _mm_set_epi32 (src, src, src, src); /* ... and that word into all 4 lanes: 16 copies of the byte */
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	w = width;
+
+	/* prefetch hint for the start of this destination row */
+	cache_prefetch ((__m128i*)dst);
+
+	while (w && ((unsigned long)dst & 15)) /* head: one byte at a time until dst is 16-byte aligned */
+	{
+	    *dst = (uint8_t)_mm_cvtsi64_si32 (
+		_mm_adds_pu8 (
+		    _mm_movepi64_pi64 (xmm_src),
+		    _mm_cvtsi32_si64 (*dst)));
+
+	    w--;
+	    dst++;
+	}
+
+	/* prefetch hint at the position the 16-byte loop starts from */
+	cache_prefetch ((__m128i*)dst);
+
+	while (w >= 16) /* body: 16 bytes per iteration, aligned load, saturating add, aligned store */
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)dst);
+
+	    save_128_aligned (
+		(__m128i*)dst, _mm_adds_epu8 (xmm_src, load_128_aligned  ((__m128i*)dst)));
+
+	    dst += 16;
+	    w -= 16;
+	}
+
+	while (w) /* tail: the remaining (fewer than 16) bytes, one at a time */
+	{
+	    *dst = (uint8_t)_mm_cvtsi64_si32 (
+		_mm_adds_pu8 (
+		    _mm_movepi64_pi64 (xmm_src),
+		    _mm_cvtsi32_si64 (*dst)));
+
+	    w--;
+	    dst++;
+	}
+    }
+
+    _mm_empty (); /* the single-byte loops use MMX (__m64) intrinsics; clear the MMX state */
+}
+
+
 /* ----------------------------------------------------------------------
  * composite_add_8000_8000
  */
@@ -6309,6 +6406,7 @@ static const pixman_fast_path_t sse2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, sse2_composite_add_8888_8888),
     PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, sse2_composite_add_8888_8888),
     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, sse2_composite_add_n_8_8),
+    PIXMAN_STD_FAST_PATH (ADD, solid, null, a8, sse2_composite_add_n_8),
 
     /* PIXMAN_OP_SRC */
     PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8r8g8b8, sse2_composite_src_n_8_8888),
commit 16ae3285e6601ea177637dddd20d2857d13decac
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Sun Apr 25 19:54:28 2010 -0400

    [sse2] Add sse2_composite_in_n_8()
    
    This shows up when epiphany displays the "ImageTest" on
    glimr.rubyforge.org/cake/canvas.html

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 2c9f5eb..822fffe 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5066,6 +5066,112 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
     _mm_empty ();
 }
 
+/* -----------------------------------------------------------------------
+ * composite_in_n_8: IN with a solid source and an a8 destination (no mask)
+ */
+
+static void
+sse2_composite_in_n_8 (pixman_implementation_t *imp,
+		       pixman_op_t              op,
+		       pixman_image_t *         src_image,
+		       pixman_image_t *         mask_image,
+		       pixman_image_t *         dst_image,
+		       int32_t                  src_x,
+		       int32_t                  src_y,
+		       int32_t                  mask_x,
+		       int32_t                  mask_y,
+		       int32_t                  dest_x,
+		       int32_t                  dest_y,
+		       int32_t                  width,
+		       int32_t                  height)
+{
+    uint8_t     *dst_line, *dst;
+    int dst_stride;
+    uint32_t d;
+    uint32_t src;
+    int32_t w;
+
+    __m128i xmm_alpha;
+    __m128i xmm_dst, xmm_dst_lo, xmm_dst_hi;
+
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
+
+    src = _pixman_image_get_solid (src_image, dst_image->bits.format);
+
+    xmm_alpha = expand_alpha_1x128 (expand_pixel_32_1x128 (src)); /* presumably the source alpha broadcast across the vector -- helpers are defined elsewhere */
+
+    src = src >> 24; /* keep only the top byte of the solid color */
+
+    if (src == 0xff) /* handled as a no-op: the destination is left untouched */
+	return;
+
+    if (src == 0x00) /* src is 0 here, so the fill below writes 0 over the rectangle */
+    {
+	pixman_fill (dst_image->bits.bits, dst_image->bits.rowstride,
+		     8, dest_x, dest_y, width, height, src);
+
+	return;
+    }
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	w = width;
+
+	/* prefetch hint for the start of this destination row */
+	cache_prefetch ((__m128i*)dst);
+
+	while (w && ((unsigned long)dst & 15)) /* head: one byte at a time until dst is 16-byte aligned */
+	{
+	    d = (uint32_t) *dst;
+
+	    *dst++ = (uint8_t) pack_1x64_32 (
+		pix_multiply_1x64 (
+		    _mm_movepi64_pi64 (xmm_alpha),
+		    unpack_32_1x64 (d)));
+	    w--;
+	}
+
+	/* prefetch hint at the position the 16-byte loop starts from */
+	cache_prefetch ((__m128i*)dst);
+
+	while (w >= 16) /* body: 16 destination bytes per iteration, aligned load and store */
+	{
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)dst);
+
+	    xmm_dst = load_128_aligned ((__m128i*)dst);
+
+	    unpack_128_2x128 (xmm_dst, &xmm_dst_lo, &xmm_dst_hi); /* split into two halves (presumably widened to 16 bits per byte) */
+	    
+	    pix_multiply_2x128 (&xmm_alpha, &xmm_alpha,
+				&xmm_dst_lo, &xmm_dst_hi,
+				&xmm_dst_lo, &xmm_dst_hi); /* both halves are multiplied by xmm_alpha, results written back in place */
+
+	    save_128_aligned (
+		(__m128i*)dst, pack_2x128_128 (xmm_dst_lo, xmm_dst_hi));
+
+	    dst += 16;
+	    w -= 16;
+	}
+
+	while (w) /* tail: the remaining (fewer than 16) bytes, one at a time */
+	{
+	    d = (uint32_t) *dst;
+
+	    *dst++ = (uint8_t) pack_1x64_32 (
+		pix_multiply_1x64 (
+		    _mm_movepi64_pi64 (xmm_alpha),
+		    unpack_32_1x64 (d)));
+	    w--;
+	}
+    }
+
+    _mm_empty (); /* the single-byte loops produce __m64 (MMX) values; clear the MMX state */
+}
+
+
 /* ---------------------------------------------------------------------------
  * composite_in_8_8
  */
@@ -6192,7 +6298,7 @@ static const pixman_fast_path_t sse2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, rpixbuf, rpixbuf, b5g6r5, sse2_composite_over_pixbuf_0565),
     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, null, x8b8g8r8, sse2_composite_copy_area),
-
+    
     /* PIXMAN_OP_OVER_REVERSE */
     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8r8g8b8, sse2_composite_over_reverse_n_8888),
     PIXMAN_STD_FAST_PATH (OVER_REVERSE, solid, null, a8b8g8r8, sse2_composite_over_reverse_n_8888),
@@ -6223,6 +6329,7 @@ static const pixman_fast_path_t sse2_fast_paths[] =
     /* PIXMAN_OP_IN */
     PIXMAN_STD_FAST_PATH (IN, a8, null, a8, sse2_composite_in_8_8),
     PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, sse2_composite_in_n_8_8),
+    PIXMAN_STD_FAST_PATH (IN, solid, null, a8, sse2_composite_in_n_8),
 
     { PIXMAN_OP_NONE },
 };
commit e0b430a13ee4619bd6d82c4ebff8a401a254e9bc
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Tue Jul 13 00:31:35 2010 -0400

    [sse2] Add sse2_composite_src_x888_8888()
    
    This operation shows up when Firefox displays
    http://dougx.net/plunder/plunder.html

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index d5349d7..2c9f5eb 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -3517,6 +3517,88 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
     _mm_empty ();
 }
 
+/*---------------------------------------------------------------------
+ * composite_src_x888_8888: SRC copy from x888 to 8888, top byte forced to 0xff
+ */
+
+static void
+sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
+			      pixman_op_t              op,
+			      pixman_image_t *         src_image,
+			      pixman_image_t *         mask_image,
+			      pixman_image_t *         dst_image,
+			      int32_t                  src_x,
+			      int32_t                  src_y,
+			      int32_t                  mask_x,
+			      int32_t                  mask_y,
+			      int32_t                  dest_x,
+			      int32_t                  dest_y,
+			      int32_t                  width,
+			      int32_t                  height)
+{
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int32_t w;
+    int dst_stride, src_stride;
+
+
+    PIXMAN_IMAGE_GET_LINE (
+	dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (
+	src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+    while (height--)
+    {
+	dst = dst_line;
+	dst_line += dst_stride;
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	/* prefetch hint for the start of this source row */
+	cache_prefetch ((__m128i*)src);
+
+	while (w && (unsigned long)dst & 15) /* head: one pixel at a time until dst is 16-byte aligned */
+	{
+	    *dst++ = *src++ | 0xff000000;
+	    w--;
+	}
+
+	/* prefetch hint at the source position the vector loop starts from */
+	cache_prefetch ((__m128i*)src);
+
+	while (w >= 16) /* body: 16 pixels (four 128-bit vectors) per iteration */
+	{
+	    __m128i xmm_src1, xmm_src2, xmm_src3, xmm_src4;
+	    
+	    /* fill cache line with next memory */
+	    cache_prefetch_next ((__m128i*)src);
+
+	    xmm_src1 = load_128_unaligned ((__m128i*)src + 0); /* only dst was aligned above, so src is loaded unaligned */
+	    xmm_src2 = load_128_unaligned ((__m128i*)src + 1);
+	    xmm_src3 = load_128_unaligned ((__m128i*)src + 2);
+	    xmm_src4 = load_128_unaligned ((__m128i*)src + 3);
+	    
+	    save_128_aligned ((__m128i*)dst + 0, _mm_or_si128 (xmm_src1, mask_ff000000)); /* OR in mask_ff000000 (defined elsewhere), mirroring the scalar | 0xff000000 */
+	    save_128_aligned ((__m128i*)dst + 1, _mm_or_si128 (xmm_src2, mask_ff000000));
+	    save_128_aligned ((__m128i*)dst + 2, _mm_or_si128 (xmm_src3, mask_ff000000));
+	    save_128_aligned ((__m128i*)dst + 3, _mm_or_si128 (xmm_src4, mask_ff000000));
+	    
+	    dst += 16;
+	    src += 16;
+	    w -= 16;
+	}
+
+	while (w) /* tail: the remaining (fewer than 16) pixels, one at a time */
+	{
+	    *dst++ = *src++ | 0xff000000;
+	    w--;
+	}
+    }
+
+    _mm_empty (); /* NOTE(review): no __m64 intrinsics are visible in this function -- confirm this call is needed */
+}
+
+
 /* ---------------------------------------------------------------------
  * composite_over_x888_n_8888
  */
@@ -6127,6 +6209,8 @@ static const pixman_fast_path_t sse2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8r8g8b8, sse2_composite_src_n_8_8888),
     PIXMAN_STD_FAST_PATH (SRC, solid, a8, a8b8g8r8, sse2_composite_src_n_8_8888),
     PIXMAN_STD_FAST_PATH (SRC, solid, a8, x8b8g8r8, sse2_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, sse2_composite_src_x888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, sse2_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, sse2_composite_copy_area),
     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, sse2_composite_copy_area),
     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, sse2_composite_copy_area),
commit 16bae8347529c1c976e6d7af90e0fb5811605af1
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Tue Jul 13 00:08:10 2010 -0400

    [fast] Add fast_composite_src_x888_8888()
    
    This shows up when Firefox displays http://dougx.net/plunder/plunder.html

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index c65dfb1..6ed1580 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -724,6 +724,42 @@ fast_composite_over_8888_8888 (pixman_implementation_t *imp,
     }
 }
 
+static void
+fast_composite_src_x888_8888 (pixman_implementation_t *imp,
+			      pixman_op_t              op,
+			      pixman_image_t *         src_image,
+			      pixman_image_t *         mask_image,
+			      pixman_image_t *         dst_image,
+			      int32_t                  src_x,
+			      int32_t                  src_y,
+			      int32_t                  mask_x,
+			      int32_t                  mask_y,
+			      int32_t                  dest_x,
+			      int32_t                  dest_y,
+			      int32_t                  width,
+			      int32_t                  height)
+{ /* SRC copy of 32-bit pixels from src_image to dst_image, forcing each pixel's top byte to 0xff */
+    uint32_t    *dst_line, *dst;
+    uint32_t    *src_line, *src;
+    int dst_stride, src_stride;
+    int32_t w;
+
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
+
+    while (height--) /* one iteration per row of the rectangle */
+    {
+	dst = dst_line;
+	dst_line += dst_stride; /* strides are applied to uint32_t pointers, i.e. counted in pixels */
+	src = src_line;
+	src_line += src_stride;
+	w = width;
+
+	while (w--) /* one pixel per iteration; op, imp and the mask arguments are not used */
+	    *dst++ = (*src++) | 0xff000000;
+    }
+}
+
+
 #if 0
 static void
 fast_composite_over_8888_0888 (pixman_implementation_t *imp,
@@ -1805,6 +1841,8 @@ static const pixman_fast_path_t c_fast_paths[] =
     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
     PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
+    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
+    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),


More information about the xorg-commit mailing list