pixman: Branch 'master' - 3 commits

Mon Oct 26 10:06:19 PDT 2009

configure.ac               |    2 
 pixman/pixman-bits-image.c |  344 ++++++++++++++++++++++++++++++++++++++++++---
 pixman/pixman.h            |    1 
 test/scaling-test.c        |   17 +-
 4 files changed, 338 insertions(+), 26 deletions(-)

New commits:
commit 0d5562747ce25ecac06f4c44e935662eb6ee328a
Author: André Tupinambá <andrelrt at gmail.com>
Date:   Sat Sep 19 23:01:50 2009 -0400

    Add fast path scaled, bilinear fetcher.
    
    This adds a bilinear fetcher for the case where the image has a scaled
    transformation, does not repeat, and has the format {ax}8r8g8b8.
    
    Results for the swfdec-youtube benchmark
    
    Before:
    
    [ # ]  backend                         test   min(s) median(s) stddev. count
    [  0]    image               swfdec-youtube    7.841    7.915   0.72%    6/6
    
    After:
    
    [ # ]  backend                         test   min(s) median(s) stddev. count
    [  0]    image               swfdec-youtube    6.677    6.780   0.94%    6/6
    
    These results were measured on a faster machine than the ones in the
    previous commit, so the numbers are not comparable.
    
    Signed-off-by: Søren Sandmann Pedersen <sandmann at redhat.com>

diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index 288edf4..5a5a690 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -4,6 +4,7 @@
  *             2008 Aaron Plattner, NVIDIA Corporation
  * Copyright © 2000 SuSE, Inc.
  * Copyright © 2007, 2009 Red Hat, Inc.
+ * Copyright © 2008 André Tupinambá <andrelrt at gmail.com>
  *
  * Permission to use, copy, modify, distribute, and sell this software and its
  * documentation for any purpose is hereby granted without fee, provided that
@@ -28,6 +29,7 @@
 #ifdef HAVE_CONFIG_H
 #include <config.h>
 #endif
+#include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "pixman-private.h"
@@ -186,7 +188,7 @@ bits_image_fetch_pixel_nearest (bits_image_t   *image,
 
 #if SIZEOF_LONG > 4
 
-static inline uint32_t
+static force_inline uint32_t
 bilinear_interpolation (uint32_t tl, uint32_t tr,
 			uint32_t bl, uint32_t br,
 			int distx, int disty)
@@ -230,7 +232,7 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
 
 #else
 
-static inline uint32_t
+static force_inline uint32_t
 bilinear_interpolation (uint32_t tl, uint32_t tr,
 			uint32_t bl, uint32_t br,
 			int distx, int disty)
@@ -239,9 +241,11 @@ bilinear_interpolation (uint32_t tl, uint32_t tr,
     uint32_t f, r;
 
     distxy = distx * disty;
-    distxiy = distx * (256 - disty);
-    distixy = (256 - distx) * disty;
-    distixiy = (256 - distx) * (256 - disty);
+    distxiy = (distx << 8) - distxy;	/* distx * (256 - disty) */
+    distixy = (disty << 8) - distxy;	/* disty * (256 - distx) */
+    distixiy =
+	256 * 256 - (disty << 8) -
+	(distx << 8) + distxy;		/* (256 - distx) * (256 - disty) */
 
     /* Blue */
     r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
@@ -319,6 +323,217 @@ bits_image_fetch_pixel_bilinear (bits_image_t   *image,
     return bilinear_interpolation (tl, tr, bl, br, distx, disty);
 }
 
+static void
+bits_image_fetch_bilinear_no_repeat_8888 (pixman_image_t * ima,
+					  int              offset,
+					  int              line,
+					  int              width,
+					  uint32_t *       buffer,
+					  const uint32_t * mask,
+					  uint32_t         mask_bits)
+{
+    bits_image_t *bits = &ima->bits;
+    pixman_fixed_t x_top, x_bottom, x;
+    pixman_fixed_t ux_top, ux_bottom, ux;
+    pixman_vector_t v;
+    uint32_t top_mask, bottom_mask;
+    uint32_t *top_row;
+    uint32_t *bottom_row;
+    uint32_t *end;
+    uint32_t zero[2] = { 0, 0 };
+    int y, y1, y2;
+    int disty;
+    int mask_inc;
+    int w;
+
+    /* reference point is the center of the pixel */
+    v.vector[0] = pixman_int_to_fixed (offset) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed (line) + pixman_fixed_1 / 2;
+    v.vector[2] = pixman_fixed_1;
+
+    if (!pixman_transform_point_3d (bits->common.transform, &v))
+	return;
+
+    ux = ux_top = ux_bottom = bits->common.transform->matrix[0][0];
+    x = x_top = x_bottom = v.vector[0] - pixman_fixed_1/2;
+
+    y = v.vector[1] - pixman_fixed_1/2;
+    disty = (y >> 8) & 0xff;
+
+    /* Load the pointers to the first and second lines from the source
+     * image that bilinear code must read.
+     *
+     * The main trick in this code is the check for whether either line
+     * is outside of the image.
+     *
+     * When a line (either one) is outside the image, its pointer is
+     * redirected to a dummy area of zeros. From then on the pointer must
+     * not move, so that row's position and per-pixel increment are set
+     * to zero before entering the loop.
+     */
+    y1 = pixman_fixed_to_int (y);
+    y2 = y1 + 1;
+
+    if (y1 < 0 || y1 >= bits->height)
+    {
+	top_row = zero;
+	x_top = 0;
+	ux_top = 0;
+    }
+    else
+    {
+	top_row = bits->bits + y1 * bits->rowstride;
+	x_top = x;
+	ux_top = ux;
+    }
+
+    if (y2 < 0 || y2 >= bits->height)
+    {
+	bottom_row = zero;
+	x_bottom = 0;
+	ux_bottom = 0;
+    }
+    else
+    {
+	bottom_row = bits->bits + y2 * bits->rowstride;
+	x_bottom = x;
+	ux_bottom = ux;
+    }
+
+    /* Instead of checking whether the operation uses the mask in
+     * each loop iteration, verify this only once and prepare the
+     * variables to make the code smaller inside the loop.
+     */
+    if (!mask)
+    {
+        mask_inc = 0;
+        mask_bits = 1;
+        mask = &mask_bits;
+    }
+    else
+    {
+        /* If we have a mask, prepare the variables to check it */
+        mask_inc = 1;
+    }
+
+    /* If both are zero, then the whole thing is zero */
+    if (top_row == zero && bottom_row == zero)
+    {
+	memset (buffer, 0, width * sizeof (uint32_t));
+	return;
+    }
+    else if (bits->format == PIXMAN_x8r8g8b8)
+    {
+	if (top_row == zero)
+	{
+	    top_mask = 0;
+	    bottom_mask = 0xff000000;
+	}
+	else if (bottom_row == zero)
+	{
+	    top_mask = 0xff000000;
+	    bottom_mask = 0;
+	}
+	else
+	{
+	    top_mask = 0xff000000;
+	    bottom_mask = 0xff000000;
+	}
+    }
+    else
+    {
+	top_mask = 0;
+	bottom_mask = 0;
+    }
+
+    end = buffer + width;
+
+    /* Zero fill to the left of the image */
+    while (buffer < end && x < pixman_fixed_minus_1)
+    {
+	*buffer++ = 0;
+	x += ux;
+	x_top += ux_top;
+	x_bottom += ux_bottom;
+	mask += mask_inc;
+    }
+
+    /* Left edge
+     */
+    while (buffer < end && x < 0)
+    {
+	uint32_t tr, br;
+	int32_t distx;
+
+	tr = top_row[pixman_fixed_to_int (x_top) + 1] | top_mask;
+	br = bottom_row[pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
+
+	distx = (x >> 8) & 0xff;
+
+	*buffer++ = bilinear_interpolation (0, tr, 0, br, distx, disty);
+
+	x += ux;
+	x_top += ux_top;
+	x_bottom += ux_bottom;
+	mask += mask_inc;
+    }
+
+    /* Main part */
+    w = pixman_int_to_fixed (bits->width - 1);
+
+    while (buffer < end  &&  x < w)
+    {
+	if (*mask)
+	{
+	    uint32_t tl, tr, bl, br;
+	    int32_t distx;
+
+	    tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
+	    tr = top_row [pixman_fixed_to_int (x_top) + 1] | top_mask;
+	    bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
+	    br = bottom_row [pixman_fixed_to_int (x_bottom) + 1] | bottom_mask;
+
+	    distx = (x >> 8) & 0xff;
+
+	    *buffer = bilinear_interpolation (tl, tr, bl, br, distx, disty);
+	}
+
+	buffer++;
+	x += ux;
+	x_top += ux_top;
+	x_bottom += ux_bottom;
+	mask += mask_inc;
+    }
+
+    /* Right Edge */
+    w = pixman_int_to_fixed (bits->width);
+    while (buffer < end  &&  x < w)
+    {
+	if (*mask)
+	{
+	    uint32_t tl, bl;
+	    int32_t distx;
+
+	    tl = top_row [pixman_fixed_to_int (x_top)] | top_mask;
+	    bl = bottom_row [pixman_fixed_to_int (x_bottom)] | bottom_mask;
+
+	    distx = (x >> 8) & 0xff;
+
+	    *buffer = bilinear_interpolation (tl, 0, bl, 0, distx, disty);
+	}
+
+	buffer++;
+	x += ux;
+	x_top += ux_top;
+	x_bottom += ux_bottom;
+	mask += mask_inc;
+    }
+
+    /* Zero fill to the right of the image */
+    while (buffer < end)
+	*buffer++ = 0;
+}
+
 static force_inline uint32_t
 bits_image_fetch_pixel_convolution (bits_image_t   *image,
 				    pixman_fixed_t  x,
@@ -741,6 +956,24 @@ bits_image_property_changed (pixman_image_t *image)
 	image->common.get_scanline_64 = bits_image_fetch_untransformed_64;
 	image->common.get_scanline_32 = bits_image_fetch_untransformed_32;
     }
+    else if (bits->common.transform					&&
+	     bits->common.transform->matrix[2][0] == 0			&&
+	     bits->common.transform->matrix[2][1] == 0			&&
+	     bits->common.transform->matrix[2][2] == pixman_fixed_1	&&
+	     bits->common.transform->matrix[0][0] > 0			&&
+	     bits->common.transform->matrix[1][0] == 0			&&
+	     (bits->common.filter == PIXMAN_FILTER_BILINEAR ||
+	      bits->common.filter == PIXMAN_FILTER_GOOD	    ||
+	      bits->common.filter == PIXMAN_FILTER_BEST)		&&
+	     bits->common.repeat == PIXMAN_REPEAT_NONE			&&
+	     (bits->format == PIXMAN_a8r8g8b8	||
+	      bits->format == PIXMAN_x8r8g8b8))
+    {
+	image->common.get_scanline_64 =
+	    _pixman_image_get_scanline_generic_64;
+	image->common.get_scanline_32 =
+	    bits_image_fetch_bilinear_no_repeat_8888;
+    }
     else
     {
 	image->common.get_scanline_64 =
diff --git a/pixman/pixman.h b/pixman/pixman.h
index 35a5eb7..2165605 100644
--- a/pixman/pixman.h
+++ b/pixman/pixman.h
@@ -109,6 +109,7 @@ typedef pixman_fixed_16_16_t	pixman_fixed_t;
 #define pixman_fixed_e			((pixman_fixed_t) 1)
 #define pixman_fixed_1			(pixman_int_to_fixed(1))
 #define pixman_fixed_1_minus_e		(pixman_fixed_1 - pixman_fixed_e)
+#define pixman_fixed_minus_1		(pixman_int_to_fixed(-1))
 #define pixman_fixed_to_int(f)		((int) ((f) >> 16))
 #define pixman_int_to_fixed(i)		((pixman_fixed_t) ((i) << 16))
 #define pixman_fixed_to_double(f)	(double) ((f) / (double) pixman_fixed_1)
commit 88323c5abe68906472049537b54b0e7eea343f43
Author: André Tupinambá <andrelrt at gmail.com>
Date:   Sat Sep 19 09:32:37 2009 -0400

    Speed up bilinear interpolation.
    
    Speed up bilinear interpolation by processing more than one component
    at a time on 64 bit architectures, and by precomputing the dist{ixiy}
    products on 32 bit architectures.
    
    Previously bilinear interpolation for one pixel would take 24
    multiplications. With this improvement it takes 12 on 64 bit, and 20
    on 32 bit.
    
    This is a small but consistent speedup on the swfdec-youtube
    benchmark:
    
    [ # ]  backend                         test   min(s) median(s) stddev. count
    Before:
    [  0]    image               swfdec-youtube   18.010   18.020   0.09%    4/5
    
    After:
    [  0]    image               swfdec-youtube   17.488   17.584   0.22%    5/6
    
    Signed-off-by: Søren Sandmann Pedersen <sandmann at redhat.com>

diff --git a/configure.ac b/configure.ac
index c3e339c..12c7f75 100644
--- a/configure.ac
+++ b/configure.ac
@@ -76,6 +76,8 @@ AC_CHECK_FUNCS([getisax])
 AC_C_BIGENDIAN
 AC_C_INLINE
 
+AC_CHECK_SIZEOF(long)
+
 # Checks for Sun Studio compilers
 AC_CHECK_DECL([__SUNPRO_C], [SUNCC="yes"], [SUNCC="no"])
 AC_CHECK_DECL([__amd64], [AMD64_ABI="yes"], [AMD64_ABI="no"])
diff --git a/pixman/pixman-bits-image.c b/pixman/pixman-bits-image.c
index 9e93623..288edf4 100644
--- a/pixman/pixman-bits-image.c
+++ b/pixman/pixman-bits-image.c
@@ -184,6 +184,95 @@ bits_image_fetch_pixel_nearest (bits_image_t   *image,
     }
 }
 
+#if SIZEOF_LONG > 4
+
+static inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr,
+			uint32_t bl, uint32_t br,
+			int distx, int disty)
+{
+    uint64_t distxy, distxiy, distixy, distixiy;
+    uint64_t tl64, tr64, bl64, br64;
+    uint64_t f, r;
+
+    distxy = distx * disty;
+    distxiy = distx * (256 - disty);
+    distixy = (256 - distx) * disty;
+    distixiy = (256 - distx) * (256 - disty);
+
+    /* Alpha and Blue */
+    tl64 = tl & 0xff0000ff;
+    tr64 = tr & 0xff0000ff;
+    bl64 = bl & 0xff0000ff;
+    br64 = br & 0xff0000ff;
+
+    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+    r = f & 0x0000ff0000ff0000ull;
+
+    /* Red and Green */
+    tl64 = tl;
+    tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull);
+
+    tr64 = tr;
+    tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull);
+
+    bl64 = bl;
+    bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull);
+
+    br64 = br;
+    br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull);
+
+    f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy;
+    r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull);
+
+    return (uint32_t)(r >> 16);
+}
+
+#else
+
+static inline uint32_t
+bilinear_interpolation (uint32_t tl, uint32_t tr,
+			uint32_t bl, uint32_t br,
+			int distx, int disty)
+{
+    int distxy, distxiy, distixy, distixiy;
+    uint32_t f, r;
+
+    distxy = distx * disty;
+    distxiy = distx * (256 - disty);
+    distixy = (256 - distx) * disty;
+    distixiy = (256 - distx) * (256 - disty);
+
+    /* Blue */
+    r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
+
+    /* Green */
+    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
+    r |= f & 0xff000000;
+
+    tl >>= 16;
+    tr >>= 16;
+    bl >>= 16;
+    br >>= 16;
+    r >>= 16;
+
+    /* Red */
+    f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy
+      + (bl & 0x000000ff) * distixy  + (br & 0x000000ff) * distxy;
+    r |= f & 0x00ff0000;
+
+    /* Alpha */
+    f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy
+      + (bl & 0x0000ff00) * distixy  + (br & 0x0000ff00) * distxy;
+    r |= f & 0xff000000;
+
+    return r;
+}
+
+#endif
+
 static force_inline uint32_t
 bits_image_fetch_pixel_bilinear (bits_image_t   *image,
 				 pixman_fixed_t  x,
@@ -193,9 +282,8 @@ bits_image_fetch_pixel_bilinear (bits_image_t   *image,
     int width = image->width;
     int height = image->height;
     int x1, y1, x2, y2;
-    uint32_t tl, tr, bl, br, r;
-    int32_t distx, disty, idistx, idisty;
-    uint32_t ft, fb;
+    uint32_t tl, tr, bl, br;
+    int32_t distx, disty;
 
     x1 = x - pixman_fixed_1 / 2;
     y1 = y - pixman_fixed_1 / 2;
@@ -214,7 +302,7 @@ bits_image_fetch_pixel_bilinear (bits_image_t   *image,
 	repeat (repeat_mode, height, &y1);
 	repeat (repeat_mode, width, &x2);
 	repeat (repeat_mode, height, &y2);
-	
+
 	tl = get_pixel (image, x1, y1, FALSE);
 	bl = get_pixel (image, x1, y2, FALSE);
 	tr = get_pixel (image, x2, y1, FALSE);
@@ -228,24 +316,7 @@ bits_image_fetch_pixel_bilinear (bits_image_t   *image,
 	br = get_pixel (image, x2, y2, TRUE);
     }
 
-    idistx = 256 - distx;
-    idisty = 256 - disty;
-
-#define GET8(v, i)   ((uint16_t) (uint8_t) ((v) >> i))
-    ft = GET8 (tl, 0) * idistx + GET8 (tr, 0) * distx;
-    fb = GET8 (bl, 0) * idistx + GET8 (br, 0) * distx;
-    r = (((ft * idisty + fb * disty) >> 16) & 0xff);
-    ft = GET8 (tl, 8) * idistx + GET8 (tr, 8) * distx;
-    fb = GET8 (bl, 8) * idistx + GET8 (br, 8) * distx;
-    r |= (((ft * idisty + fb * disty) >> 8) & 0xff00);
-    ft = GET8 (tl, 16) * idistx + GET8 (tr, 16) * distx;
-    fb = GET8 (bl, 16) * idistx + GET8 (br, 16) * distx;
-    r |= (((ft * idisty + fb * disty)) & 0xff0000);
-    ft = GET8 (tl, 24) * idistx + GET8 (tr, 24) * distx;
-    fb = GET8 (bl, 24) * idistx + GET8 (br, 24) * distx;
-    r |= (((ft * idisty + fb * disty) << 8) & 0xff000000);
-
-    return r;
+    return bilinear_interpolation (tl, tr, bl, br, distx, disty);
 }
 
 static force_inline uint32_t
commit f0c157f888185279681bad305973f246dca2e535
Author: Søren Sandmann Pedersen <sandmann at redhat.com>
Date:   Sun Sep 27 09:41:25 2009 -0400

    Extend scaling-test to also test bilinear filtering.

diff --git a/test/scaling-test.c b/test/scaling-test.c
index c158c23..e7686cd 100644
--- a/test/scaling-test.c
+++ b/test/scaling-test.c
@@ -137,7 +137,7 @@ compute_crc32 (uint32_t    in_crc32,
 	0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
 	0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D
     };
-    
+
     uint32_t              crc32;
     unsigned char *       byte_buf;
     size_t                i;
@@ -148,7 +148,7 @@ compute_crc32 (uint32_t    in_crc32,
 
     for (i = 0; i < buf_len; i++)
 	crc32 = (crc32 >> 8) ^ crc_table[(crc32 ^ byte_buf[i]) & 0xFF];
-    
+
     return (crc32 ^ 0xFFFFFFFF);
 }
 
@@ -170,7 +170,7 @@ image_endian_swap (pixman_image_t *img,
     for (i = 0; i < height; i++)
     {
 	char *line_data = (char *)data + stride * i;
-	
+
 	/* swap bytes only for 16, 24 and 32 bpp for now */
 	switch (bpp)
 	{
@@ -266,7 +266,7 @@ test_composite (uint32_t initcrc,
 
     if (src_stride & 3)
 	src_stride += 2;
-    
+
     if (dst_stride & 3)
 	dst_stride += 2;
 
@@ -332,6 +332,11 @@ test_composite (uint32_t initcrc,
     }
     pixman_image_set_repeat (src_img, repeat);
 
+    if (lcg_rand_n (2))
+	pixman_image_set_filter (src_img, PIXMAN_FILTER_NEAREST, NULL, 0);
+    else
+	pixman_image_set_filter (src_img, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
     if (verbose)
     {
 	printf ("src_fmt=%08X, dst_fmt=%08X\n", src_fmt, dst_fmt);
@@ -365,7 +370,7 @@ test_composite (uint32_t initcrc,
 		        clip_boxes[i].x2, clip_boxes[i].y2);
 	    }
 	}
-	
+
 	pixman_region_init_rects (&clip, clip_boxes, n);
 	pixman_image_set_clip_region (src_img, &clip);
 	pixman_image_set_source_clipping (src_img, 1);
@@ -461,7 +466,7 @@ main (int   argc, char *argv[])
 	    /* predefined value for running with all the fastpath functions disabled  */
 	    /* it needs to be updated every time changes are introduced to this program! */
 
-	    if (crc == 0x0B633CF4)
+	    if (crc == 0x2168ACD1)
 	    {
 		printf ("scaling test passed\n");
 	    }