pixman: Branch 'master' - 3 commits

Siarhei Siamashka siamashka at kemper.freedesktop.org
Thu Feb 10 07:11:37 PST 2011


 pixman/pixman-fast-path.c |  292 ++++++++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-image.c     |   17 ++
 pixman/pixman-private.h   |    3 
 test/affine-test.c        |   34 ++++-
 4 files changed, 342 insertions(+), 4 deletions(-)

New commits:
commit 8d359b00c5bb9960c3c584a7f77a943c0ce61368
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Fri Jul 30 18:37:51 2010 +0300

    C fast paths for a simple 90/270 degrees rotation
    
    Depending on CPU architecture, performance is in the range of 1.5 to 4 times
    slower than simple nonrotated copy (which would be an ideal case, perfectly
    utilizing memory bandwidth), but still is more than 7 times faster if
    compared to general path.
    
    This implementation sets a performance baseline for rotation. The use
    of SIMD instructions may further improve memory bandwidth utilization.

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index eb09715..771ec9d 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -1616,6 +1616,272 @@ fast_composite_scaled_nearest (pixman_implementation_t *imp,
     }
 }
 
+#define CACHE_LINE_SIZE 64
+
+#define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
+                                                                              \
+static void                                                                   \
+blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
+				 int             dst_stride,                  \
+				 const pix_type *src,                         \
+				 int             src_stride,                  \
+				 int             w,                           \
+				 int             h)                           \
+{                                                                             \
+    int x, y;                                                                 \
+    for (y = 0; y < h; y++)                                                   \
+    {                                                                         \
+	const pix_type *s = src + (h - y - 1);                                \
+	pix_type *d = dst + dst_stride * y;                                   \
+	for (x = 0; x < w; x++)                                               \
+	{                                                                     \
+	    *d++ = *s;                                                        \
+	    s += src_stride;                                                  \
+	}                                                                     \
+    }                                                                         \
+}                                                                             \
+                                                                              \
+static void                                                                   \
+blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
+				  int             dst_stride,                 \
+				  const pix_type *src,                        \
+				  int             src_stride,                 \
+				  int             w,                          \
+				  int             h)                          \
+{                                                                             \
+    int x, y;                                                                 \
+    for (y = 0; y < h; y++)                                                   \
+    {                                                                         \
+	const pix_type *s = src + src_stride * (w - 1) + y;                   \
+	pix_type *d = dst + dst_stride * y;                                   \
+	for (x = 0; x < w; x++)                                               \
+	{                                                                     \
+	    *d++ = *s;                                                        \
+	    s -= src_stride;                                                  \
+	}                                                                     \
+    }                                                                         \
+}                                                                             \
+                                                                              \
+static void                                                                   \
+blt_rotated_90_##suffix (pix_type       *dst,                                 \
+			 int             dst_stride,                          \
+			 const pix_type *src,                                 \
+			 int             src_stride,                          \
+			 int             W,                                   \
+			 int             H)                                   \
+{                                                                             \
+    int x;                                                                    \
+    int leading_pixels = 0, trailing_pixels = 0;                              \
+    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
+                                                                              \
+    /*                                                                        \
+     * split processing into handling destination as TILE_SIZExH cache line   \
+     * aligned vertical stripes (optimistically assuming that destination     \
+     * stride is a multiple of cache line, if not - it will be just a bit     \
+     * slower)                                                                \
+     */                                                                       \
+                                                                              \
+    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
+    {                                                                         \
+	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
+			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
+	if (leading_pixels > W)                                               \
+	    leading_pixels = W;                                               \
+                                                                              \
+	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
+	blt_rotated_90_trivial_##suffix (                                     \
+	    dst,                                                              \
+	    dst_stride,                                                       \
+	    src,                                                              \
+	    src_stride,                                                       \
+	    leading_pixels,                                                   \
+	    H);                                                               \
+	                                                                      \
+	dst += leading_pixels;                                                \
+	src += leading_pixels * src_stride;                                   \
+	W -= leading_pixels;                                                  \
+    }                                                                         \
+                                                                              \
+    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
+    {                                                                         \
+	trailing_pixels = (((uintptr_t)(dst + W) &                            \
+			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
+	if (trailing_pixels > W)                                              \
+	    trailing_pixels = W;                                              \
+	W -= trailing_pixels;                                                 \
+    }                                                                         \
+                                                                              \
+    for (x = 0; x < W; x += TILE_SIZE)                                        \
+    {                                                                         \
+	/* aligned middle part TILE_SIZExH */                                 \
+	blt_rotated_90_trivial_##suffix (                                     \
+	    dst + x,                                                          \
+	    dst_stride,                                                       \
+	    src + src_stride * x,                                             \
+	    src_stride,                                                       \
+	    TILE_SIZE,                                                        \
+	    H);                                                               \
+    }                                                                         \
+                                                                              \
+    if (trailing_pixels)                                                      \
+    {                                                                         \
+	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
+	blt_rotated_90_trivial_##suffix (                                     \
+	    dst + W,                                                          \
+	    dst_stride,                                                       \
+	    src + W * src_stride,                                             \
+	    src_stride,                                                       \
+	    trailing_pixels,                                                  \
+	    H);                                                               \
+    }                                                                         \
+}                                                                             \
+                                                                              \
+static void                                                                   \
+blt_rotated_270_##suffix (pix_type       *dst,                                \
+			  int             dst_stride,                         \
+			  const pix_type *src,                                \
+			  int             src_stride,                         \
+			  int             W,                                  \
+			  int             H)                                  \
+{                                                                             \
+    int x;                                                                    \
+    int leading_pixels = 0, trailing_pixels = 0;                              \
+    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
+                                                                              \
+    /*                                                                        \
+     * split processing into handling destination as TILE_SIZExH cache line   \
+     * aligned vertical stripes (optimistically assuming that destination     \
+     * stride is a multiple of cache line, if not - it will be just a bit     \
+     * slower)                                                                \
+     */                                                                       \
+                                                                              \
+    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
+    {                                                                         \
+	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
+			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
+	if (leading_pixels > W)                                               \
+	    leading_pixels = W;                                               \
+                                                                              \
+	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
+	blt_rotated_270_trivial_##suffix (                                    \
+	    dst,                                                              \
+	    dst_stride,                                                       \
+	    src + src_stride * (W - leading_pixels),                          \
+	    src_stride,                                                       \
+	    leading_pixels,                                                   \
+	    H);                                                               \
+	                                                                      \
+	dst += leading_pixels;                                                \
+	W -= leading_pixels;                                                  \
+    }                                                                         \
+                                                                              \
+    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
+    {                                                                         \
+	trailing_pixels = (((uintptr_t)(dst + W) &                            \
+			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
+	if (trailing_pixels > W)                                              \
+	    trailing_pixels = W;                                              \
+	W -= trailing_pixels;                                                 \
+	src += trailing_pixels * src_stride;                                  \
+    }                                                                         \
+                                                                              \
+    for (x = 0; x < W; x += TILE_SIZE)                                        \
+    {                                                                         \
+	/* aligned middle part TILE_SIZExH */                                 \
+	blt_rotated_270_trivial_##suffix (                                    \
+	    dst + x,                                                          \
+	    dst_stride,                                                       \
+	    src + src_stride * (W - x - TILE_SIZE),                           \
+	    src_stride,                                                       \
+	    TILE_SIZE,                                                        \
+	    H);                                                               \
+    }                                                                         \
+                                                                              \
+    if (trailing_pixels)                                                      \
+    {                                                                         \
+	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
+	blt_rotated_270_trivial_##suffix (                                    \
+	    dst + W,                                                          \
+	    dst_stride,                                                       \
+	    src - trailing_pixels * src_stride,                               \
+	    src_stride,                                                       \
+	    trailing_pixels,                                                  \
+	    H);                                                               \
+    }                                                                         \
+}                                                                             \
+                                                                              \
+static void                                                                   \
+fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
+				   pixman_op_t              op,               \
+				   pixman_image_t *         src_image,        \
+				   pixman_image_t *         mask_image,       \
+				   pixman_image_t *         dst_image,        \
+				   int32_t                  src_x,            \
+				   int32_t                  src_y,            \
+				   int32_t                  mask_x,           \
+				   int32_t                  mask_y,           \
+				   int32_t                  dest_x,           \
+				   int32_t                  dest_y,           \
+				   int32_t                  width,            \
+				   int32_t                  height)           \
+{                                                                             \
+    pix_type       *dst_line;                                                 \
+    pix_type       *src_line;                                                 \
+    int             dst_stride, src_stride;                                   \
+    int             src_x_t, src_y_t;                                         \
+                                                                              \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type,               \
+			   dst_stride, dst_line, 1);                          \
+    src_x_t = -src_y + pixman_fixed_to_int (                                  \
+				src_image->common.transform->matrix[0][2] +   \
+				pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
+    src_y_t = src_x + pixman_fixed_to_int (                                   \
+				src_image->common.transform->matrix[1][2] +   \
+				pixman_fixed_1 / 2 - pixman_fixed_e);         \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
+			   src_stride, src_line, 1);                          \
+    blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
+			     width, height);                                  \
+}                                                                             \
+                                                                              \
+static void                                                                   \
+fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
+				    pixman_op_t              op,              \
+				    pixman_image_t *         src_image,       \
+				    pixman_image_t *         mask_image,      \
+				    pixman_image_t *         dst_image,       \
+				    int32_t                  src_x,           \
+				    int32_t                  src_y,           \
+				    int32_t                  mask_x,          \
+				    int32_t                  mask_y,          \
+				    int32_t                  dest_x,          \
+				    int32_t                  dest_y,          \
+				    int32_t                  width,           \
+				    int32_t                  height)          \
+{                                                                             \
+    pix_type       *dst_line;                                                 \
+    pix_type       *src_line;                                                 \
+    int             dst_stride, src_stride;                                   \
+    int             src_x_t, src_y_t;                                         \
+                                                                              \
+    PIXMAN_IMAGE_GET_LINE (dst_image, dest_x, dest_y, pix_type,               \
+			   dst_stride, dst_line, 1);                          \
+    src_x_t = src_y + pixman_fixed_to_int (                                   \
+				src_image->common.transform->matrix[0][2] +   \
+				pixman_fixed_1 / 2 - pixman_fixed_e);         \
+    src_y_t = -src_x + pixman_fixed_to_int (                                  \
+				src_image->common.transform->matrix[1][2] +   \
+				pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
+    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
+			   src_stride, src_line, 1);                          \
+    blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
+			      width, height);                                 \
+}
+
+FAST_SIMPLE_ROTATE (8, uint8_t)
+FAST_SIMPLE_ROTATE (565, uint16_t)
+FAST_SIMPLE_ROTATE (8888, uint32_t)
+
 static const pixman_fast_path_t c_fast_paths[] =
 {
     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
@@ -1734,6 +2000,32 @@ static const pixman_fast_path_t c_fast_paths[] =
     NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
     NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
 
+#define SIMPLE_ROTATE_FLAGS(angle)					  \
+    (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM	|			  \
+     FAST_PATH_NEAREST_FILTER			|			  \
+     FAST_PATH_SAMPLES_COVER_CLIP		|			  \
+     FAST_PATH_STANDARD_FLAGS)
+
+#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)				  \
+    {   PIXMAN_OP_ ## op,						  \
+	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),				  \
+	PIXMAN_null, 0,							  \
+	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
+	fast_composite_rotate_90_##suffix,				  \
+    },									  \
+    {   PIXMAN_OP_ ## op,						  \
+	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),			  \
+	PIXMAN_null, 0,							  \
+	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
+	fast_composite_rotate_270_##suffix,				  \
+    }
+
+    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
+    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
+    SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
+    SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
+    SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
+
     {   PIXMAN_OP_NONE	},
 };
 
commit e0c7948c970b816f323a6402241ca70fa855c12c
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Thu Jul 29 17:58:13 2010 +0300

    New flags for 90/180/270 rotation
    
    These flags are set when the transform is a simple nonscaled 90/180/270
    degrees rotation.

diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index a72299b..e91d87c 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -211,8 +211,25 @@ compute_image_info (pixman_image_t *image)
 	    if (image->common.transform->matrix[0][1] == 0 &&
 		image->common.transform->matrix[1][0] == 0)
 	    {
+		if (image->common.transform->matrix[0][0] == -pixman_fixed_1 &&
+		    image->common.transform->matrix[1][1] == -pixman_fixed_1)
+		{
+		    flags |= FAST_PATH_ROTATE_180_TRANSFORM;
+		}
 		flags |= FAST_PATH_SCALE_TRANSFORM;
 	    }
+	    else if (image->common.transform->matrix[0][0] == 0 &&
+	             image->common.transform->matrix[1][1] == 0)
+	    {
+		pixman_fixed_t m01 = image->common.transform->matrix[0][1];
+		if (m01 == -image->common.transform->matrix[1][0])
+		{
+			if (m01 == -pixman_fixed_1)
+			    flags |= FAST_PATH_ROTATE_90_TRANSFORM;
+			else if (m01 == pixman_fixed_1)
+			    flags |= FAST_PATH_ROTATE_270_TRANSFORM;
+		}
+	    }
 	}
 
 	if (image->common.transform->matrix[0][0] > 0)
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index f5d0ba1..1473dc4 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -611,6 +611,9 @@ _pixman_iter_get_scanline_noop (pixman_iter_t *iter, const uint32_t *mask);
 #define FAST_PATH_AFFINE_TRANSFORM		(1 << 18)
 #define FAST_PATH_Y_UNIT_ZERO			(1 << 19)
 #define FAST_PATH_BILINEAR_FILTER		(1 << 20)
+#define FAST_PATH_ROTATE_90_TRANSFORM		(1 << 21)
+#define FAST_PATH_ROTATE_180_TRANSFORM		(1 << 22)
+#define FAST_PATH_ROTATE_270_TRANSFORM		(1 << 23)
 
 #define FAST_PATH_PAD_REPEAT						\
     (FAST_PATH_NO_NONE_REPEAT		|				\
commit 3b68c295fd45297a631569b35608364dbcb6d452
Author: Siarhei Siamashka <siarhei.siamashka at nokia.com>
Date:   Tue Oct 26 15:40:01 2010 +0300

    test: affine-test updated to stress 90/180/270 degrees rotation more

diff --git a/test/affine-test.c b/test/affine-test.c
index f7f058a..b7a1fa6 100644
--- a/test/affine-test.c
+++ b/test/affine-test.c
@@ -112,10 +112,36 @@ test_composite (int      testnum,
 
     if (lcg_rand_n (4) > 0)
     {
-	int c = lcg_rand_N (2 * 65536) - 65536;
-	int s = lcg_rand_N (2 * 65536) - 65536;
-	
+	int c, s, tx = 0, ty = 0;
+	switch (lcg_rand_n (4))
+	{
+	case 0:
+	    /* 90 degrees */
+	    c = 0;
+	    s = pixman_fixed_1;
+	    tx = pixman_int_to_fixed (MAX_SRC_HEIGHT);
+	    break;
+	case 1:
+	    /* 180 degrees */
+	    c = -pixman_fixed_1;
+	    s = 0;
+	    tx = pixman_int_to_fixed (MAX_SRC_WIDTH);
+	    ty = pixman_int_to_fixed (MAX_SRC_HEIGHT);
+	    break;
+	case 2:
+	    /* 270 degrees */
+	    c = 0;
+	    s = -pixman_fixed_1;
+	    ty = pixman_int_to_fixed (MAX_SRC_WIDTH);
+	    break;
+	default:
+	    /* arbitrary rotation */
+	    c = lcg_rand_N (2 * 65536) - 65536;
+	    s = lcg_rand_N (2 * 65536) - 65536;
+	    break;
+	}
 	pixman_transform_rotate (&transform, NULL, c, s);
+	pixman_transform_translate (&transform, NULL, tx, ty);
     }
 
     pixman_image_set_transform (src_img, &transform);
@@ -256,6 +282,6 @@ main (int argc, const char *argv[])
 {
     pixman_disable_out_of_bounds_workaround ();
 
-    return fuzzer_test_main ("affine", 8000000, 0x46EC3C6A,
+    return fuzzer_test_main ("affine", 8000000, 0x4B5D1852,
 			     test_composite, argc, argv);
 }


More information about the xorg-commit mailing list