pixman: Branch 'master' - 3 commits

Sat Aug 10 08:16:57 PDT 2013

pixman/pixman-fast-path.c      |  241 +++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-general.c        |    7 +
 pixman/pixman-image.c          |    3 
 pixman/pixman-implementation.c |    1 
 pixman/pixman-private.h        |    2 
 test/Makefile.sources          |    1 
 test/scaling-bench.c           |   69 +++++++++++
 7 files changed, 324 insertions(+)

New commits:
commit 3518a0dafa63098d41e466f73d105b7e3e4b12de
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Thu May 24 02:49:05 2012 -0400

    Add an iterator that can fetch bilinearly scaled images
    
    This new iterator works in a separable way; that is, for a destination
    scanline, it scales the two involved source scanlines and then caches
    them so that they can be reused for the next destination scanlines.
    
    There are two versions of the code, one that uses 64 bit arithmetic,
    and one that uses 32 bit arithmetic only. The latter version is
    used on 32 bit systems, where it is expected to be faster.
    
    This scheme saves a substantial amount of arithmetic for larger
    scalings; the per-pixel times for various configurations as reported
    by scaling-bench are graphed here:
    
    	http://people.freedesktop.org/~sandmann/separable.v2/v2.png
    
    The "sse2" graph is the current default on x86, "mmx" is with sse2
    disabled, "old c" is with sse2 and mmx disabled. The "new 32" and "new
    64" graphs show times for the new code. As the graphs show, the 64 bit
    version of the new code beats the "old c" for all scaling ratios.
    
    The data was taken on a Sandy Bridge Core i3-2350M CPU @ 2.0 GHz
    running in 64 bit mode.
    
    The data used to generate the graph is available in this directory:
    
        http://people.freedesktop.org/~sandmann/separable.v2/
    
    There is also a Gnumeric spreadsheet v2.gnumeric containing the
    per-pixel values and the graph.
    
    V2:
    - Add error message in the OOM/bad matrix case
    - Save some shifts by storing the cached scanlines in AGRB order
    - Special cased version that uses 32 bit arithmetic when sizeof(long) <= 4

diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index 3982dce..2608268 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -2261,6 +2261,237 @@ fast_write_back_r5g6b5 (pixman_iter_t *iter)
     }
 }
 
+typedef struct			/* One cached, horizontally interpolated source row */
+{
+    int		y;		/* Source row currently held in buffer; set by fetch_horizontal() */
+    uint64_t *	buffer;		/* One 64-bit entry per fetched pixel */
+} line_t;
+
+typedef struct			/* Per-iterator state of the bilinear cover fetcher */
+{
+    line_t		line0;	/* Cached row for y0, the integer part of y */
+    line_t		line1;	/* Cached row for y1 = y0 + 1 */
+    pixman_fixed_t	y;	/* Source y of the next scanline; advanced after every fetch */
+    pixman_fixed_t	x;	/* Source x at which every scanline starts */
+    uint64_t		data[1];	/* Over-allocated storage that backs both line buffers */
+} bilinear_info_t;
+
+static void
+fetch_horizontal (bits_image_t *image, line_t *line,
+		  int y, pixman_fixed_t x, pixman_fixed_t ux, int n)
+{	/* Fill line->buffer[0..n) with row y of image, interpolated horizontally from x in steps of ux */
+    uint32_t *bits = image->bits + y * image->rowstride;
+    int i;
+
+    for (i = 0; i < n; ++i)
+    {
+	int x0 = pixman_fixed_to_int (x);
+	int x1 = x0 + 1;
+	int32_t dist_x;
+
+	uint32_t left = *(bits + x0);	/* No bounds check in this function; presumably guaranteed by COVER_CLIP_BILINEAR -- confirm */
+	uint32_t right = *(bits + x1);
+
+	dist_x = pixman_fixed_to_bilinear_weight (x);
+	dist_x <<= (8 - BILINEAR_INTERPOLATION_BITS);	/* Presumably rescales the weight to an 8-bit range */
+
+#if SIZEOF_LONG <= 4
+	{	/* 32-bit arithmetic: the 64-bit entry is written as two uint32_t halves */
+	    uint32_t lag, rag, ag;
+	    uint32_t lrb, rrb, rb;
+
+	    lag = (left & 0xff00ff00) >> 8;
+	    rag = (right & 0xff00ff00) >> 8;
+	    ag = (lag << 8) + dist_x * (rag - lag);	/* 256 * l + dist_x * (r - l), both 16-bit lanes at once */
+
+	    lrb = (left & 0x00ff00ff);
+	    rrb = (right & 0x00ff00ff);
+	    rb = (lrb << 8) + dist_x * (rrb - lrb);
+
+	    *((uint32_t *)(line->buffer + i)) = ag;
+	    *((uint32_t *)(line->buffer + i) + 1) = rb;
+	}
+#else
+	{	/* 64-bit arithmetic: all four channels interpolated in one uint64_t */
+	    uint64_t lagrb, ragrb;
+	    uint32_t lag, rag;
+	    uint32_t lrb, rrb;
+
+	    lag = (left & 0xff00ff00);
+	    lrb = (left & 0x00ff00ff);
+	    rag = (right & 0xff00ff00);
+	    rrb = (right & 0x00ff00ff);
+	    lagrb = (((uint64_t)lag) << 24) | lrb;	/* A and G move to the upper half: lanes are A, G, R, B, 16 bits each */
+	    ragrb = (((uint64_t)rag) << 24) | rrb;
+
+	    line->buffer[i] = (lagrb << 8) + dist_x * (ragrb - lagrb);
+	}
+#endif
+
+	x += ux;
+    }
+
+    line->y = y;	/* Tag the buffer with the row it now holds */
+}
+
+static uint32_t *
+fast_fetch_bilinear_cover (pixman_iter_t *iter, const uint32_t *mask)	/* mask is not used */
+{	/* Write one scanline to iter->buffer by blending the two cached rows vertically; returns iter->buffer */
+    pixman_fixed_t fx, ux;
+    bilinear_info_t *info = iter->data;
+    line_t *line0, *line1;
+    int y0, y1;
+    int32_t dist_y;
+    int i;
+
+    fx = info->x;
+    ux = iter->image->common.transform->matrix[0][0];	/* Horizontal step per output pixel */
+
+    y0 = pixman_fixed_to_int (info->y);
+    y1 = y0 + 1;
+    dist_y = pixman_fixed_to_bilinear_weight (info->y);
+    dist_y <<= (8 - BILINEAR_INTERPOLATION_BITS);	/* Same weight scaling as in fetch_horizontal() */
+
+    line0 = &info->line0;
+    line1 = &info->line1;
+
+    if (line0->y != y0 || line1->y != y1)	/* At least one cached row is stale */
+    {
+	if (line0->y == y1 || line1->y == y0)	/* A needed row sits in the other slot: swap instead of refetching */
+	{
+	    line_t tmp = *line0;
+	    *line0 = *line1;
+	    *line1 = tmp;
+	}
+
+	if (line0->y != y0)
+	{
+	    fetch_horizontal (
+		&iter->image->bits, line0, y0, fx, ux, iter->width);
+	}
+
+	if (line1->y != y1)
+	{
+	    fetch_horizontal (
+		&iter->image->bits, line1, y1, fx, ux, iter->width);
+	}
+    }
+
+    for (i = 0; i < iter->width; ++i)
+    {
+#if SIZEOF_LONG <= 4
+	uint32_t ta, tr, tg, tb;	/* t* = top row (line0), b* = bottom row (line1) */
+	uint32_t ba, br, bg, bb;
+	uint32_t tag, trb;
+	uint32_t bag, brb;
+	uint32_t a, r, g, b;
+
+	tag = *((uint32_t *)(line0->buffer + i));	/* Halves are read back the way fetch_horizontal() stored them */
+	trb = *((uint32_t *)(line0->buffer + i) + 1);
+	bag = *((uint32_t *)(line1->buffer + i));
+	brb = *((uint32_t *)(line1->buffer + i) + 1);
+
+	ta = tag >> 16;
+	ba = bag >> 16;
+	a = (ta << 8) + dist_y * (ba - ta);	/* 256 * t + dist_y * (b - t), one channel at a time */
+
+	tr = trb >> 16;
+	br = brb >> 16;
+	r = (tr << 8) + dist_y * (br - tr);
+
+	tg = tag & 0xffff;
+	bg = bag & 0xffff;
+	g = (tg << 8) + dist_y * (bg - tg);
+	
+	tb = trb & 0xffff;
+	bb = brb & 0xffff;
+	b = (tb << 8) + dist_y * (bb - tb);
+
+	a = (a <<  8) & 0xff000000;	/* Keep bits 16..23 of each result, moved to its a8r8g8b8 position */
+	r = (r <<  0) & 0x00ff0000;
+	g = (g >>  8) & 0x0000ff00;
+	b = (b >> 16) & 0x000000ff;
+#else
+	uint64_t top = line0->buffer[i];
+	uint64_t bot = line1->buffer[i];
+	uint64_t tar = (top & 0xffff0000ffff0000ULL) >> 16;	/* A and R lanes, shifted down to leave headroom above each */
+	uint64_t bar = (bot & 0xffff0000ffff0000ULL) >> 16;
+	uint64_t tgb = (top & 0x0000ffff0000ffffULL);	/* G and B lanes */
+	uint64_t bgb = (bot & 0x0000ffff0000ffffULL);
+	uint64_t ar, gb;
+	uint32_t a, r, g, b;
+
+	ar = (tar << 8) + dist_y * (bar - tar);	/* Two channels interpolated per 64-bit operation */
+	gb = (tgb << 8) + dist_y * (bgb - tgb);
+
+	a = ((ar >> 24) & 0xff000000);	/* Extract 8 bits per channel at its a8r8g8b8 position */
+	r = ((ar >>  0) & 0x00ff0000);
+	g = ((gb >> 40) & 0x0000ff00);
+	b = ((gb >> 16) & 0x000000ff);
+#endif
+
+	iter->buffer[i] = a | r | g | b;
+    }
+
+    info->y += iter->image->common.transform->matrix[1][1];	/* Advance to the source y of the next scanline */
+
+    return iter->buffer;
+}
+
+static void
+bilinear_cover_iter_fini (pixman_iter_t *iter)
+{	/* Iterator finalizer installed by fast_bilinear_cover_iter_init() */
+    free (iter->data);	/* Releases the bilinear_info_t, which also holds both line buffers */
+}
+
+static void
+fast_bilinear_cover_iter_init (pixman_iter_t *iter, const pixman_iter_info_t *iter_info)
+{	/* Set up iter to fetch bilinearly scaled scanlines; iter_info is not used */
+    int width = iter->width;
+    bilinear_info_t *info;
+    pixman_vector_t v;
+
+    /* Reference point is the center of the pixel */
+    v.vector[0] = pixman_int_to_fixed (iter->x) + pixman_fixed_1 / 2;
+    v.vector[1] = pixman_int_to_fixed (iter->y) + pixman_fixed_1 / 2;
+    v.vector[2] = pixman_fixed_1;
+
+    if (!pixman_transform_point_3d (iter->image->common.transform, &v))
+	goto fail;
+
+    info = malloc (sizeof (*info) + (2 * width - 1) * sizeof (uint64_t));	/* data[1] already provides one of the 2 * width entries */
+    if (!info)
+	goto fail;
+
+    info->x = v.vector[0] - pixman_fixed_1 / 2;	/* Shift by half a pixel so the integer part indexes the left/top sample */
+    info->y = v.vector[1] - pixman_fixed_1 / 2;
+
+    /* It is safe to set the y coordinates to -1 initially
+     * because COVER_CLIP_BILINEAR ensures that we will only
+     * be asked to fetch lines in the [0, height) interval
+     */
+    info->line0.y = -1;
+    info->line0.buffer = &(info->data[0]);
+    info->line1.y = -1;
+    info->line1.buffer = &(info->data[width]);
+
+    iter->get_scanline = fast_fetch_bilinear_cover;
+    iter->fini = bilinear_cover_iter_fini;	/* Frees info when the caller finishes the iterator */
+
+    iter->data = info;
+    return;
+
+fail:
+    /* Something went wrong, either a bad matrix or OOM; in such cases we
+     * don't guarantee any particular rendering. iter->data was never set
+     * on this path, so no finalizer that frees it may be installed. */
+    _pixman_log_error (
+	FUNC, "Allocation failure or bad matrix, skipping rendering\n");
+    
+    iter->get_scanline = _pixman_iter_get_scanline_noop;
+    iter->fini = NULL;
+}
+
 #define IMAGE_FLAGS							\
     (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
      FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
@@ -2280,6 +2511,16 @@ static const pixman_iter_info_t fast_iters[] =
       _pixman_iter_init_bits_stride,
       fast_dest_fetch_noop, fast_write_back_r5g6b5 },
 
+    { PIXMAN_a8r8g8b8,
+      (FAST_PATH_STANDARD_FLAGS			|
+       FAST_PATH_SCALE_TRANSFORM		|
+       FAST_PATH_BILINEAR_FILTER		|
+       FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR),
+      ITER_NARROW | ITER_SRC,
+      fast_bilinear_cover_iter_init,
+      NULL, NULL
+    },
+
     { PIXMAN_null },
 };
 
commit 146116eff4874500e4499e4bf4954c8e6b9c174a
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Fri May 25 11:38:41 2012 -0400

    Add support for iter finalizers
    
    Iterators may sometimes need to allocate auxiliary memory. In order to
    be able to free this memory, optional iterator finalizers are
    required.

diff --git a/pixman/pixman-general.c b/pixman/pixman-general.c
index 4da5da5..6310bff 100644
--- a/pixman/pixman-general.c
+++ b/pixman/pixman-general.c
@@ -208,6 +208,13 @@ general_composite_rect  (pixman_implementation_t *imp,
 	dest_iter.write_back (&dest_iter);
     }
 
+    if (src_iter.fini)
+	src_iter.fini (&src_iter);
+    if (mask_iter.fini)
+	mask_iter.fini (&mask_iter);
+    if (dest_iter.fini)
+	dest_iter.fini (&dest_iter);
+    
     if (scanline_buffer != (uint8_t *) stack_scanline_buffer)
 	free (scanline_buffer);
 }
diff --git a/pixman/pixman-image.c b/pixman/pixman-image.c
index 4f9c2f9..1ff1a49 100644
--- a/pixman/pixman-image.c
+++ b/pixman/pixman-image.c
@@ -926,6 +926,9 @@ _pixman_image_get_solid (pixman_implementation_t *imp,
 	    ITER_NARROW | ITER_SRC, image->common.flags);
 	
 	result = *iter.get_scanline (&iter, NULL);
+
+	if (iter.fini)
+	    iter.fini (&iter);
     }
 
     /* If necessary, convert RGB <--> BGR. */
diff --git a/pixman/pixman-implementation.c b/pixman/pixman-implementation.c
index 160847a..5884054 100644
--- a/pixman/pixman-implementation.c
+++ b/pixman/pixman-implementation.c
@@ -313,6 +313,7 @@ _pixman_implementation_iter_init (pixman_implementation_t *imp,
     iter->height = height;
     iter->iter_flags = iter_flags;
     iter->image_flags = image_flags;
+    iter->fini = NULL;
 
     if (!iter->image)
     {
diff --git a/pixman/pixman-private.h b/pixman/pixman-private.h
index af4a0b6..9646605 100644
--- a/pixman/pixman-private.h
+++ b/pixman/pixman-private.h
@@ -209,6 +209,7 @@ union pixman_image
 typedef struct pixman_iter_t pixman_iter_t;
 typedef uint32_t *(* pixman_iter_get_scanline_t) (pixman_iter_t *iter, const uint32_t *mask);
 typedef void      (* pixman_iter_write_back_t)   (pixman_iter_t *iter);
+typedef void	  (* pixman_iter_fini_t)	 (pixman_iter_t *iter);
 
 typedef enum
 {
@@ -255,6 +256,7 @@ struct pixman_iter_t
     /* These function pointers are initialized by the implementation */
     pixman_iter_get_scanline_t	get_scanline;
     pixman_iter_write_back_t	write_back;
+    pixman_iter_fini_t          fini;
 
     /* These fields are scratch data that implementations can use */
     void *			data;
commit 1be9208e0404fcf41b4e83734d8e33437f50c2bd
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date:   Wed May 22 18:48:08 2013 -0400

    test/scaling-bench.c: New benchmark for bilinear scaling
    
    This new benchmark scales a 320 x 240 a8r8g8b8 test image by all
    ratios from 0.10, 0.11, ... up to 10.00 and reports the time it took
    to do each of the scaling operations, and the time spent per
    destination pixel.
    
    The times reported for the scaling operations are given in
    milliseconds, the times-per-pixel are in nanoseconds.
    
    V2: Format output better

diff --git a/test/Makefile.sources b/test/Makefile.sources
index b5fc740..2fabdb5 100644
--- a/test/Makefile.sources
+++ b/test/Makefile.sources
@@ -33,6 +33,7 @@ OTHERPROGRAMS =                 \
 	lowlevel-blt-bench	\
 	radial-perf-test	\
         check-formats           \
+	scaling-bench		\
 	$(NULL)
 
 # Utility functions
diff --git a/test/scaling-bench.c b/test/scaling-bench.c
new file mode 100644
index 0000000..b39adef
--- /dev/null
+++ b/test/scaling-bench.c
@@ -0,0 +1,69 @@
+#include <stdlib.h>
+#include "utils.h"
+
+#define SOURCE_WIDTH 320
+#define SOURCE_HEIGHT 240
+
+static pixman_image_t *
+make_source (void)
+{	/* Build the randomly filled a8r8g8b8 benchmark source with the bilinear filter selected */
+    size_t n_bytes = (SOURCE_WIDTH + 2) * (SOURCE_HEIGHT + 2) * 4;	/* 4 bytes per pixel; the image is 2 pixels larger than SOURCE_* in each dimension */
+    uint32_t *data = malloc (n_bytes);	/* NOTE(review): not checked for NULL and never freed in this file */
+    pixman_image_t *source;
+
+    prng_randmemset (data, n_bytes, 0);
+    
+    source = pixman_image_create_bits (
+	PIXMAN_a8r8g8b8, SOURCE_WIDTH + 2, SOURCE_HEIGHT + 2,
+	data,
+	(SOURCE_WIDTH + 2) * 4);	/* Bytes per row */
+
+    pixman_image_set_filter (source, PIXMAN_FILTER_BILINEAR, NULL, 0);
+
+    return source;
+}
+
+int
+main ()
+{	/* Composite the source at each scale ratio and print one timing line per ratio */
+    double scale;
+    pixman_image_t *src;
+
+    prng_srand (23874);	/* Fixed seed */
+    
+    src = make_source ();	/* NOTE(review): src is not released before returning */
+    printf ("# %-6s %-22s   %-14s %-12s\n",
+	    "ratio",
+	    "resolutions",
+	    "time / ms",
+	    "time per pixel / ns");
+    for (scale = 0.1; scale < 10.005; scale += 0.01)	/* Nominally 0.10 .. 10.00 in steps of 0.01 */
+    {
+	int dest_width = SOURCE_WIDTH * scale + 0.5;	/* + 0.5 rounds to nearest on conversion to int */
+	int dest_height = SOURCE_HEIGHT * scale + 0.5;
+	pixman_fixed_t s = (1 / scale) * 65536.0 + 0.5;	/* Inverse ratio scaled by 65536 (16.16 fixed point), rounded */
+	pixman_transform_t transform;
+	pixman_image_t *dest;
+	double t1, t2;
+
+	pixman_transform_init_scale (&transform, s, s);
+	pixman_image_set_transform (src, &transform);
+	
+	dest = pixman_image_create_bits (
+	    PIXMAN_a8r8g8b8, dest_width, dest_height, NULL, -1);	/* NULL bits: presumably pixman allocates the storage -- confirm */
+
+	t1 = gettime();
+	pixman_image_composite (
+	    PIXMAN_OP_OVER, src, NULL, dest,
+	    scale, scale, 0, 0, 0, 0, dest_width, dest_height);	/* NOTE(review): the double scale is passed in what appear to be the source x/y offset slots -- confirm intent */
+	t2 = gettime();
+	
+	printf ("%6.2f : %4dx%-4d => %4dx%-4d : %12.4f : %12.4f\n",
+		scale, SOURCE_WIDTH, SOURCE_HEIGHT, dest_width, dest_height,
+		(t2 - t1) * 1000, ((t2 - t1) / (dest_width * dest_height)) * 1000000000);	/* ms and ns per the header, assuming gettime() returns seconds */
+
+	pixman_image_unref (dest);
+    }
+
+    return 0;
+}