pixman: Branch 'master' - 3 commits

Thu Jun 28 17:43:17 PDT 2012

pixman/pixman-sse2.c      |   15 +++++-----
 test/lowlevel-blt-bench.c |   63 +++++++++++++++++++++++++++++++++++++++++++++-
 test/utils.c              |    6 ++--
 3 files changed, 72 insertions(+), 12 deletions(-)

New commits:
commit ff5d041b88c667141b891909acd3085c3ed54994
Author: Siarhei Siamashka <siarhei.siamashka at gmail.com>
Date:   Mon Jun 25 07:24:27 2012 +0300

    sse2: faster bilinear scaling (use _mm_loadl_epi64)
    
    Using _mm_loadl_epi64() to load two pixels at once (pairs of top
    and bottom pixels) is faster than loading each pixel separately
    and combining them with _mm_set_epi32().
    
    === cairo-perf-trace ===
    
    before: image             firefox-fishtank   66.912   66.931   0.13%    3/3
    after:  image             firefox-fishtank   57.584   58.349   0.74%    3/3
    
    === lowlevel-blt-bench ===
    
    before: src_8888_8888 =  L1: 181.10  L2: 179.14  M:178.08 ( 11.02%)  HT:153.22  VT:133.45  R:142.24  RT: 95.32
    after:  src_8888_8888 =  L1: 228.68  L2: 225.75  M:223.98 ( 14.23%)  HT:185.32  VT:155.06  R:162.73  RT:102.52
    
    This improvement was suggested by Matt Turner on IRC.

diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index 0604254..ef82a18 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -5377,17 +5377,16 @@ FAST_NEAREST_MAINLOOP_COMMON (sse2_8888_n_8888_none_OVER,
 #define BILINEAR_INTERPOLATE_ONE_PIXEL(pix)					\
 do {										\
     __m128i xmm_wh, xmm_lo, xmm_hi, a;						\
-    /* fetch 2x2 pixel block into sse2 register */				\
-    uint32_t tl = src_top [pixman_fixed_to_int (vx)];				\
-    uint32_t tr = src_top [pixman_fixed_to_int (vx) + 1];			\
-    uint32_t bl = src_bottom [pixman_fixed_to_int (vx)];			\
-    uint32_t br = src_bottom [pixman_fixed_to_int (vx) + 1];			\
-    a = _mm_set_epi32 (tr, tl, br, bl);						\
+    /* fetch 2x2 pixel block into sse2 registers */				\
+    __m128i tltr = _mm_loadl_epi64 (						\
+			    (__m128i *)&src_top[pixman_fixed_to_int (vx)]);	\
+    __m128i blbr = _mm_loadl_epi64 (						\
+			    (__m128i *)&src_bottom[pixman_fixed_to_int (vx)]);	\
     vx += unit_x;								\
     /* vertical interpolation */						\
-    a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpackhi_epi8 (a, xmm_zero),	\
+    a = _mm_add_epi16 (_mm_mullo_epi16 (_mm_unpacklo_epi8 (tltr, xmm_zero),	\
 					xmm_wt),				\
-		       _mm_mullo_epi16 (_mm_unpacklo_epi8 (a, xmm_zero),	\
+		       _mm_mullo_epi16 (_mm_unpacklo_epi8 (blbr, xmm_zero),	\
 					xmm_wb));				\
     /* calculate horizontal weights */						\
     xmm_wh = _mm_add_epi16 (xmm_addc,						\
commit fc162bad561a516f648daf07e9d22d427fe60e74
Author: Siarhei Siamashka <siarhei.siamashka at gmail.com>
Date:   Mon Jun 25 07:11:59 2012 +0300

    test: support nearest/bilinear scaling in lowlevel-blt-bench
    
    Scale factor is selected to be nearly 1x, so that the MPix/s results
    can be directly compared with the results of non-scaled compositing
    operations.

diff --git a/test/lowlevel-blt-bench.c b/test/lowlevel-blt-bench.c
index b44b9f8..3afa926 100644
--- a/test/lowlevel-blt-bench.c
+++ b/test/lowlevel-blt-bench.c
@@ -80,10 +80,28 @@ bench_memcpy ()
     return (double)total / (t2 - t1);
 }
 
+static pixman_bool_t use_scaling = FALSE;
+static pixman_filter_t filter = PIXMAN_FILTER_NEAREST;
+
+/* nearly 1x scale factor */
+static pixman_transform_t m =
+{
+    {
+        { pixman_fixed_1 + 1, 0,              0              },
+        { 0,                  pixman_fixed_1, 0              },
+        { 0,                  0,              pixman_fixed_1 }
+    }
+};
+
 static void
 pixman_image_composite_wrapper (pixman_implementation_t *impl,
 				pixman_composite_info_t *info)
 {
+    if (use_scaling)
+    {
+        pixman_image_set_filter (info->src_image, filter, NULL, 0);
+        pixman_image_set_transform(info->src_image, &m);
+    }
     pixman_image_composite (info->op,
 			    info->src_image, info->mask_image, info->dest_image,
 			    info->src_x, info->src_y,
@@ -96,6 +114,11 @@ static void
 pixman_image_composite_empty (pixman_implementation_t *impl,
 			      pixman_composite_info_t *info)
 {
+    if (use_scaling)
+    {
+        pixman_image_set_filter (info->src_image, filter, NULL, 0);
+        pixman_image_set_transform(info->src_image, &m);
+    }
     pixman_image_composite (info->op,
 			    info->src_image, info->mask_image, info->dest_image,
 			    0, 0, 0, 0, 0, 0, 1, 1);
@@ -669,7 +692,35 @@ main (int argc, char *argv[])
 {
     double x;
     int i;
-    char *pattern = argc > 1 ? argv[1] : "all";
+    const char *pattern = NULL;
+    for (i = 1; i < argc; i++)
+    {
+	if (argv[i][0] == '-')
+	{
+	    if (strchr (argv[i] + 1, 'b'))
+	    {
+		use_scaling = TRUE;
+		filter = PIXMAN_FILTER_BILINEAR;
+	    }
+	    else if (strchr (argv[i] + 1, 'n'))
+	    {
+		use_scaling = TRUE;
+		filter = PIXMAN_FILTER_NEAREST;
+	    }
+	}
+	else
+	{
+	    pattern = argv[i];
+	}
+    }
+
+    if (!pattern)
+    {
+	printf ("Usage: lowlevel-blt-bench [-b] [-n] pattern\n");
+	printf ("  -n : benchmark nearest scaling\n");
+	printf ("  -b : benchmark bilinear scaling\n");
+	return 1;
+    }
 
     src = aligned_malloc (4096, BUFSIZE * 3);
     memset (src, 0xCC, BUFSIZE * 3);
@@ -706,6 +757,16 @@ main (int argc, char *argv[])
     bandwidth = x = bench_memcpy ();
     printf ("reference memcpy speed = %.1fMB/s (%.1fMP/s for 32bpp fills)\n",
             x / 1000000., x / 4000000);
+    if (use_scaling)
+    {
+	printf ("---\n");
+	if (filter == PIXMAN_FILTER_BILINEAR)
+	    printf ("BILINEAR scaling\n");
+	else if (filter == PIXMAN_FILTER_NEAREST)
+	    printf ("NEAREST scaling\n");
+	else
+	    printf ("UNKNOWN scaling\n");
+    }
     printf ("---\n");
 
     for (i = 0; i < ARRAY_LENGTH (tests_tbl); i++)
commit 387e9bcddb90bd2c7d1dfb81c073196f9f81042d
Author: Siarhei Siamashka <siarhei.siamashka at gmail.com>
Date:   Sat Jun 23 04:08:28 2012 +0300

    test: Fix for strict aliasing issue in 'get_random_seed'
    
    Gets rid of a gcc warning when compiled with the -fstrict-aliasing option in CFLAGS.

diff --git a/test/utils.c b/test/utils.c
index 0abc32c..563b33d 100644
--- a/test/utils.c
+++ b/test/utils.c
@@ -686,9 +686,9 @@ gettime (void)
 uint32_t
 get_random_seed (void)
 {
-    double d = gettime();
-
-    lcg_srand (*(uint32_t *)&d);
+    union { double d; uint32_t u32; } t;
+    t.d = gettime();
+    lcg_srand (t.u32);
 
     return lcg_rand_u32 ();
 }