pixman: Branch 'master' - 3 commits

Tue Jan 29 12:31:35 PST 2013

Makefile.am                   |    4 ++--
 pixman/pixman-combine-float.c |   30 ++++++++++++++++++------------
 test/lowlevel-blt-bench.c     |   31 +++++++++++++++++++++++++------
 3 files changed, 45 insertions(+), 20 deletions(-)

New commits:
commit afde862928da7ac927cf4b60a022fafe5f060d26
Author: SÃ¸ren Sandmann Pedersen <ssp at redhat.com>
Date:   Sun Jan 27 20:08:06 2013 -0500

    Change default GPGKEY to 3892336E, which is soren.sandmann at gmail.com
    
    The old one belongs to the email address sandmann at daimi.au.dk, which
    doesn't work anyore.
    
    Also use gpg to get the name and address for the "(Signed by ...)"
    line since that works more reliably for me than using git.

diff --git a/Makefile.am b/Makefile.am
index 88ff897..5137c9e 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -10,7 +10,7 @@ snapshot:
 	test -d "$(srcdir)/.git" && distdir=$$distdir-`cd "$(srcdir)" && git rev-parse HEAD | cut -c 1-6`; \
 	$(MAKE) $(AM_MAKEFLAGS) distdir="$$distdir" dist
 
-GPGKEY=6FF7C1A8
+GPGKEY=3892336E
 USERNAME=$$USER
 RELEASE_OR_SNAPSHOT = $$(if test "x$(PIXMAN_VERSION_MINOR)" = "x$$(echo "$(PIXMAN_VERSION_MINOR)/2*2" | bc)" ; then echo release; else echo snapshot; fi)
 RELEASE_CAIRO_HOST =	$(USERNAME)@cairographics.org
@@ -121,7 +121,7 @@ release-publish-message: $(HASHFILES) ensure-prev
 	@echo ""
 	@echo "GPG signature:"
 	@echo "	$(RELEASE_CAIRO_URL)/$(gpg_file)"
-	@echo "	(signed by `git config --get user.name` <`git config --get user.email`>)"
+	@echo "	(signed by`gpg --list-keys $(GPGKEY) | grep uid | cut -b4- | tr -s " "`)"
 	@echo ""
 	@echo "Git:"
 	@echo "	git://git.freedesktop.org/git/pixman"
commit 69a7a9b6b6dc5b769888c469de3435059318f7cc
Author: Ben Avison <bavison at riscosopen.org>
Date:   Thu Jan 24 18:19:48 2013 +0000

    Improve L1 and L2 benchmark tests for caches that don't use allocate-on-write
    
    In particular this affects single-core ARMs (e.g. ARM11, Cortex-A8), which
    are usually configured this way. For other CPUs, this should only add a
    constant time, which will be cancelled out by the EXCLUDE_OVERHEAD runs.
    
    The problems were caused by cachelines becoming permanently evicted from
    the cache, because the code that was intended to pull them back in again on
    each iteration assumed too long a cache line (for the L1 test) or failed to
    read memory beyond the first pixel row (for the L2 test). Also, the reloading
    of the source buffer was unnecessary.
    
    These issues were identified by Siarhei in this post:
    http://lists.freedesktop.org/archives/pixman/2013-January/002543.html

diff --git a/test/lowlevel-blt-bench.c b/test/lowlevel-blt-bench.c
index 7336fa0..8e80b42 100644
--- a/test/lowlevel-blt-bench.c
+++ b/test/lowlevel-blt-bench.c
@@ -33,6 +33,14 @@
 #define L1CACHE_SIZE (8 * 1024)
 #define L2CACHE_SIZE (128 * 1024)
 
+/* This is applied to both L1 and L2 tests - alternatively, you could
+ * parameterise bench_L or split it into two functions. It could be
+ * read at runtime on some architectures, but it only really matters
+ * that it's a number that's an integer divisor of both cacheline
+ * lengths, and further, it only really matters for caches that don't
+ * do allocate0on-write. */
+#define CACHELINE_LENGTH (32) /* bytes */
+
 #define WIDTH  1920
 #define HEIGHT 1080
 #define BUFSIZE (WIDTH * HEIGHT * 4)
@@ -168,18 +176,29 @@ bench_L  (pixman_op_t              op,
           int                      width,
           int                      lines_count)
 {
-    int64_t      i, j;
+    int64_t      i, j, k;
     int          x = 0;
     int          q = 0;
     volatile int qx;
 
     for (i = 0; i < n; i++)
     {
-	/* touch destination buffer to fetch it into L1 cache */
-	for (j = 0; j < width + 64; j += 16) {
-	    q += dst[j];
-	    q += src[j];
-	}
+        /* For caches without allocate-on-write, we need to force the
+         * destination buffer back into the cache on each iteration,
+         * otherwise if they are evicted during the test, they remain
+         * uncached. This doesn't matter for tests which read the
+         * destination buffer, or for caches that do allocate-on-write,
+         * but in those cases this loop just adds constant time, which
+         * should be successfully cancelled out.
+         */
+        for (j = 0; j < lines_count; j++)
+        {
+            for (k = 0; k < width + 62; k += CACHELINE_LENGTH / sizeof *dst)
+            {
+                q += dst[j * WIDTH + k];
+            }
+            q += dst[j * WIDTH + width + 62];
+        }
 	if (++x >= 64)
 	    x = 0;
 	call_func (func, op, src_img, mask_img, dst_img, x, 0, x, 0, 63 - x, 0, width, lines_count);
commit 1fa67f499d3826fad8783684bb90c8aadd9f682f
Author: SÃ¸ren Sandmann Pedersen <ssp at redhat.com>
Date:   Fri Jan 18 14:13:21 2013 -0500

    pixman-combine-float.c: Use IS_ZERO() in clip_color() and set_sat()
    
    The clip_color() function has some checks to avoid division by zero,
    but they are done by comparing the value to 4 * FLT_EPSILON, where a
    better choice is the IS_ZERO() macro that compares to +/- FLT_MIN.
    
    In set_sat(), the check is that *max > *min before dividing by *max -
    *min, but that has the potential problem that interactions between GCC
    optimizions and 80 bit x87 registers could mean that (*max > *min) is
    true in 80 bits, but (*max - *min) is 0 in 32 bits, so that the
    division by zero is not prevented. Using IS_ZERO() here as well
    prevents this.

diff --git a/pixman/pixman-combine-float.c b/pixman/pixman-combine-float.c
index c916df8..06ce203 100644
--- a/pixman/pixman-combine-float.c
+++ b/pixman/pixman-combine-float.c
@@ -653,10 +653,12 @@ clip_color (rgb_t *color, float a)
     float l = get_lum (color);
     float n = channel_min (color);
     float x = channel_max (color);
+    float t;
 
     if (n < 0.0f)
     {
-	if ((l - n) < 4 * FLT_EPSILON)
+	t = l - n;
+	if (IS_ZERO (t))
 	{
 	    color->r = 0.0f;
 	    color->g = 0.0f;
@@ -664,14 +666,15 @@ clip_color (rgb_t *color, float a)
 	}
 	else
 	{
-	    color->r = l + (((color->r - l) * l) / (l - n));
-	    color->g = l + (((color->g - l) * l) / (l - n));
-	    color->b = l + (((color->b - l) * l) / (l - n));
+	    color->r = l + (((color->r - l) * l) / t);
+	    color->g = l + (((color->g - l) * l) / t);
+	    color->b = l + (((color->b - l) * l) / t);
 	}
     }
     if (x > a)
     {
-	if ((x - l) < 4 * FLT_EPSILON)
+	t = x - l;
+	if (IS_ZERO (t))
 	{
 	    color->r = a;
 	    color->g = a;
@@ -679,9 +682,9 @@ clip_color (rgb_t *color, float a)
 	}
 	else
 	{
-	    color->r = l + (((color->r - l) * (a - l) / (x - l)));
-	    color->g = l + (((color->g - l) * (a - l) / (x - l)));
-	    color->b = l + (((color->b - l) * (a - l) / (x - l)));
+	    color->r = l + (((color->r - l) * (a - l) / t));
+	    color->g = l + (((color->g - l) * (a - l) / t));
+	    color->b = l + (((color->b - l) * (a - l) / t));
 	}
     }
 }
@@ -702,6 +705,7 @@ static void
 set_sat (rgb_t *src, float sat)
 {
     float *max, *mid, *min;
+    float t;
 
     if (src->r > src->g)
     {
@@ -752,14 +756,16 @@ set_sat (rgb_t *src, float sat)
 	}
     }
 
-    if (*max > *min)
+    t = *max - *min;
+
+    if (IS_ZERO (t))
     {
-	*mid = (((*mid - *min) * sat) / (*max - *min));
-	*max = sat;
+	*mid = *max = 0.0f;
     }
     else
     {
-	*mid = *max = 0.0f;
+	*mid = ((*mid - *min) * sat) / t;
+	*max = sat;
     }
 
     *min = 0.0f;