pixman: Branch 'master' - 6 commits

Søren Sandmann Pedersen sandmann at kemper.freedesktop.org
Fri Nov 16 14:10:55 PST 2012


 pixman/pixman-arm-simd.c       |    2 
 pixman/pixman-fast-path.c      |    4 
 pixman/pixman-mips-dspr2-asm.S |  424 +++++++++++++++++++++++++++++++++++++++++
 pixman/pixman-mips-dspr2.c     |   29 ++
 pixman/pixman-mips-dspr2.h     |   82 +++++++
 pixman/pixman-mmx.c            |   60 ++---
 pixman/pixman-sse2.c           |  120 +++++------
 test/glyph-test.c              |    4 
 test/utils.c                   |    6 
 9 files changed, 634 insertions(+), 97 deletions(-)

New commits:
commit 44dd746bb68625b2f6be77c3f80292b45defe9d7
Author: Stefan Weil <sw at weilnetz.de>
Date:   Tue Nov 13 19:44:44 2012 +0100

    test: Fix compiler warnings caused by unused code
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>

diff --git a/test/utils.c b/test/utils.c
index ef0a171..c887a6d 100644
--- a/test/utils.c
+++ b/test/utils.c
@@ -694,6 +694,8 @@ get_random_seed (void)
     return lcg_rand_u32 ();
 }
 
+#ifdef HAVE_SIGACTION
+#ifdef HAVE_ALARM
 static const char *global_msg;
 
 static void
@@ -702,6 +704,8 @@ on_alarm (int signo)
     printf ("%s\n", global_msg);
     exit (1);
 }
+#endif
+#endif
 
 void
 fail_after (int seconds, const char *msg)
commit 5f96022d3bca15050958512f1c15a0067d2225af
Author: Stefan Weil <sw at weilnetz.de>
Date:   Tue Nov 13 19:38:32 2012 +0100

    pixman: Use uintptr_t in type casts from pointer to integral value
    
    These modifications fix lots of compiler warnings for systems where
    sizeof(unsigned long) != sizeof(void *).
    This is especially true for MinGW-w64 (64 bit Windows).
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>

diff --git a/pixman/pixman-arm-simd.c b/pixman/pixman-arm-simd.c
index 3d19bfa..94f9a0c 100644
--- a/pixman/pixman-arm-simd.c
+++ b/pixman/pixman-arm-simd.c
@@ -56,7 +56,7 @@ pixman_composite_add_8_8_asm_armv6 (int32_t  width,
 	/* ensure both src and dst are properly aligned before doing 32 bit reads
 	 * we'll stay in this loop if src and dst have differing alignments
 	 */
-	while (w && (((unsigned long)dst & 3) || ((unsigned long)src & 3)))
+	while (w && (((uintptr_t)dst & 3) || ((uintptr_t)src & 3)))
 	{
 	    s = *src;
 	    d = *dst;
diff --git a/pixman/pixman-fast-path.c b/pixman/pixman-fast-path.c
index d95cb4d..3429758 100644
--- a/pixman/pixman-fast-path.c
+++ b/pixman/pixman-fast-path.c
@@ -35,7 +35,7 @@
 static force_inline uint32_t
 fetch_24 (uint8_t *a)
 {
-    if (((unsigned long)a) & 1)
+    if (((uintptr_t)a) & 1)
     {
 #ifdef WORDS_BIGENDIAN
 	return (*a << 16) | (*(uint16_t *)(a + 1));
@@ -57,7 +57,7 @@ static force_inline void
 store_24 (uint8_t *a,
           uint32_t v)
 {
-    if (((unsigned long)a) & 1)
+    if (((uintptr_t)a) & 1)
     {
 #ifdef WORDS_BIGENDIAN
 	*a = (uint8_t) (v >> 16);
diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 76b6ced..aef468a 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -1402,7 +1402,7 @@ mmx_composite_over_n_8888 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    store8888 (dst, over (vsrc, vsrca, load8888 (dst)));
 
@@ -1468,7 +1468,7 @@ mmx_composite_over_n_0565 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint64_t d = *dst;
 	    __m64 vdest = expand565 (to_m64 (d), 0);
@@ -1546,7 +1546,7 @@ mmx_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 	uint32_t *p = (uint32_t *)mask_line;
 	uint32_t *q = (uint32_t *)dst_line;
 
-	while (twidth && (unsigned long)q & 7)
+	while (twidth && (uintptr_t)q & 7)
 	{
 	    uint32_t m = *(uint32_t *)p;
 
@@ -1637,7 +1637,7 @@ mmx_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    __m64 s = load8888 (src);
 	    __m64 d = load8888 (dst);
@@ -1707,7 +1707,7 @@ mmx_composite_over_x888_n_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint32_t ssrc = *src | 0xff000000;
 	    __m64 s = load8888 (&ssrc);
@@ -1881,7 +1881,7 @@ mmx_composite_over_8888_0565 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    __m64 vsrc = load8888 (src);
 	    uint64_t d = *dst;
@@ -1984,7 +1984,7 @@ mmx_composite_over_n_8_8888 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint64_t m = *mask;
 
@@ -2127,21 +2127,21 @@ mmx_fill (pixman_implementation_t *imp,
 	byte_line += stride;
 	w = byte_width;
 
-	if (w >= 1 && ((unsigned long)d & 1))
+	if (w >= 1 && ((uintptr_t)d & 1))
 	{
 	    *(uint8_t *)d = (xor & 0xff);
 	    w--;
 	    d++;
 	}
 
-	if (w >= 2 && ((unsigned long)d & 3))
+	if (w >= 2 && ((uintptr_t)d & 3))
 	{
 	    *(uint16_t *)d = xor;
 	    w -= 2;
 	    d += 2;
 	}
 
-	while (w >= 4 && ((unsigned long)d & 7))
+	while (w >= 4 && ((uintptr_t)d & 7))
 	{
 	    *(uint32_t *)d = xor;
 
@@ -2227,7 +2227,7 @@ mmx_composite_src_x888_0565 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    s = *src++;
 	    *dst = CONVERT_8888_TO_0565 (s);
@@ -2305,7 +2305,7 @@ mmx_composite_src_n_8_8888 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint64_t m = *mask;
 
@@ -2419,7 +2419,7 @@ mmx_composite_over_n_8_0565 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint64_t m = *mask;
 
@@ -2536,7 +2536,7 @@ mmx_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    __m64 vsrc = load8888 (src);
 	    uint64_t d = *dst;
@@ -2651,7 +2651,7 @@ mmx_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    __m64 s = load8888 (src);
 	    __m64 d = load8888 (dst);
@@ -2739,7 +2739,7 @@ mmx_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 	uint32_t *p = (uint32_t *)mask_line;
 	uint16_t *q = (uint16_t *)dst_line;
 
-	while (twidth && ((unsigned long)q & 7))
+	while (twidth && ((uintptr_t)q & 7))
 	{
 	    uint32_t m = *(uint32_t *)p;
 
@@ -2840,7 +2840,7 @@ mmx_composite_in_n_8_8 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    uint16_t tmp;
 	    uint8_t a;
@@ -2911,7 +2911,7 @@ mmx_composite_in_8_8 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 3)
+	while (w && (uintptr_t)dst & 3)
 	{
 	    uint8_t s, d;
 	    uint16_t tmp;
@@ -2990,7 +2990,7 @@ mmx_composite_add_n_8_8 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 3)
+	while (w && (uintptr_t)dst & 3)
 	{
 	    uint16_t tmp;
 	    uint16_t a;
@@ -3067,7 +3067,7 @@ mmx_composite_add_8_8 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    s = *src;
 	    d = *dst;
@@ -3130,7 +3130,7 @@ mmx_composite_add_0565_0565 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    s = *src++;
 	    if (s)
@@ -3212,7 +3212,7 @@ mmx_composite_add_8888_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    store (dst, _mm_adds_pu8 (load ((const uint32_t *)src),
 	                              load ((const uint32_t *)dst)));
@@ -3296,7 +3296,7 @@ mmx_blt (pixman_implementation_t *imp,
 	dst_bytes += dst_stride;
 	w = byte_width;
 
-	if (w >= 1 && ((unsigned long)d & 1))
+	if (w >= 1 && ((uintptr_t)d & 1))
 	{
 	    *(uint8_t *)d = *(uint8_t *)s;
 	    w -= 1;
@@ -3304,7 +3304,7 @@ mmx_blt (pixman_implementation_t *imp,
 	    d += 1;
 	}
 
-	if (w >= 2 && ((unsigned long)d & 3))
+	if (w >= 2 && ((uintptr_t)d & 3))
 	{
 	    *(uint16_t *)d = *(uint16_t *)s;
 	    w -= 2;
@@ -3312,7 +3312,7 @@ mmx_blt (pixman_implementation_t *imp,
 	    d += 2;
 	}
 
-	while (w >= 4 && ((unsigned long)d & 7))
+	while (w >= 4 && ((uintptr_t)d & 7))
 	{
 	    *(uint32_t *)d = ldl_u ((uint32_t *)s);
 
@@ -3495,7 +3495,7 @@ mmx_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
 
 	CHECKPOINT ();
 
-	while (w && (unsigned long)dst & 7)
+	while (w && (uintptr_t)dst & 7)
 	{
 	    __m64 vdest = load8888 (dst);
 
@@ -3778,7 +3778,7 @@ mmx_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && ((unsigned long)dst) & 7)
+    while (w && ((uintptr_t)dst) & 7)
     {
 	*dst++ = (*src++) | 0xff000000;
 	w--;
@@ -3820,7 +3820,7 @@ mmx_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && ((unsigned long)dst) & 0x0f)
+    while (w && ((uintptr_t)dst) & 0x0f)
     {
 	uint16_t s = *src++;
 
@@ -3864,7 +3864,7 @@ mmx_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && (((unsigned long)dst) & 15))
+    while (w && (((uintptr_t)dst) & 15))
     {
         *dst++ = *(src++) << 24;
         w--;
diff --git a/pixman/pixman-sse2.c b/pixman/pixman-sse2.c
index efed310..7e980c9 100644
--- a/pixman/pixman-sse2.c
+++ b/pixman/pixman-sse2.c
@@ -576,7 +576,7 @@ core_combine_over_u_sse2_mask (uint32_t *	  pd,
     uint32_t s, d;
 
     /* Align dst on a 16-byte boundary */
-    while (w && ((unsigned long)pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	d = *pd;
 	s = combine1 (ps, pm);
@@ -661,7 +661,7 @@ core_combine_over_u_sse2_no_mask (uint32_t *	  pd,
     uint32_t s, d;
 
     /* Align dst on a 16-byte boundary */
-    while (w && ((unsigned long)pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	d = *pd;
 	s = *ps;
@@ -753,7 +753,7 @@ sse2_combine_over_reverse_u (pixman_implementation_t *imp,
 
     /* Align dst on a 16-byte boundary */
     while (w &&
-           ((unsigned long)pd & 15))
+           ((uintptr_t)pd & 15))
     {
 	d = *pd;
 	s = combine1 (ps, pm);
@@ -840,7 +840,7 @@ sse2_combine_in_u (pixman_implementation_t *imp,
     __m128i xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst_lo, xmm_dst_hi;
 
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -901,7 +901,7 @@ sse2_combine_in_reverse_u (pixman_implementation_t *imp,
     __m128i xmm_src_lo, xmm_src_hi;
     __m128i xmm_dst_lo, xmm_dst_hi;
 
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -957,7 +957,7 @@ sse2_combine_out_reverse_u (pixman_implementation_t *imp,
                             const uint32_t *         pm,
                             int                      w)
 {
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	uint32_t s = combine1 (ps, pm);
 	uint32_t d = *pd;
@@ -1026,7 +1026,7 @@ sse2_combine_out_u (pixman_implementation_t *imp,
                     const uint32_t *         pm,
                     int                      w)
 {
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	uint32_t s = combine1 (ps, pm);
 	uint32_t d = *pd;
@@ -1113,7 +1113,7 @@ sse2_combine_atop_u (pixman_implementation_t *imp,
     __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -1197,7 +1197,7 @@ sse2_combine_atop_reverse_u (pixman_implementation_t *imp,
     __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -1285,7 +1285,7 @@ sse2_combine_xor_u (pixman_implementation_t *imp,
     __m128i xmm_alpha_src_lo, xmm_alpha_src_hi;
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
 
-    while (w && ((unsigned long) pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -1357,7 +1357,7 @@ sse2_combine_add_u (pixman_implementation_t *imp,
     const uint32_t* ps = src;
     const uint32_t* pm = mask;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -1430,7 +1430,7 @@ sse2_combine_saturate_u (pixman_implementation_t *imp,
     uint32_t pack_cmp;
     __m128i xmm_src, xmm_dst;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = combine1 (ps, pm);
 	d = *pd;
@@ -1518,7 +1518,7 @@ sse2_combine_src_ca (pixman_implementation_t *imp,
     __m128i xmm_mask_lo, xmm_mask_hi;
     __m128i xmm_dst_lo, xmm_dst_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1586,7 +1586,7 @@ sse2_combine_over_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1662,7 +1662,7 @@ sse2_combine_over_reverse_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1727,7 +1727,7 @@ sse2_combine_in_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1802,7 +1802,7 @@ sse2_combine_in_reverse_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1875,7 +1875,7 @@ sse2_combine_out_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -1951,7 +1951,7 @@ sse2_combine_out_reverse_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -2048,7 +2048,7 @@ sse2_combine_atop_ca (pixman_implementation_t *imp,
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -2141,7 +2141,7 @@ sse2_combine_atop_reverse_ca (pixman_implementation_t *imp,
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -2237,7 +2237,7 @@ sse2_combine_xor_ca (pixman_implementation_t *imp,
     __m128i xmm_alpha_dst_lo, xmm_alpha_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -2313,7 +2313,7 @@ sse2_combine_add_ca (pixman_implementation_t *imp,
     __m128i xmm_dst_lo, xmm_dst_hi;
     __m128i xmm_mask_lo, xmm_mask_hi;
 
-    while (w && (unsigned long)pd & 15)
+    while (w && (uintptr_t)pd & 15)
     {
 	s = *ps++;
 	m = *pm++;
@@ -2414,7 +2414,7 @@ sse2_composite_over_n_8888 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    d = *dst;
 	    *dst++ = pack_1x128_32 (over_1x128 (xmm_src,
@@ -2483,7 +2483,7 @@ sse2_composite_over_n_0565 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    d = *dst;
 
@@ -2568,7 +2568,7 @@ sse2_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	mask_line += mask_stride;
 
-	while (w && (unsigned long)pd & 15)
+	while (w && (uintptr_t)pd & 15)
 	{
 	    m = *pm++;
 
@@ -2682,7 +2682,7 @@ sse2_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	mask_line += mask_stride;
 
-	while (w && (unsigned long)pd & 15)
+	while (w && (uintptr_t)pd & 15)
 	{
 	    m = *pm++;
 
@@ -2786,7 +2786,7 @@ sse2_composite_over_8888_n_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    uint32_t s = *src++;
 
@@ -2878,7 +2878,7 @@ sse2_composite_src_x888_0565 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    s = *src++;
 	    *dst = CONVERT_8888_TO_0565 (s);
@@ -2932,7 +2932,7 @@ sse2_composite_src_x888_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    *dst++ = *src++ | 0xff000000;
 	    w--;
@@ -2999,7 +2999,7 @@ sse2_composite_over_x888_n_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    uint32_t s = (*src++) | 0xff000000;
 	    uint32_t d = *dst;
@@ -3125,7 +3125,7 @@ sse2_composite_over_8888_0565 (pixman_implementation_t *imp,
 
 	/* Align dst on a 16-byte boundary */
 	while (w &&
-	       ((unsigned long)dst & 15))
+	       ((uintptr_t)dst & 15))
 	{
 	    s = *src++;
 	    d = *dst;
@@ -3231,7 +3231,7 @@ sse2_composite_over_n_8_8888 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    uint8_t m = *mask++;
 
@@ -3372,21 +3372,21 @@ sse2_fill (pixman_implementation_t *imp,
 	byte_line += stride;
 	w = byte_width;
 
-	if (w >= 1 && ((unsigned long)d & 1))
+	if (w >= 1 && ((uintptr_t)d & 1))
 	{
 	    *(uint8_t *)d = xor;
 	    w -= 1;
 	    d += 1;
 	}
 
-	while (w >= 2 && ((unsigned long)d & 3))
+	while (w >= 2 && ((uintptr_t)d & 3))
 	{
 	    *(uint16_t *)d = xor;
 	    w -= 2;
 	    d += 2;
 	}
 
-	while (w >= 4 && ((unsigned long)d & 15))
+	while (w >= 4 && ((uintptr_t)d & 15))
 	{
 	    *(uint32_t *)d = xor;
 
@@ -3505,7 +3505,7 @@ sse2_composite_src_n_8_8888 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    uint8_t m = *mask++;
 
@@ -3621,7 +3621,7 @@ sse2_composite_over_n_8_0565 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    m = *mask++;
 
@@ -3745,7 +3745,7 @@ sse2_composite_over_pixbuf_0565 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    s = *src++;
 	    d = *dst;
@@ -3854,7 +3854,7 @@ sse2_composite_over_pixbuf_8888 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    s = *src++;
 	    d = *dst;
@@ -3957,7 +3957,7 @@ sse2_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	dst_line += dst_stride;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    m = *(uint32_t *) mask;
 
@@ -4083,7 +4083,7 @@ sse2_composite_in_n_8_8 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    m = (uint32_t) *mask++;
 	    d = (uint32_t) *dst;
@@ -4176,7 +4176,7 @@ sse2_composite_in_n_8 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    d = (uint32_t) *dst;
 
@@ -4245,7 +4245,7 @@ sse2_composite_in_8_8 (pixman_implementation_t *imp,
 	src_line += src_stride;
 	w = width;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    s = (uint32_t) *src++;
 	    d = (uint32_t) *dst;
@@ -4322,7 +4322,7 @@ sse2_composite_add_n_8_8 (pixman_implementation_t *imp,
 	mask_line += mask_stride;
 	w = width;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    m = (uint32_t) *mask++;
 	    d = (uint32_t) *dst;
@@ -4414,7 +4414,7 @@ sse2_composite_add_n_8 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	while (w && ((unsigned long)dst & 15))
+	while (w && ((uintptr_t)dst & 15))
 	{
 	    *dst = (uint8_t)_mm_cvtsi128_si32 (
 		_mm_adds_epu8 (
@@ -4474,7 +4474,7 @@ sse2_composite_add_8_8 (pixman_implementation_t *imp,
 	w = width;
 
 	/* Small head */
-	while (w && (unsigned long)dst & 3)
+	while (w && (uintptr_t)dst & 3)
 	{
 	    t = (*dst) + (*src++);
 	    *dst++ = t | (0 - (t >> 8));
@@ -4582,7 +4582,7 @@ sse2_blt (pixman_implementation_t *imp,
 	dst_bytes += dst_stride;
 	w = byte_width;
 
-	while (w >= 2 && ((unsigned long)d & 3))
+	while (w >= 2 && ((uintptr_t)d & 3))
 	{
 	    *(uint16_t *)d = *(uint16_t *)s;
 	    w -= 2;
@@ -4590,7 +4590,7 @@ sse2_blt (pixman_implementation_t *imp,
 	    d += 2;
 	}
 
-	while (w >= 4 && ((unsigned long)d & 15))
+	while (w >= 4 && ((uintptr_t)d & 15))
 	{
 	    *(uint32_t *)d = *(uint32_t *)s;
 
@@ -4697,7 +4697,7 @@ sse2_composite_over_x888_8_8888 (pixman_implementation_t *imp,
 
         w = width;
 
-        while (w && (unsigned long)dst & 15)
+        while (w && (uintptr_t)dst & 15)
         {
             s = 0xff000000 | *src++;
             m = (uint32_t) *mask++;
@@ -4821,7 +4821,7 @@ sse2_composite_over_8888_8_8888 (pixman_implementation_t *imp,
 
         w = width;
 
-        while (w && (unsigned long)dst & 15)
+        while (w && (uintptr_t)dst & 15)
         {
 	    uint32_t sa;
 
@@ -4960,7 +4960,7 @@ sse2_composite_over_reverse_n_8888 (pixman_implementation_t *imp,
 	dst_line += dst_stride;
 	w = width;
 
-	while (w && (unsigned long)dst & 15)
+	while (w && (uintptr_t)dst & 15)
 	{
 	    __m128i vd;
 
@@ -5045,7 +5045,7 @@ sse2_composite_over_8888_8888_8888 (pixman_implementation_t *imp,
 
         w = width;
 
-        while (w && (unsigned long)dst & 15)
+        while (w && (uintptr_t)dst & 15)
         {
 	    uint32_t sa;
 
@@ -5173,7 +5173,7 @@ scaled_nearest_scanline_sse2_8888_8888_OVER (uint32_t*       pd,
 	return;
 
     /* Align dst on a 16-byte boundary */
-    while (w && ((unsigned long)pd & 15))
+    while (w && ((uintptr_t)pd & 15))
     {
 	d = *pd;
 	s = combine1 (ps + pixman_fixed_to_int (vx), pm);
@@ -5291,7 +5291,7 @@ scaled_nearest_scanline_sse2_8888_n_8888_OVER (const uint32_t * mask,
 
     xmm_mask = create_mask_16_128 (*mask >> 24);
 
-    while (w && (unsigned long)dst & 15)
+    while (w && (uintptr_t)dst & 15)
     {
 	uint32_t s = *(src + pixman_fixed_to_int (vx));
 	vx += unit_x;
@@ -5538,7 +5538,7 @@ scaled_bilinear_scanline_sse2_8888_8888_OVER (uint32_t *       dst,
     BILINEAR_DECLARE_VARIABLES;
     uint32_t pix1, pix2, pix3, pix4;
 
-    while (w && ((unsigned long)dst & 15))
+    while (w && ((uintptr_t)dst & 15))
     {
 	BILINEAR_INTERPOLATE_ONE_PIXEL (pix1);
 
@@ -5639,7 +5639,7 @@ scaled_bilinear_scanline_sse2_8888_8_8888_OVER (uint32_t *       dst,
     uint32_t pix1, pix2, pix3, pix4;
     uint32_t m;
 
-    while (w && ((unsigned long)dst & 15))
+    while (w && ((uintptr_t)dst & 15))
     {
 	uint32_t sa;
 
@@ -5930,7 +5930,7 @@ sse2_fetch_x8r8g8b8 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && ((unsigned long)dst) & 0x0f)
+    while (w && ((uintptr_t)dst) & 0x0f)
     {
 	*dst++ = (*src++) | 0xff000000;
 	w--;
@@ -5966,7 +5966,7 @@ sse2_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && ((unsigned long)dst) & 0x0f)
+    while (w && ((uintptr_t)dst) & 0x0f)
     {
 	uint16_t s = *src++;
 
@@ -6012,7 +6012,7 @@ sse2_fetch_a8 (pixman_iter_t *iter, const uint32_t *mask)
 
     iter->bits += iter->stride;
 
-    while (w && (((unsigned long)dst) & 15))
+    while (w && (((uintptr_t)dst) & 15))
     {
         *dst++ = *(src++) << 24;
         w--;
diff --git a/test/glyph-test.c b/test/glyph-test.c
index 9dd5b41..501cc2e 100644
--- a/test/glyph-test.c
+++ b/test/glyph-test.c
@@ -215,8 +215,8 @@ create_image (int max_size, const pixman_format_code_t *formats, uint32_t flags)
     return image;
 }
 
-#define KEY1(p) ((void *)(((unsigned long)p) ^ (0xa7e23dfaUL)))
-#define KEY2(p) ((void *)(((unsigned long)p) ^ (0xabcd9876UL)))
+#define KEY1(p) ((void *)(((uintptr_t)p) ^ (0xa7e23dfaUL)))
+#define KEY2(p) ((void *)(((uintptr_t)p) ^ (0xabcd9876UL)))
 
 #define MAX_GLYPHS 32
 
diff --git a/test/utils.c b/test/utils.c
index 716bb75..ef0a171 100644
--- a/test/utils.c
+++ b/test/utils.c
@@ -377,7 +377,7 @@ fence_malloc (int64_t len)
 	return NULL;
     }
 
-    initial_page = (uint8_t *)(((unsigned long)addr + page_mask) & ~page_mask);
+    initial_page = (uint8_t *)(((uintptr_t)addr + page_mask) & ~page_mask);
     leading_protected = initial_page + page_size;
     payload = leading_protected + N_LEADING_PROTECTED * page_size;
     trailing_protected = payload + n_payload_bytes;
commit a96efd02d68b726d6d140d0bd211bc7cc1be127a
Author: Stefan Weil <sw at weilnetz.de>
Date:   Tue Nov 13 19:44:15 2012 +0100

    Always use xmmintrin.h for 64 bit Windows
    
    MinGW-w64 uses the GNU compiler and does not define _MSC_VER.
    Nevertheless, it provides xmmintrin.h and must be handled
    here like the MS compiler. Otherwise compilation fails due to
    conflicting declarations.
    
    Signed-off-by: Stefan Weil <sw at weilnetz.de>

diff --git a/pixman/pixman-mmx.c b/pixman/pixman-mmx.c
index 1e6dbe8..76b6ced 100644
--- a/pixman/pixman-mmx.c
+++ b/pixman/pixman-mmx.c
@@ -62,7 +62,7 @@ _mm_empty (void)
 #endif
 
 #ifdef USE_X86_MMX
-# if (defined(__SUNPRO_C) || defined(_MSC_VER))
+# if (defined(__SUNPRO_C) || defined(_MSC_VER) || defined(_WIN64))
 #  include <xmmintrin.h>
 # else
 /* We have to compile with -msse to use xmmintrin.h, but that causes SSE
commit 899e0d60524bcd2cff6cad6acb310181fb96b39a
Author: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Date:   Mon Nov 12 22:48:51 2012 +0100

    MIPS: DSPr2: Added several nearest neighbor fast paths with a8 mask:
    
    Performance numbers before/after on MIPS-74kc @ 1GHz:
    
    lowlevel-blt-bench -n
    
    Reference (before):
            over_8888_8_0565 =  L1:   9.62  L2:   8.85  M:  7.40 ( 39.27%)  HT:  5.67  VT:  5.61  R:  5.45  RT:  2.98 (  22Kops/s)
            over_0565_8_0565 =  L1:   7.90  L2:   7.49  M:  6.72 ( 26.75%)  HT:  5.24  VT:  5.20  R:  5.06  RT:  2.90 (  22Kops/s)
    
    Optimized:
            over_8888_8_0565 =  L1:  18.51  L2:  16.82  M: 12.13 ( 64.43%)  HT: 10.06  VT:  9.88  R:  9.54  RT:  5.63 (  31Kops/s)
            over_0565_8_0565 =  L1:  14.82  L2:  13.94  M: 11.34 ( 45.20%)  HT:  9.45  VT:  9.35  R:  9.03  RT:  5.50 (  31Kops/s)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 791244a..ba22e62 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -2100,6 +2100,163 @@ LEAF_MIPS_DSPR2(pixman_composite_add_8888_8888_asm_mips)
 
 END(pixman_composite_add_8888_8888_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
+/*
+ * a0     - dst  (r5g6b5), one halfword stored per pixel
+ * a1     - src  (a8r8g8b8), word (vx >> 16) is fetched for each pixel
+ * a2     - mask (a8), one byte read per pixel
+ * a3     - w, pixel count: two per loop pass, then at most one in the tail
+ * 16(sp) - vx, source position; vx >> 16 is the source pixel index
+ * 20(sp) - unit_x, added to vx after each source fetch in the loop
+ */
+    beqz     a3, 4f     /* w == 0: return without saving any registers */
+     nop
+
+    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
+    lw       v0, 36(sp) /* v0 = vx (entry 16(sp); presumably moved by the 20 in the save above) */
+    lw       v1, 40(sp) /* v1 = unit_x (entry 20(sp); same presumed shift) */
+    li       t6, 0x00ff00ff /* constant passed to the OVER_* macros below */
+    li       t7, 0xf800f800 /* bits 15-11 of each halfword; passed to CONVERT_2x8888_TO_2x0565 */
+    li       t8, 0x07e007e0 /* bits 10-5 of each halfword; passed to both CONVERT_2x* macros */
+    li       t9, 0x001F001F /* bits 4-0 of each halfword; passed to both CONVERT_2x* macros */
+
+    addiu    t1, a3, -1
+    beqz     t1, 2f     /* w == 1: skip the two-pixel loop */
+     nop
+1:                      /* two-pixel loop */
+    sra      t0, v0, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
+    addu     t0, a1, t0
+    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
+    addu     v0, v0, v1 /* v0 = vx + unit_x */
+    sra      t1, v0, 16 /* t1 = vx >> 16 */
+    sll      t1, t1, 2  /* t1 = t1 * 4      (a8r8g8b8) */
+    addu     t1, a1, t1
+    lw       t1, 0(t1)  /* t1 = source      (a8r8g8b8) */
+    addu     v0, v0, v1 /* v0 = vx + unit_x */
+    lbu      t2, 0(a2)  /* t2 = mask        (a8) */
+    lbu      t3, 1(a2)  /* t3 = mask        (a8) */
+    lhu      t4, 0(a0)  /* t4 = destination (r5g6b5) */
+    lhu      t5, 2(a0)  /* t5 = destination (r5g6b5) */
+    addiu    a2, a2, 2  /* mask += 2 bytes */
+
+    CONVERT_2x0565_TO_2x8888 t4, t5, s0, s1, t8, t9, s2, s3, s4, s5
+    OVER_2x8888_2x8_2x8888   t0, t1, \
+                             t2, t3, \
+                             s0, s1, \
+                             t4, t5, \
+                             t6, s2, s3, s4, s5, t2, t3
+    CONVERT_2x8888_TO_2x0565 t4, t5, s0, s1, t7, t8, t9, s2, s3
+
+    sh       s0, 0(a0)  /* dst[0] = s0 */
+    sh       s1, 2(a0)  /* dst[1] = s1 */
+    addiu    a3, a3, -2 /* w -= 2 */
+    addiu    t1, a3, -1
+    bgtz     t1, 1b     /* repeat while w > 1 */
+     addiu   a0, a0, 4  /* dst += 2 pixels (placed in the branch delay slot position) */
+2:                      /* tail: zero or one pixel left */
+    beqz     a3, 3f     /* w == 0: nothing left to composite */
+     nop
+    sra      t0, v0, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 2  /* t0 = t0 * 4      (a8r8g8b8) */
+    addu     t0, a1, t0
+    lw       t0, 0(t0)  /* t0 = source      (a8r8g8b8) */
+    lbu      t1, 0(a2)  /* t1 = mask        (a8) */
+    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
+
+    CONVERT_1x0565_TO_1x8888 t2, t3, t4, t5
+    OVER_8888_8_8888         t0, t1, t3, t2, t6, t4, t5, t7, t8
+    CONVERT_1x8888_TO_1x0565 t2, t3, t4, t5
+
+    sh       t3, 0(a0)  /* store the final pixel */
+3:
+    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
+4:
+    j        ra
+     nop
+
+END(pixman_scaled_nearest_scanline_8888_8_0565_OVER_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
+/*
+ * a0     - dst  (r5g6b5), one halfword stored per pixel
+ * a1     - src  (r5g6b5), halfword (vx >> 16) is fetched for each pixel
+ * a2     - mask (a8), one byte read per pixel
+ * a3     - w, pixel count: two per loop pass, then at most one in the tail
+ * 16(sp) - vx, source position; vx >> 16 is the source pixel index
+ * 20(sp) - unit_x, added to vx after each source fetch in the loop
+ */
+
+    beqz     a3, 4f     /* w == 0: return without saving any registers */
+     nop
+    SAVE_REGS_ON_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
+    lw       v0, 36(sp) /* v0 = vx (entry 16(sp); presumably moved by the 20 in the save above) */
+    lw       v1, 40(sp) /* v1 = unit_x (entry 20(sp); same presumed shift) */
+    li       t4, 0xf800f800 /* bits 15-11 of each halfword; passed to CONVERT_2x8888_TO_2x0565 */
+    li       t5, 0x07e007e0 /* bits 10-5 of each halfword; passed to all CONVERT_2x* calls */
+    li       t6, 0x001F001F /* bits 4-0 of each halfword; passed to all CONVERT_2x* calls */
+    li       t7, 0x00ff00ff /* constant passed to the OVER_* macros below */
+
+    addiu    t1, a3, -1
+    beqz     t1, 2f     /* w == 1: skip the two-pixel loop */
+     nop
+1:                      /* two-pixel loop */
+    sra      t0, v0, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
+    addu     t0, a1, t0
+    lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
+    addu     v0, v0, v1 /* v0 = vx + unit_x */
+    sra      t1, v0, 16 /* t1 = vx >> 16 */
+    sll      t1, t1, 1  /* t1 = t1 * 2      (r5g6b5) */
+    addu     t1, a1, t1
+    lhu      t1, 0(t1)  /* t1 = source      (r5g6b5) */
+    addu     v0, v0, v1 /* v0 = vx + unit_x */
+    lbu      t2, 0(a2)  /* t2 = mask        (a8) */
+    lbu      t3, 1(a2)  /* t3 = mask        (a8) */
+    lhu      t8, 0(a0)  /* t8 = destination (r5g6b5) */
+    lhu      t9, 2(a0)  /* t9 = destination (r5g6b5) */
+    addiu    a2, a2, 2  /* mask += 2 bytes */
+
+    CONVERT_2x0565_TO_2x8888 t0, t1, s0, s1, t5, t6, s2, s3, s4, s5
+    CONVERT_2x0565_TO_2x8888 t8, t9, s2, s3, t5, t6, s4, s5, t0, t1
+    OVER_2x8888_2x8_2x8888   s0, s1, \
+                             t2, t3, \
+                             s2, s3, \
+                             t0, t1, \
+                             t7, t8, t9, s4, s5, s0, s1
+    CONVERT_2x8888_TO_2x0565 t0, t1, s0, s1, t4, t5, t6, s2, s3
+
+    sh       s0, 0(a0)  /* dst[0] = s0 */
+    sh       s1, 2(a0)  /* dst[1] = s1 */
+    addiu    a3, a3, -2 /* w -= 2 */
+    addiu    t1, a3, -1
+    bgtz     t1, 1b     /* repeat while w > 1 */
+     addiu   a0, a0, 4  /* dst += 2 pixels (placed in the branch delay slot position) */
+2:                      /* tail: zero or one pixel left */
+    beqz     a3, 3f     /* w == 0: nothing left to composite */
+     nop
+    sra      t0, v0, 16 /* t0 = vx >> 16 */
+    sll      t0, t0, 1  /* t0 = t0 * 2      (r5g6b5) */
+    addu     t0, a1, t0
+
+    lhu      t0, 0(t0)  /* t0 = source      (r5g6b5) */
+    lbu      t1, 0(a2)  /* t1 = mask        (a8) */
+    lhu      t2, 0(a0)  /* t2 = destination (r5g6b5) */
+
+    CONVERT_1x0565_TO_1x8888 t0, t3, t4, t5
+    CONVERT_1x0565_TO_1x8888 t2, t4, t5, t6
+    OVER_8888_8_8888         t3, t1, t4, t0, t7, t2, t5, t6, t8
+    CONVERT_1x8888_TO_1x0565 t0, t3, t4, t5
+
+    sh       t3, 0(a0)  /* store the final pixel */
+3:
+    RESTORE_REGS_FROM_STACK 20, v0, v1, s0, s1, s2, s3, s4, s5
+4:
+    j        ra
+     nop
+
+END(pixman_scaled_nearest_scanline_0565_8_0565_OVER_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_scaled_bilinear_scanline_8888_8888_SRC_asm_mips)
 /*
  * a0     - *dst
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index afe4ec6..11f1254 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -116,6 +116,11 @@ PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, OVER,
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST (SKIP_ZERO_SRC, 8888_8888, ADD,
                                           uint32_t, uint32_t)
 
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 8888_8_0565,
+                                            OVER, uint32_t, uint16_t)
+PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST (SKIP_ZERO_SRC, 0565_8_0565,
+                                            OVER, uint16_t, uint16_t)
+
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_8888, SRC,
                                              uint32_t, uint32_t)
 PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_A8_DST (0, 8888_8_0565, SRC,
@@ -320,6 +325,12 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (ADD,  a8r8g8b8, null,     a8r8g8b8, mips_composite_add_8888_8888),
     PIXMAN_STD_FAST_PATH (ADD,  a8b8g8r8, null,     a8b8g8r8, mips_composite_add_8888_8888),
 
+    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8r8g8b8, r5g6b5, mips_8888_8_0565),
+    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, a8b8g8r8, b5g6r5, mips_8888_8_0565),
+
+    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, r5g6b5, r5g6b5, mips_0565_8_0565),
+    PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH (OVER, b5g6r5, b5g6r5, mips_0565_8_0565),
+
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, mips_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, mips_8888_8888),
     SIMPLE_BILINEAR_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, mips_8888_8888),
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index 186a501..4ac9ff9 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -246,6 +246,52 @@ mips_composite_##name (pixman_implementation_t *imp,                     \
     }                                                                    \
 }
 
+/*****************************************************************************/
+/* Bind a nearest-scaled, a8-masked asm scanline to COVER/NONE/PAD loops. */
+#define PIXMAN_MIPS_BIND_SCALED_NEAREST_SRC_A8_DST(flags, name, op,           \
+                                                  src_type, dst_type)         \
+void                                                                          \
+pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (                     \
+                                                   dst_type *       dst,      \
+                                                   const src_type * src,      \
+                                                   const uint8_t *  mask,     \
+                                                   int32_t          w,        \
+                                                   pixman_fixed_t   vx,       \
+                                                   pixman_fixed_t   unit_x);  \
+/* Adapter: reorders the arguments for asm; max_vx is not forwarded. */       \
+static force_inline void                                                      \
+scaled_nearest_scanline_mips_##name##_##op (const uint8_t *  mask,            \
+                                            dst_type *       pd,              \
+                                            const src_type * ps,              \
+                                            int32_t          w,               \
+                                            pixman_fixed_t   vx,              \
+                                            pixman_fixed_t   unit_x,          \
+                                            pixman_fixed_t   max_vx,          \
+                                            pixman_bool_t    zero_src)        \
+{                                                                             \
+    if (((flags) & SKIP_ZERO_SRC) && zero_src)                                \
+        return;                                                               \
+    pixman_scaled_nearest_scanline_##name##_##op##_asm_mips (pd, ps,          \
+                                                             mask, w,         \
+                                                             vx, unit_x);     \
+}                                                                             \
+/* Main loops for the COVER, NONE and PAD variants share the adapter. */      \
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_cover_##op,                       \
+                              scaled_nearest_scanline_mips_##name##_##op,     \
+                              src_type, uint8_t, dst_type, COVER, TRUE, FALSE)\
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_none_##op,                        \
+                              scaled_nearest_scanline_mips_##name##_##op,     \
+                              src_type, uint8_t, dst_type, NONE, TRUE, FALSE) \
+FAST_NEAREST_MAINLOOP_COMMON (mips_##name##_pad_##op,                         \
+                              scaled_nearest_scanline_mips_##name##_##op,     \
+                              src_type, uint8_t, dst_type, PAD, TRUE, FALSE)
+
+/* Expands to the COVER, NONE and PAD fast path table entries, in that order */
+#define PIXMAN_MIPS_SIMPLE_NEAREST_A8_MASK_FAST_PATH(op, s_fmt, d_fmt, fn)    \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op, s_fmt, d_fmt, fn),            \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op, s_fmt, d_fmt, fn),             \
+    SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op, s_fmt, d_fmt, fn)
+
 /****************************************************************************/
 
 #define PIXMAN_MIPS_BIND_SCALED_BILINEAR_SRC_DST(flags, name, op,            \
commit a432bdce6637aa96060b9f1e25aae51c6fb95670
Author: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Date:   Mon Nov 12 22:48:53 2012 +0100

    MIPS: DSPr2: Added more fast-paths for OVER operation:
    
    Performance numbers before/after on MIPS-74kc @ 1GHz:
    
    lowlevel-blt-bench results
    
    Reference (before):
            over_n_0565 =  L1:  14.48  L2:  21.36  M: 17.57 ( 23.30%)  HT:  6.95  VT:  6.44  R:  6.39  RT:  2.16 (  22Kops/s)
            over_n_8888 =  L1:  92.60  L2:  86.13  M: 24.41 ( 64.74%)  HT:  8.94  VT:  8.06  R:  8.00  RT:  2.53 (  25Kops/s)
    
    Optimized:
            over_n_0565 =  L1:  27.65  L2: 189.22  M: 58.19 ( 77.12%)  HT: 52.80  VT: 49.88  R: 47.53  RT: 23.67 (  72Kops/s)
            over_n_8888 =  L1: 235.99  L2: 230.86  M: 29.09 ( 77.11%)  HT: 27.95  VT: 27.24  R: 26.58  RT: 18.10 (  67Kops/s)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index 02adb6d..791244a 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -1342,6 +1342,140 @@ LEAF_MIPS_DSPR2(pixman_composite_over_8888_8888_asm_mips)
 
 END(pixman_composite_over_8888_8888_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_over_n_0565_asm_mips)
+/* Composite one constant 32-bit source OVER a row of r5g6b5 pixels, in place.
+ * a0 - dst  (r5g6b5)
+ * a1 - src  (32bit constant)
+ * a2 - w    (pixel count; returns at once when zero)
+ */
+
+    beqz         a2, 5f
+     nop
+
+    not          t0, a1
+    srl          t0, t0, 24 /* t0 = top byte of ~src (255 - alpha, assuming a8r8g8b8 src) */
+    bgtz         t0, 1f     /* non-zero: take the blending path */
+     nop
+    CONVERT_1x8888_TO_1x0565 a1, t1, t2, t3 /* t0 == 0: convert src once; t1 is what gets stored */
+0:
+    sh           t1, 0(a0)  /* fill loop: the same 16-bit value for every pixel */
+    addiu        a2, a2, -1
+    bgtz         a2, 0b
+     addiu       a0, a0, 2  /* delay slot: advance dst by one pixel */
+    j            ra         /* nothing was saved on the stack on this path */
+     nop
+
+1:
+    SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 serve as scratch registers below */
+    li           t4, 0x00ff00ff /* constant handed to the UN8 multiply macros */
+    li           t5, 0xf800f800 /* bits 15..11 of each halfword */
+    li           t6, 0x07e007e0 /* bits 10..5  of each halfword */
+    li           t7, 0x001F001F /* bits 4..0   of each halfword */
+    addiu        t1, a2, -1
+    beqz         t1, 3f     /* w == 1: only the single-pixel tail runs */
+     nop
+2:
+    lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
+    lhu          t2, 2(a0) /* t2 = destination (r5g6b5) */
+
+    CONVERT_2x0565_TO_2x8888 t1, t2, t3, t8, t6, t7, t9, s0, s1, s2
+    MIPS_2xUN8x4_MUL_2xUN8   t3, t8, t0, t0, t1, t2, t4, t9, s0, s1, s2, t3, t8
+    addu_s.qb                t1, t1, a1 /* add src to each result, saturating per byte */
+    addu_s.qb                t2, t2, a1
+    CONVERT_2x8888_TO_2x0565 t1, t2, t3, t8, t5, t6, t7, s0, s1
+
+    sh           t3, 0(a0)
+    sh           t8, 2(a0)
+
+    addiu        a2, a2, -2
+    addiu        t1, a2, -1
+    bgtz         t1, 2b     /* loop while at least two pixels remain */
+     addiu       a0, a0, 4  /* delay slot: advance dst by two pixels */
+3:
+    beqz         a2, 4f     /* no odd pixel left over */
+     nop
+
+    lhu          t1, 0(a0) /* t1 = destination (r5g6b5) */
+
+    CONVERT_1x0565_TO_1x8888 t1, t2, s0, s1
+    MIPS_UN8x4_MUL_UN8       t2, t0, t1, t4, s0, s1, s2
+    addu_s.qb                t1, t1, a1 /* add src, saturating per byte */
+    CONVERT_1x8888_TO_1x0565 t1, t2, s0, s1
+
+    sh           t2, 0(a0)
+
+4:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+5:
+    j            ra
+     nop
+
+END(pixman_composite_over_n_0565_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_asm_mips)
+/* Composite one constant 32-bit source OVER a row of 32-bit pixels, in place.
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (32bit constant)
+ * a2 - w    (pixel count; returns at once when zero)
+ */
+
+    beqz         a2, 5f
+     nop
+
+    not          t0, a1
+    srl          t0, t0, 24 /* t0 = top byte of ~src (255 - alpha, assuming a8r8g8b8 src) */
+    bgtz         t0, 1f     /* non-zero: take the blending path */
+     nop
+0:
+    sw           a1, 0(a0)  /* t0 == 0: store src unchanged into every pixel */
+    addiu        a2, a2, -1
+    bgtz         a2, 0b
+     addiu       a0, a0, 4  /* delay slot: advance dst by one pixel */
+    j            ra         /* nothing was saved on the stack on this path */
+     nop
+
+1:
+    SAVE_REGS_ON_STACK 0, s0, s1, s2 /* s0-s2 serve as scratch in the paired loop */
+    li           t4, 0x00ff00ff /* constant handed to the UN8 multiply macros */
+    addiu        t1, a2, -1
+    beqz         t1, 3f     /* w == 1: only the single-pixel tail runs */
+     nop
+2:
+    lw           t2, 0(a0) /* t2 = destination (a8r8g8b8) */
+    lw           t3, 4(a0) /* t3 = destination (a8r8g8b8) */
+
+    MIPS_2xUN8x4_MUL_2xUN8 t2, t3, t0, t0, t7, t8, t4, t9, s0, s1, s2, t2, t3
+
+    addu_s.qb    t7, t7, a1 /* add src to each result, saturating per byte */
+    addu_s.qb    t8, t8, a1
+
+    sw           t7, 0(a0)
+    sw           t8, 4(a0)
+
+    addiu        a2, a2, -2
+    addiu        t1, a2, -1
+    bgtz         t1, 2b     /* loop while at least two pixels remain */
+     addiu       a0, a0, 8  /* delay slot: advance dst by two pixels */
+3:
+    beqz         a2, 4f     /* no odd pixel left over */
+     nop
+
+    lw           t1, 0(a0) /* t1 = destination (a8r8g8b8) */
+
+    MIPS_UN8x4_MUL_UN8 t1, t0, t3, t4, t5, t6, t7
+
+    addu_s.qb    t3, t3, a1 /* add src, saturating per byte */
+
+    sw           t3, 0(a0)
+
+4:
+    RESTORE_REGS_FROM_STACK 0, s0, s1, s2
+5:
+    j            ra
+     nop
+
+END(pixman_composite_over_n_8888_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_composite_add_8_8_8_asm_mips)
 /*
  * a0 - dst  (a8)
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 44565e7..afe4ec6 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -81,6 +81,11 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, over_0565_n_0565,
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_N_DST (SKIP_ZERO_MASK, add_8888_n_8888,
                                       uint32_t, 1, uint32_t, 1)
 
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_0565,
+                                  uint16_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_DST (SKIP_ZERO_SRC, over_n_8888,
+                                  uint32_t, 1)
+
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8_8_8, uint8_t,  1,
                                          uint8_t,  1, uint8_t,  1)
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_MASK_DST (add_8888_8_8888, uint32_t, 1,
@@ -278,7 +283,9 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       x8b8g8r8, mips_composite_over_n_8_8888),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       r5g6b5,   mips_composite_over_n_8_0565),
     PIXMAN_STD_FAST_PATH (OVER, solid,    a8,       b5g6r5,   mips_composite_over_n_8_0565),
-
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     r5g6b5,   mips_composite_over_n_0565),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     a8r8g8b8, mips_composite_over_n_8888),
+    PIXMAN_STD_FAST_PATH (OVER, solid,    null,     x8r8g8b8, mips_composite_over_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    a8r8g8b8, mips_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    x8r8g8b8, mips_composite_over_8888_n_8888),
     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid,    r5g6b5,   mips_composite_over_8888_n_0565),
diff --git a/pixman/pixman-mips-dspr2.h b/pixman/pixman-mips-dspr2.h
index bddcfd8..186a501 100644
--- a/pixman/pixman-mips-dspr2.h
+++ b/pixman/pixman-mips-dspr2.h
@@ -85,6 +85,42 @@ mips_composite_##name (pixman_implementation_t *imp,             \
     }                                                            \
 }
 
+/****************************************************************/
+/* Bind a solid-source, unmasked asm scanline (dst, src, w) as a composite. */
+#define PIXMAN_MIPS_BIND_FAST_PATH_N_DST(flags, name,            \
+                                         dst_type, dst_cnt)      \
+void                                                             \
+pixman_composite_##name##_asm_mips (dst_type *dst,               \
+                                    uint32_t  src,               \
+                                    int32_t   w);                \
+/* C driver: calls the asm routine once per destination row. */  \
+static void                                                      \
+mips_composite_##name (pixman_implementation_t *imp,             \
+                       pixman_composite_info_t *info)            \
+{                                                                \
+    PIXMAN_COMPOSITE_ARGS (info);                                \
+    dst_type  *dst_line, *dst;                                   \
+    int32_t    dst_stride;                                       \
+    uint32_t   src;                                              \
+                                                                 \
+    src = _pixman_image_get_solid (                              \
+        imp, src_image, dest_image->bits.format);                \
+    /* With SKIP_ZERO_SRC set, a zero source returns early. */   \
+    if (((flags) & SKIP_ZERO_SRC) && src == 0)                   \
+        return;                                                  \
+                                                                 \
+    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type, \
+                           dst_stride, dst_line, dst_cnt);       \
+                                                                 \
+    while (height--)                                             \
+    {                                                            \
+        dst = dst_line;                                          \
+        dst_line += dst_stride;                                  \
+                                                                 \
+        pixman_composite_##name##_asm_mips (dst, src, width);    \
+    }                                                            \
+}
+
 /*******************************************************************/
 
 #define PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST(flags, name,          \
commit e33e9d3f55590c369c532b0305f928045e0a46cb
Author: Nemanja Lukic <nemanja.lukic at rt-rk.com>
Date:   Mon Nov 12 22:48:52 2012 +0100

    MIPS: DSPr2: Added more fast-paths for SRC operation:
    
    Performance numbers before/after on MIPS-74kc @ 1GHz:
    
    lowlevel-blt-bench results
    
    Reference (before):
            src_n_8_8888 =  L1:  13.79  L2:  22.47  M: 17.55 ( 58.28%)  HT:  6.95  VT:  6.46  R:  6.34  RT:  2.07 (  20Kops/s)
               src_n_8_8 =  L1:  20.22  L2:  20.21  M: 18.20 ( 24.17%)  HT:  6.65  VT:  6.22  R:  6.11  RT:  2.03 (  20Kops/s)
    
    Optimized:
            src_n_8_8888 =  L1:  58.31  L2:  53.34  M: 25.69 ( 85.29%)  HT: 22.55  VT: 21.44  R: 19.91  RT: 10.34 (  48Kops/s)
               src_n_8_8 =  L1: 102.60  L2:  89.43  M: 65.01 ( 86.32%)  HT: 37.87  VT: 37.02  R: 32.43  RT: 12.41 (  51Kops/s)

diff --git a/pixman/pixman-mips-dspr2-asm.S b/pixman/pixman-mips-dspr2-asm.S
index b5cae16..02adb6d 100644
--- a/pixman/pixman-mips-dspr2-asm.S
+++ b/pixman/pixman-mips-dspr2-asm.S
@@ -310,6 +310,139 @@ LEAF_MIPS_DSPR2(pixman_composite_src_x888_8888_asm_mips)
 
 END(pixman_composite_src_x888_8888_asm_mips)
 
+LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8888_asm_mips)
+/* Writes w 32-bit pixels, each the constant src multiplied by one a8 mask byte.
+ * a0 - dst  (a8r8g8b8)
+ * a1 - src  (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+
+    SAVE_REGS_ON_STACK 0, v0 /* saved before the w check, so every exit goes through 3: */
+    li       v0, 0x00ff00ff  /* constant handed to the UN8 multiply macros */
+
+    beqz     a3, 3f
+     nop
+    addiu    t1, a3, -1
+    beqz     t1, 2f          /* w == 1: only the single-pixel tail runs */
+     nop
+
+1:
+                       /* a1 = source      (32bit constant) */
+    lbu      t0, 0(a2) /* t0 = mask        (a8) */
+    lbu      t1, 1(a2) /* t1 = mask        (a8) */
+    addiu    a2, a2, 2
+
+    MIPS_2xUN8x4_MUL_2xUN8 a1, a1, t0, t1, t2, t3, v0, t4, t5, t6, t7, t8, t9
+
+    sw       t2, 0(a0)       /* t2, t3 = the two results of the macro above */
+    sw       t3, 4(a0)
+    addiu    a3, a3, -2
+    addiu    t2, a3, -1
+    bgtz     t2, 1b          /* loop while at least two pixels remain */
+     addiu   a0, a0, 8       /* delay slot: advance dst by two pixels */
+
+    beqz     a3, 3f          /* no odd pixel left over */
+     nop
+
+2:
+    lbu      t0, 0(a2)       /* t0 = mask (a8) for the last pixel */
+    addiu    a2, a2, 1
+
+    MIPS_UN8x4_MUL_UN8 a1, t0, t1, v0, t3, t4, t5
+
+    sw       t1, 0(a0)
+    addiu    a3, a3, -1
+    addiu    a0, a0, 4
+
+3:
+    RESTORE_REGS_FROM_STACK 0, v0
+    j        ra
+     nop
+
+END(pixman_composite_src_n_8_8888_asm_mips)
+
+LEAF_MIPS_DSPR2(pixman_composite_src_n_8_8_asm_mips)
+/* Writes w a8 pixels from mask[i] * (src >> 24), scaled back to 8 bits.
+ * a0 - dst  (a8)
+ * a1 - src  (32bit constant)
+ * a2 - mask (a8)
+ * a3 - w
+ */
+
+    li                t9, 0x00ff00ff
+    beqz              a3, 3f
+     nop
+    srl               t7, a3, 2   /* t7 = how many multiples of 4 dst pixels */
+    beqz              t7, 1f      /* branch if fewer than 4 pixels to process */
+     nop
+
+    srl               t8, a1, 24  /* t8 = top byte of src */
+    replv.ph          t8, t8      /* replicate it into both halfwords of t8 */
+
+0:
+    beqz              t7, 1f      /* all groups of four are done */
+     addiu            t7, t7, -1  /* delay slot: executes on both outcomes of the branch */
+    lbu               t0, 0(a2)   /* t0..t3 = four consecutive mask bytes */
+    lbu               t1, 1(a2)
+    lbu               t2, 2(a2)
+    lbu               t3, 3(a2)
+
+    addiu             a2, a2, 4
+
+    precr_sra.ph.w    t1, t0, 0   /* these three gather the four mask bytes into t0 */
+    precr_sra.ph.w    t3, t2, 0
+    precr.qb.ph       t0, t3, t1
+
+    muleu_s.ph.qbl    t2, t0, t8  /* upper two bytes of t0 times t8 -> 16-bit products */
+    muleu_s.ph.qbr    t3, t0, t8  /* lower two bytes of t0 times t8 -> 16-bit products */
+    shra_r.ph         t4, t2, 8   /* from here: reduce each product to 8 bits (rounded ~x/255) */
+    shra_r.ph         t5, t3, 8
+    and               t4, t4, t9  /* keep only the low byte of each halfword */
+    and               t5, t5, t9
+    addq.ph           t2, t2, t4
+    addq.ph           t3, t3, t5
+    shra_r.ph         t2, t2, 8
+    shra_r.ph         t3, t3, 8
+    precr.qb.ph       t2, t2, t3  /* repack the four 8-bit results into t2 */
+
+    sb                t2, 0(a0)   /* store the four result bytes, lowest byte first */
+    srl               t2, t2, 8
+    sb                t2, 1(a0)
+    srl               t2, t2, 8
+    sb                t2, 2(a0)
+    srl               t2, t2, 8
+    sb                t2, 3(a0)
+    addiu             a3, a3, -4
+    b                 0b
+     addiu            a0, a0, 4   /* delay slot: advance dst by four pixels */
+
+1:
+    beqz              a3, 3f      /* no remaining pixels */
+     nop
+    srl               t8, a1, 24  /* t8 = top byte of src again, not replicated this time */
+2:
+    lbu               t0, 0(a2)   /* t0 = mask (a8), one pixel per pass */
+    addiu             a2, a2, 1
+
+    mul               t2, t0, t8  /* same reduction as above, on a single product */
+    shra_r.ph         t3, t2, 8
+    andi              t3, t3, 0x00ff
+    addq.ph           t2, t2, t3
+    shra_r.ph         t2, t2, 8
+
+    sb                t2, 0(a0)
+    addiu             a3, a3, -1
+    bnez              a3, 2b
+     addiu            a0, a0, 1   /* delay slot: advance dst by one pixel */
+
+3:
+    j                 ra
+     nop
+
+END(pixman_composite_src_n_8_8_asm_mips)
+
 LEAF_MIPS_DSPR2(pixman_composite_over_n_8888_8888_ca_asm_mips)
 /*
  * a0 - dst  (a8r8g8b8)
diff --git a/pixman/pixman-mips-dspr2.c b/pixman/pixman-mips-dspr2.c
index 9da636d..44565e7 100644
--- a/pixman/pixman-mips-dspr2.c
+++ b/pixman/pixman-mips-dspr2.c
@@ -55,6 +55,10 @@ PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8_8,
 PIXMAN_MIPS_BIND_FAST_PATH_SRC_DST (0, add_8888_8888,
                                     uint32_t, 1, uint32_t, 1)
 
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8888,
+                                       uint8_t, 1, uint32_t, 1)
+PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (0, src_n_8_8,
+                                       uint8_t, 1, uint8_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_8888_ca,
                                        uint32_t, 1, uint32_t, 1)
 PIXMAN_MIPS_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, over_n_8888_0565_ca,
@@ -256,6 +260,11 @@ static const pixman_fast_path_t mips_dspr2_fast_paths[] =
     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, mips_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, mips_composite_src_x888_8888),
     PIXMAN_STD_FAST_PATH (SRC, r8g8b8,   null, r8g8b8,   mips_composite_src_0888_0888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8r8g8b8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   x8r8g8b8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8b8g8r8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   x8b8g8r8, mips_composite_src_n_8_8888),
+    PIXMAN_STD_FAST_PATH (SRC, solid,    a8,   a8,       mips_composite_src_n_8_8),
 
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, mips_composite_over_n_8888_8888_ca),
     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, mips_composite_over_n_8888_8888_ca),


More information about the xorg-commit mailing list