pixman: Branch 'master' - 4 commits
Søren Sandmann Pedersen
sandmann at kemper.freedesktop.org
Mon Apr 18 13:30:38 PDT 2011
configure.ac | 49 +++++++-----
demos/tri-test.c | 2
pixman/pixman-arm-neon-asm.S | 169 +++++++++++++++++++++++++++++++++++++++++++
pixman/pixman-arm-neon.c | 4 +
pixman/pixman-trap.c | 23 ++++-
test/composite-traps-test.c | 2
6 files changed, 220 insertions(+), 29 deletions(-)
New commits:
commit b455496890f7f941d561c284aca14783300bedd6
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date: Fri Mar 11 07:52:57 2011 -0500
Offset rendering in pixman_composite_trapezoids() by (x_dst, y_dst)
Previously, this function would do coordinate calculations in such a
way that (x_dst, y_dst) would only affect the alignment of the source
image, but not of the traps, which would always be considered to be in
absolute destination coordinates. This is unlike the
pixman_image_composite() function which also registers the mask to the
destination.
This patch makes it so that traps are also offset by (x_dst, y_dst).
Also add a comment explaining how this function is supposed to
operate, and update tri-test.c and composite-trap-test.c to deal with
the new semantics.
diff --git a/demos/tri-test.c b/demos/tri-test.c
index ff4779e..a71869a 100644
--- a/demos/tri-test.c
+++ b/demos/tri-test.c
@@ -36,7 +36,7 @@ main (int argc, char **argv)
dest_img,
PIXMAN_a8,
200, 200,
- 35, 5,
+ -5, 5,
ARRAY_LENGTH (tris), tris);
show_image (dest_img);
diff --git a/pixman/pixman-trap.c b/pixman/pixman-trap.c
index 2957a2b..c99f03e 100644
--- a/pixman/pixman-trap.c
+++ b/pixman/pixman-trap.c
@@ -387,6 +387,19 @@ pixman_rasterize_trapezoid (pixman_image_t * image,
}
}
+/*
+ * pixman_composite_trapezoids()
+ *
+ * All the trapezoids are conceptually rendered to an infinitely big image.
+ * The (0, 0) coordinates of this image are then aligned with the (x, y)
+ * coordinates of the source image, and then both images are aligned with
+ * the (x, y) coordinates of the destination. Then, in principle, compositing
+ * of these three images takes place across the entire destination.
+ *
+ * FIXME: However, there is currently a bug, where we restrict this compositing
+ * to the bounding box of the trapezoids. This is incorrect for operators such
+ * as SRC and IN where blank source pixels do have an effect on the destination.
+ */
PIXMAN_EXPORT void
pixman_composite_trapezoids (pixman_op_t op,
pixman_image_t * src,
@@ -419,14 +432,13 @@ pixman_composite_trapezoids (pixman_op_t op,
if (!pixman_trapezoid_valid (trap))
continue;
- pixman_rasterize_trapezoid (dst, trap, 0, 0);
+ pixman_rasterize_trapezoid (dst, trap, x_dst, y_dst);
}
}
else
{
pixman_image_t *tmp;
pixman_box32_t box;
- int x_rel, y_rel;
box.x1 = INT32_MAX;
box.y1 = INT32_MAX;
@@ -482,11 +494,10 @@ pixman_composite_trapezoids (pixman_op_t op,
pixman_rasterize_trapezoid (tmp, trap, - box.x1, - box.y1);
}
- x_rel = box.x1 + x_src - x_dst;
- y_rel = box.y1 + y_src - y_dst;
-
pixman_image_composite (op, src, tmp, dst,
- x_rel, y_rel, 0, 0, box.x1, box.y1,
+ x_src + box.x1, y_src + box.y1,
+ 0, 0,
+ x_dst + box.x1, y_dst + box.y1,
box.x2 - box.x1, box.y2 - box.y1);
pixman_image_unref (tmp);
diff --git a/test/composite-traps-test.c b/test/composite-traps-test.c
index cf30281..fa6d8a9 100644
--- a/test/composite-traps-test.c
+++ b/test/composite-traps-test.c
@@ -252,6 +252,6 @@ test_composite (int testnum,
int
main (int argc, const char *argv[])
{
- return fuzzer_test_main("composite traps", 40000, 0xA34F95C7,
+ return fuzzer_test_main("composite traps", 40000, 0xE3112106,
test_composite, argc, argv);
}
commit e75e6a4ef5c5a8ac8b0e8464f08f83fd2b6e86ed
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date: Sat Apr 2 23:24:48 2011 -0400
ARM: Add 'neon_composite_over_n_8888_0565_ca' fast path
This improves the performance of the firefox-talos-gfx benchmark with
the image16 backend. Benchmark on an 800 MHz ARM Cortex A8:
Before:
[ # ] backend test min(s) median(s) stddev. count
[ 0] image16 firefox-talos-gfx 121.773 122.218 0.15% 6/6
After:
[ # ] backend test min(s) median(s) stddev. count
[ 0] image16 firefox-talos-gfx 85.247 85.563 0.22% 6/6
V2: Slightly better instruction scheduling based on comments from Taekyun Kim.
V3: Eliminate all stalls from the inner loop. Also based on comments from Taekyun Kim.
diff --git a/pixman/pixman-arm-neon-asm.S b/pixman/pixman-arm-neon-asm.S
index 5e9fda3..833f18c 100644
--- a/pixman/pixman-arm-neon-asm.S
+++ b/pixman/pixman-arm-neon-asm.S
@@ -1426,6 +1426,175 @@ generate_composite_function \
/******************************************************************************/
+.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_head
+ /*
+ * 'combine_mask_ca' replacement
+ *
+ * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A]
+ * mask in {d24, d25, d26} [B, G, R]
+ * output: updated src in {d0, d1, d2 } [B, G, R]
+ * updated mask in {d24, d25, d26} [B, G, R]
+ */
+ vmull.u8 q0, d24, d8
+ vmull.u8 q1, d25, d9
+ vmull.u8 q6, d26, d10
+ vmull.u8 q9, d11, d25
+ vmull.u8 q12, d11, d24
+ vmull.u8 q13, d11, d26
+ vrshr.u16 q8, q0, #8
+ vrshr.u16 q10, q1, #8
+ vrshr.u16 q11, q6, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q10
+ vraddhn.u16 d2, q6, q11
+ vrshr.u16 q11, q12, #8
+ vrshr.u16 q8, q9, #8
+ vrshr.u16 q6, q13, #8
+ vraddhn.u16 d24, q12, q11
+ vraddhn.u16 d25, q9, q8
+ /*
+ * convert 8 r5g6b5 pixel data from {d4, d5} to planar 8-bit format
+ * and put data into d16 - blue, d17 - green, d18 - red
+ */
+ vshrn.u16 d17, q2, #3
+ vshrn.u16 d18, q2, #8
+ vraddhn.u16 d26, q13, q6
+ vsli.u16 q2, q2, #5
+ vsri.u8 d18, d18, #5
+ vsri.u8 d17, d17, #6
+ /*
+ * 'combine_over_ca' replacement
+ *
+ * output: updated dest in d16 - blue, d17 - green, d18 - red
+ */
+ vmvn.8 q12, q12
+ vshrn.u16 d16, q2, #2
+ vmvn.8 d26, d26
+ vmull.u8 q6, d16, d24
+ vmull.u8 q7, d17, d25
+ vmull.u8 q11, d18, d26
+.endm
+
+.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail
+ /* ... continue 'combine_over_ca' replacement */
+ vrshr.u16 q10, q6, #8
+ vrshr.u16 q14, q7, #8
+ vrshr.u16 q15, q11, #8
+ vraddhn.u16 d16, q10, q6
+ vraddhn.u16 d17, q14, q7
+ vraddhn.u16 d18, q15, q11
+ vqadd.u8 q8, q0, q8
+ vqadd.u8 d18, d2, d18
+ /*
+ * convert the results in d16, d17, d18 to r5g6b5 and store
+ * them into {d28, d29}
+ */
+ vshll.u8 q14, d18, #8
+ vshll.u8 q10, d17, #8
+ vshll.u8 q15, d16, #8
+ vsri.u16 q14, q10, #5
+ vsri.u16 q14, q15, #11
+.endm
+
+.macro pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head
+ fetch_mask_pixblock
+ vrshr.u16 q10, q6, #8
+ vrshr.u16 q14, q7, #8
+ vld1.16 {d4, d5}, [DST_R, :128]!
+ vrshr.u16 q15, q11, #8
+ vraddhn.u16 d16, q10, q6
+ vraddhn.u16 d17, q14, q7
+ vraddhn.u16 d22, q15, q11
+ /* process_pixblock_head */
+ /*
+ * 'combine_mask_ca' replacement
+ *
+ * input: solid src (n) in {d8, d9, d10, d11} [B, G, R, A]
+ * mask in {d24, d25, d26} [B, G, R]
+ * output: updated src in {d0, d1, d2 } [B, G, R]
+ * updated mask in {d24, d25, d26} [B, G, R]
+ */
+ vmull.u8 q1, d25, d9
+ vqadd.u8 q8, q0, q8
+ vmull.u8 q0, d24, d8
+ vqadd.u8 d22, d2, d22
+ vmull.u8 q6, d26, d10
+ /*
+ * convert the result in d16, d17, d22 to r5g6b5 and store
+ * it into {d28, d29}
+ */
+ vshll.u8 q14, d22, #8
+ vshll.u8 q10, d17, #8
+ vshll.u8 q15, d16, #8
+ vmull.u8 q9, d11, d25
+ vsri.u16 q14, q10, #5
+ vmull.u8 q12, d11, d24
+ vmull.u8 q13, d11, d26
+ vsri.u16 q14, q15, #11
+ cache_preload 8, 8
+ vrshr.u16 q8, q0, #8
+ vrshr.u16 q10, q1, #8
+ vrshr.u16 q11, q6, #8
+ vraddhn.u16 d0, q0, q8
+ vraddhn.u16 d1, q1, q10
+ vraddhn.u16 d2, q6, q11
+ vrshr.u16 q11, q12, #8
+ vrshr.u16 q8, q9, #8
+ vrshr.u16 q6, q13, #8
+ vraddhn.u16 d25, q9, q8
+ /*
+ * convert 8 r5g6b5 pixel data from {d4, d5} to planar
+ * 8-bit format and put data into d16 - blue, d17 - green,
+ * d18 - red
+ */
+ vshrn.u16 d17, q2, #3
+ vshrn.u16 d18, q2, #8
+ vraddhn.u16 d24, q12, q11
+ vraddhn.u16 d26, q13, q6
+ vsli.u16 q2, q2, #5
+ vsri.u8 d18, d18, #5
+ vsri.u8 d17, d17, #6
+ /*
+ * 'combine_over_ca' replacement
+ *
+ * output: updated dest in d16 - blue, d17 - green, d18 - red
+ */
+ vmvn.8 q12, q12
+ vshrn.u16 d16, q2, #2
+ vmvn.8 d26, d26
+ vmull.u8 q7, d17, d25
+ vmull.u8 q6, d16, d24
+ vmull.u8 q11, d18, d26
+ vst1.16 {d28, d29}, [DST_W, :128]!
+.endm
+
+.macro pixman_composite_over_n_8888_0565_ca_init
+ add DUMMY, sp, #ARGS_STACK_OFFSET
+ vpush {d8-d15}
+ vld1.32 {d11[0]}, [DUMMY]
+ vdup.8 d8, d11[0]
+ vdup.8 d9, d11[1]
+ vdup.8 d10, d11[2]
+ vdup.8 d11, d11[3]
+.endm
+
+.macro pixman_composite_over_n_8888_0565_ca_cleanup
+ vpop {d8-d15}
+.endm
+
+generate_composite_function \
+ pixman_composite_over_n_8888_0565_ca_asm_neon, 0, 32, 16, \
+ FLAG_DST_READWRITE | FLAG_DEINTERLEAVE_32BPP, \
+ 8, /* number of pixels, processed in a single block */ \
+ 5, /* prefetch distance */ \
+ pixman_composite_over_n_8888_0565_ca_init, \
+ pixman_composite_over_n_8888_0565_ca_cleanup, \
+ pixman_composite_over_n_8888_0565_ca_process_pixblock_head, \
+ pixman_composite_over_n_8888_0565_ca_process_pixblock_tail, \
+ pixman_composite_over_n_8888_0565_ca_process_pixblock_tail_head
+
+/******************************************************************************/
+
.macro pixman_composite_in_n_8_process_pixblock_head
/* expecting source data in {d0, d1, d2, d3} */
/* and destination data in {d4, d5, d6, d7} */
diff --git a/pixman/pixman-arm-neon.c b/pixman/pixman-arm-neon.c
index 0a10ca1..77875ad 100644
--- a/pixman/pixman-arm-neon.c
+++ b/pixman/pixman-arm-neon.c
@@ -80,6 +80,8 @@ PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8888,
uint8_t, 1, uint32_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_8888_ca,
uint32_t, 1, uint32_t, 1)
+PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8888_0565_ca,
+ uint32_t, 1, uint16_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, over_n_8_8,
uint8_t, 1, uint8_t, 1)
PIXMAN_ARM_BIND_FAST_PATH_N_MASK_DST (SKIP_ZERO_SRC, neon, add_n_8_8,
@@ -282,6 +284,8 @@ static const pixman_fast_path_t arm_neon_fast_paths[] =
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, neon_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, neon_composite_over_n_8888_8888_ca),
PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, neon_composite_over_n_8888_8888_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, neon_composite_over_n_8888_0565_ca),
+ PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, neon_composite_over_n_8888_0565_ca),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, a8r8g8b8, neon_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, x8r8g8b8, neon_composite_over_8888_n_8888),
PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, solid, r5g6b5, neon_composite_over_8888_n_0565),
commit 1670b952143284f480c39ff087b5694a64eb7db3
Author: Gilles Espinasse <g.esp at free.fr>
Date: Tue Apr 12 22:44:56 2011 +0200
Fix OpenMP not supported case
PIXMAN_LINK_WITH_ENV did not fail unless -Wall -Werror is used.
So even when the compiler did not support OpenMP, USE_OPENMP was defined.
Fix that by running the second OpenMP test only when first AC_OPENMP find supported
configure tested in the cases :
gcc without libgomp support, no openmp option, --enable-openmp and --disable-openmp
gcc with libgomp support, no openmp option, --enable-openmp and --disable-openmp
Not tested with autoconf version not knowing openmp (<2.62)
Warn when --enable-openmp is requested but no support is found
Signed-off-by: Gilles Espinasse <g.esp at free.fr>
diff --git a/configure.ac b/configure.ac
index 0b526d6..09a4948 100644
--- a/configure.ac
+++ b/configure.ac
@@ -192,35 +192,43 @@ dnl =========================================================================
dnl OpenMP for the test suite?
dnl
-# Check for OpenMP support (only supported by autoconf >=2.62)
+# Check for OpenMP support only when autoconf support that (require autoconf >=2.62)
OPENMP_CFLAGS=
m4_ifdef([AC_OPENMP], [AC_OPENMP])
-m4_define([openmp_test_program],[dnl
-#include <stdio.h>
+if test "x$enable_openmp" = "xyes" && test "x$ac_cv_prog_c_openmp" = "xunsupported" ; then
+ AC_MSG_WARN([OpenMP support requested but found unsupported])
+fi
-extern unsigned int lcg_seed;
-#pragma omp threadprivate(lcg_seed)
-unsigned int lcg_seed;
+dnl May not fail to link without -Wall -Werror added
+dnl So try to link only when openmp is supported
+dnl ac_cv_prog_c_openmp is not defined when --disable-openmp is used
+if test "x$ac_cv_prog_c_openmp" != "xunsupported" && test "x$ac_cv_prog_c_openmp" != "x"; then
+ m4_define([openmp_test_program],[dnl
+ #include <stdio.h>
-unsigned function(unsigned a, unsigned b)
-{
+ extern unsigned int lcg_seed;
+ #pragma omp threadprivate(lcg_seed)
+ unsigned int lcg_seed;
+
+ unsigned function(unsigned a, unsigned b)
+ {
lcg_seed ^= b;
return ((a + b) ^ a ) + lcg_seed;
-}
+ }
-int main(int argc, char **argv)
-{
+ int main(int argc, char **argv)
+ {
int i;
int n1 = 0, n2 = argc;
unsigned checksum = 0;
int verbose = argv != NULL;
unsigned (*test_function)(unsigned, unsigned);
test_function = function;
- #pragma omp parallel for reduction(+:checksum) default(none) \
+ #pragma omp parallel for reduction(+:checksum) default(none) \
shared(n1, n2, test_function, verbose)
for (i = n1; i < n2; i++)
- {
+ {
unsigned crc = test_function (i, 0);
if (verbose)
printf ("%d: %08X\n", i, crc);
@@ -228,18 +236,17 @@ int main(int argc, char **argv)
}
printf("%u\n", checksum);
return 0;
-}
-])
+ }
+ ])
-PIXMAN_LINK_WITH_ENV(
+ PIXMAN_LINK_WITH_ENV(
[CFLAGS="$OPENMP_CFLAGS" LDFLAGS="$OPENMP_CFLAGS"],
[openmp_test_program],
[have_openmp=yes],
[have_openmp=no])
-if test "x$have_openmp" = "xyes"; then
- AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite])
-else
- OPENMP_CFLAGS=""
+ if test "x$have_openmp" = "xyes" ; then
+ AC_DEFINE(USE_OPENMP, 1, [use OpenMP in the test suite])
+ fi
fi
AC_SUBST(OPENMP_CFLAGS)
commit b9e8f7fb7494e4ee4be56d1555632233a494b28e
Author: Gilles Espinasse <g.esp at free.fr>
Date: Tue Apr 12 22:44:25 2011 +0200
Fix missing AC_MSG_RESULT value from Werror test
Use the correct variable name
Signed-off-by: Gilles Espinasse <g.esp at free.fr>
diff --git a/configure.ac b/configure.ac
index db9a883..0b526d6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -119,7 +119,7 @@ for w in -Werror -errwarn; do
[CFLAGS=$w],
[int main(int c, char **v) { (void)c; (void)v; return 0; }],
[WERROR=$w; yesno=yes], [yesno=no])
- AC_MSG_RESULT($_yesno)
+ AC_MSG_RESULT($yesno)
fi
done
More information about the xorg-commit
mailing list