pixman: Branch 'master' - 4 commits
Siarhei Siamashka
siamashka at kemper.freedesktop.org
Tue Oct 1 13:45:05 PDT 2013
configure.ac | 29 +-
pixman/pixman-compiler.h | 2
pixman/pixman-vmx.c | 539 +++++++++++++++++++++++++++++++++++++++--------
test/Makefile.am | 6
test/Makefile.sources | 1
test/thread-test.c | 197 +++++++++++++++++
6 files changed, 675 insertions(+), 99 deletions(-)
New commits:
commit 7d05a7f4dc825f9c778e534fdabb749199c2e439
Author: Siarhei Siamashka <siarhei.siamashka at gmail.com>
Date: Sat Sep 28 04:51:21 2013 +0300
vmx: there is no need to handle unaligned destination anymore
So the redundant variables, memory reads/writes and reshuffles
can be safely removed. For example, this makes the inner loop
of the 'vmx_combine_add_u_no_mask' function much simpler.
Before:
7a20:7d a8 48 ce lvx v13,r8,r9
7a24:7d 80 48 ce lvx v12,r0,r9
7a28:7d 28 50 ce lvx v9,r8,r10
7a2c:7c 20 50 ce lvx v1,r0,r10
7a30:39 4a 00 10 addi r10,r10,16
7a34:10 0d 62 eb vperm v0,v13,v12,v11
7a38:10 21 4a 2b vperm v1,v1,v9,v8
7a3c:11 2c 6a eb vperm v9,v12,v13,v11
7a40:10 21 4a 00 vaddubs v1,v1,v9
7a44:11 a1 02 ab vperm v13,v1,v0,v10
7a48:10 00 0a ab vperm v0,v0,v1,v10
7a4c:7d a8 49 ce stvx v13,r8,r9
7a50:7c 00 49 ce stvx v0,r0,r9
7a54:39 29 00 10 addi r9,r9,16
7a58:42 00 ff c8 bdnz+ 7a20 <.vmx_combine_add_u_no_mask+0x120>
After:
76c0:7c 00 48 ce lvx v0,r0,r9
76c4:7d a8 48 ce lvx v13,r8,r9
76c8:39 29 00 10 addi r9,r9,16
76cc:7c 20 50 ce lvx v1,r0,r10
76d0:10 00 6b 2b vperm v0,v0,v13,v12
76d4:10 00 0a 00 vaddubs v0,v0,v1
76d8:7c 00 51 ce stvx v0,r0,r10
76dc:39 4a 00 10 addi r10,r10,16
76e0:42 00 ff e0 bdnz+ 76c0 <.vmx_combine_add_u_no_mask+0x120>
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index 130d78e..c33631c 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -134,15 +134,11 @@ over (vector unsigned int src,
source ## _mask = vec_lvsl (0, source);
#define COMPUTE_SHIFT_MASKS(dest, source) \
- dest ## _mask = vec_lvsl (0, dest); \
- source ## _mask = vec_lvsl (0, source); \
- store_mask = vec_lvsr (0, dest);
+ source ## _mask = vec_lvsl (0, source);
#define COMPUTE_SHIFT_MASKC(dest, source, mask) \
mask ## _mask = vec_lvsl (0, mask); \
- dest ## _mask = vec_lvsl (0, dest); \
- source ## _mask = vec_lvsl (0, source); \
- store_mask = vec_lvsr (0, dest);
+ source ## _mask = vec_lvsl (0, source);
/* notice you have to declare temp vars...
* Note: tmp3 and tmp4 must remain untouched!
@@ -151,23 +147,17 @@ over (vector unsigned int src,
#define LOAD_VECTORS(dest, source) \
tmp1 = (typeof(tmp1))vec_ld (0, source); \
tmp2 = (typeof(tmp2))vec_ld (15, source); \
- tmp3 = (typeof(tmp3))vec_ld (0, dest); \
v ## source = (typeof(v ## source)) \
vec_perm (tmp1, tmp2, source ## _mask); \
- tmp4 = (typeof(tmp4))vec_ld (15, dest); \
- v ## dest = (typeof(v ## dest)) \
- vec_perm (tmp3, tmp4, dest ## _mask);
+ v ## dest = (typeof(v ## dest))vec_ld (0, dest);
#define LOAD_VECTORSC(dest, source, mask) \
tmp1 = (typeof(tmp1))vec_ld (0, source); \
tmp2 = (typeof(tmp2))vec_ld (15, source); \
- tmp3 = (typeof(tmp3))vec_ld (0, dest); \
v ## source = (typeof(v ## source)) \
vec_perm (tmp1, tmp2, source ## _mask); \
- tmp4 = (typeof(tmp4))vec_ld (15, dest); \
tmp1 = (typeof(tmp1))vec_ld (0, mask); \
- v ## dest = (typeof(v ## dest)) \
- vec_perm (tmp3, tmp4, dest ## _mask); \
+ v ## dest = (typeof(v ## dest))vec_ld (0, dest); \
tmp2 = (typeof(tmp2))vec_ld (15, mask); \
v ## mask = (typeof(v ## mask)) \
vec_perm (tmp1, tmp2, mask ## _mask);
@@ -178,11 +168,7 @@ over (vector unsigned int src,
splat_alpha (v ## mask));
#define STORE_VECTOR(dest) \
- edges = vec_perm (tmp4, tmp3, dest ## _mask); \
- tmp3 = vec_perm ((vector unsigned char)v ## dest, edges, store_mask); \
- tmp1 = vec_perm (edges, (vector unsigned char)v ## dest, store_mask); \
- vec_st ((vector unsigned int) tmp3, 15, dest); \
- vec_st ((vector unsigned int) tmp1, 0, dest);
+ vec_st ((vector unsigned int) v ## dest, 0, dest);
static void
vmx_combine_over_u_no_mask (uint32_t * dest,
@@ -191,8 +177,7 @@ vmx_combine_over_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -242,8 +227,7 @@ vmx_combine_over_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -314,8 +298,7 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -363,8 +346,7 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -432,8 +414,7 @@ vmx_combine_in_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -478,8 +459,7 @@ vmx_combine_in_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -544,8 +524,7 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -592,8 +571,7 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -660,8 +638,7 @@ vmx_combine_out_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -708,8 +685,7 @@ vmx_combine_out_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -774,8 +750,7 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -823,8 +798,7 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -891,8 +865,7 @@ vmx_combine_atop_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -944,8 +917,7 @@ vmx_combine_atop_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1021,8 +993,7 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1074,8 +1045,7 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1151,8 +1121,7 @@ vmx_combine_xor_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1204,8 +1173,7 @@ vmx_combine_xor_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1281,8 +1249,7 @@ vmx_combine_add_u_no_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1328,8 +1295,7 @@ vmx_combine_add_u_mask (uint32_t * dest,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, src_mask, mask_mask, store_mask;
+ vector unsigned char tmp1, tmp2, src_mask, mask_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1397,8 +1363,7 @@ vmx_combine_src_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1448,8 +1413,7 @@ vmx_combine_over_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1507,8 +1471,7 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1564,8 +1527,7 @@ vmx_combine_in_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1619,8 +1581,7 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1675,8 +1636,7 @@ vmx_combine_out_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1733,8 +1693,7 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1791,8 +1750,7 @@ vmx_combine_atop_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask, vsrca;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1858,8 +1816,7 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1922,8 +1879,7 @@ vmx_combine_xor_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
@@ -1986,8 +1942,7 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
{
int i;
vector unsigned int vdest, vsrc, vmask;
- vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
- dest_mask, mask_mask, src_mask, store_mask;
+ vector unsigned char tmp1, tmp2, mask_mask, src_mask;
while (width && ((uintptr_t)dest & 15))
{
commit b6c5ba06f0c5c0bd8d186e7a4879fd3b33e7e13f
Author: Siarhei Siamashka <siarhei.siamashka at gmail.com>
Date: Sat Sep 28 03:48:07 2013 +0300
vmx: align destination to fix valgrind invalid memory writes
The SIMD optimized inner loops in the VMX/Altivec code are trying
to emulate unaligned accesses to the destination buffer. For each
4 pixels (which fit into a 128-bit register) the current
implementation:
1. first performs two aligned reads, which cover the needed data
2. reshuffles bytes to get the needed data in a single vector register
3. does all the necessary calculations
4. reshuffles bytes back to their original location in two registers
5. performs two aligned writes back to the destination buffer
Unfortunately, in the case where the destination buffer is unaligned and
the width is a perfect multiple of 4 pixels, we may have some writes
crossing the boundaries of the destination buffer. In a multithreaded
environment this may potentially corrupt the data outside of the
destination buffer if it is concurrently read and written by some
other thread.
The valgrind report for blitters-test is full of:
==23085== Invalid write of size 8
==23085== at 0x1004B0B4: vmx_combine_add_u (pixman-vmx.c:1089)
==23085== by 0x100446EF: general_composite_rect (pixman-general.c:214)
==23085== by 0x10002537: test_composite (blitters-test.c:363)
==23085== by 0x1000369B: fuzzer_test_main._omp_fn.0 (utils.c:733)
==23085== by 0x10004943: fuzzer_test_main (utils.c:728)
==23085== by 0x10002C17: main (blitters-test.c:397)
==23085== Address 0x5188218 is 0 bytes after a block of size 88 alloc'd
==23085== at 0x4051DA0: memalign (vg_replace_malloc.c:581)
==23085== by 0x4051E7B: posix_memalign (vg_replace_malloc.c:709)
==23085== by 0x10004CFF: aligned_malloc (utils.c:833)
==23085== by 0x10001DCB: create_random_image (blitters-test.c:47)
==23085== by 0x10002263: test_composite (blitters-test.c:283)
==23085== by 0x1000369B: fuzzer_test_main._omp_fn.0 (utils.c:733)
==23085== by 0x10004943: fuzzer_test_main (utils.c:728)
==23085== by 0x10002C17: main (blitters-test.c:397)
This patch addresses the problem by first aligning the destination
buffer to a 16-byte boundary in each combiner function. This trick
is borrowed from the pixman SSE2 code.
It makes it possible to pass the new thread-test on PowerPC VMX/Altivec systems and
also resolves the "make check" failure reported for POWER7 hardware:
http://lists.freedesktop.org/archives/pixman/2013-August/002871.html
diff --git a/pixman/pixman-vmx.c b/pixman/pixman-vmx.c
index f629003..130d78e 100644
--- a/pixman/pixman-vmx.c
+++ b/pixman/pixman-vmx.c
@@ -194,6 +194,18 @@ vmx_combine_over_u_no_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -233,6 +245,22 @@ vmx_combine_over_u_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia;
+
+ UN8x4_MUL_UN8 (s, m);
+
+ ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (d, ia, s);
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -289,6 +317,17 @@ vmx_combine_over_reverse_u_no_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -327,6 +366,20 @@ vmx_combine_over_reverse_u_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8 (s, m);
+
+ UN8x4_MUL_UN8_ADD_UN8x4 (s, ia, d);
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -382,6 +435,16 @@ vmx_combine_in_u_no_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (*dest);
+
+ UN8x4_MUL_UN8 (s, a);
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -418,6 +481,19 @@ vmx_combine_in_u_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (*dest);
+
+ UN8x4_MUL_UN8 (s, m);
+ UN8x4_MUL_UN8 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -471,6 +547,17 @@ vmx_combine_in_reverse_u_no_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t d = *dest;
+ uint32_t a = ALPHA_8 (*src++);
+
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -508,6 +595,20 @@ vmx_combine_in_reverse_u_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t d = *dest;
+ uint32_t a = *src++;
+
+ UN8x4_MUL_UN8 (a, m);
+ a = ALPHA_8 (a);
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -562,6 +663,17 @@ vmx_combine_out_u_no_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (~(*dest));
+
+ UN8x4_MUL_UN8 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -599,6 +711,19 @@ vmx_combine_out_u_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t a = ALPHA_8 (~(*dest));
+
+ UN8x4_MUL_UN8 (s, m);
+ UN8x4_MUL_UN8 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -652,6 +777,17 @@ vmx_combine_out_reverse_u_no_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t d = *dest;
+ uint32_t a = ALPHA_8 (~(*src++));
+
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -690,6 +826,20 @@ vmx_combine_out_reverse_u_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t d = *dest;
+ uint32_t a = *src++;
+
+ UN8x4_MUL_UN8 (a, m);
+ a = ALPHA_8 (~a);
+ UN8x4_MUL_UN8 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -744,6 +894,19 @@ vmx_combine_atop_u_no_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t dest_a = ALPHA_8 (d);
+ uint32_t src_ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -784,6 +947,24 @@ vmx_combine_atop_u_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t dest_a = ALPHA_8 (d);
+ uint32_t src_ia;
+
+ UN8x4_MUL_UN8 (s, m);
+
+ src_ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_a, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -843,6 +1024,19 @@ vmx_combine_atop_reverse_u_no_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_a = ALPHA_8 (s);
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -883,6 +1077,24 @@ vmx_combine_atop_reverse_u_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_a;
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8 (s, m);
+
+ src_a = ALPHA_8 (s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_a);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -942,6 +1154,19 @@ vmx_combine_xor_u_no_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_ia = ALPHA_8 (~s);
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -982,6 +1207,24 @@ vmx_combine_xor_u_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t src_ia;
+ uint32_t dest_ia = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8 (s, m);
+
+ src_ia = ALPHA_8 (~s);
+
+ UN8x4_MUL_UN8_ADD_UN8x4_MUL_UN8 (s, dest_ia, d, src_ia);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1041,6 +1284,17 @@ vmx_combine_add_u_no_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+
+ UN8x4_ADD_UN8x4 (d, s);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKS (dest, src);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
for (i = width / 4; i > 0; i--)
@@ -1077,6 +1331,19 @@ vmx_combine_add_u_mask (uint32_t * dest,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, src_mask, mask_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t m = ALPHA_8 (*mask++);
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+
+ UN8x4_MUL_UN8 (s, m);
+ UN8x4_ADD_UN8x4 (d, s);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1133,6 +1400,17 @@ vmx_combine_src_ca (pixman_implementation_t *imp,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+
+ UN8x4_MUL_UN8x4 (s, a);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1173,6 +1451,21 @@ vmx_combine_over_ca (pixman_implementation_t *imp,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ~a, s);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1217,6 +1510,20 @@ vmx_combine_over_reverse_ca (pixman_implementation_t *imp,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t ida = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8_ADD_UN8x4 (s, ida, d);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf("%s\n",__PRETTY_FUNCTION__); */
@@ -1260,6 +1567,19 @@ vmx_combine_in_ca (pixman_implementation_t *imp,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t da = ALPHA_8 (*dest);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (s, da);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1302,6 +1622,19 @@ vmx_combine_in_reverse_ca (pixman_implementation_t *imp,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (*src++);
+
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4 (d, a);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1345,6 +1678,20 @@ vmx_combine_out_ca (pixman_implementation_t *imp,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t da = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (s, da);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1389,6 +1736,20 @@ vmx_combine_out_reverse_ca (pixman_implementation_t *imp,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4 (d, ~a);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1433,6 +1794,22 @@ vmx_combine_atop_ca (pixman_implementation_t *imp,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+ uint32_t da = ALPHA_8 (d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1484,6 +1861,22 @@ vmx_combine_atop_reverse_ca (pixman_implementation_t *imp,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+ uint32_t da = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, a, s, da);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1532,6 +1925,22 @@ vmx_combine_xor_ca (pixman_implementation_t *imp,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+ uint32_t sa = ALPHA_8 (s);
+ uint32_t da = ALPHA_8 (~d);
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_MUL_UN8 (a, sa);
+ UN8x4_MUL_UN8x4_ADD_UN8x4_MUL_UN8 (d, ~a, s, da);
+
+ *dest++ = d;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
@@ -1580,6 +1989,19 @@ vmx_combine_add_ca (pixman_implementation_t *imp,
vector unsigned char tmp1, tmp2, tmp3, tmp4, edges,
dest_mask, mask_mask, src_mask, store_mask;
+ while (width && ((uintptr_t)dest & 15))
+ {
+ uint32_t a = *mask++;
+ uint32_t s = *src++;
+ uint32_t d = *dest;
+
+ UN8x4_MUL_UN8x4 (s, a);
+ UN8x4_ADD_UN8x4 (s, d);
+
+ *dest++ = s;
+ width--;
+ }
+
COMPUTE_SHIFT_MASKC (dest, src, mask);
/* printf ("%s\n",__PRETTY_FUNCTION__); */
commit 0438435b9c915b61af21446b2cb2f77a2b98a3b9
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date: Sat Sep 28 01:10:24 2013 -0400
test: Add new thread-test program
This test program allocates an array of 16 * 7 uint32_ts and spawns 16
threads that each use 7 of the allocated uint32_ts as a destination
image for a large number of composite operations. Each thread then
computes and returns a checksum for the image. Finally, the main
thread computes a checksum of the checksums and verifies that it
matches expectations.
The purpose of this test is to catch errors where memory outside images
is read and then written back. Such out-of-bounds accesses are broken
when multiple threads are involved, because the threads will race to
read and write the shared memory.
V2:
- Incorporate fixes from Siarhei for endianness and undefined behavior
regarding argument evaluation
- Make the images 7 pixels wide since the bug only happens when the
composite width is greater than 4.
- Compute a checksum of the checksums so that you don't have to
update 16 values if something changes.
V3: Remove stray dollar sign
diff --git a/test/Makefile.am b/test/Makefile.am
index 5d901d5..88dc36d 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -1,8 +1,8 @@
include $(top_srcdir)/test/Makefile.sources
-AM_CFLAGS = $(OPENMP_CFLAGS)
-AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS)
-LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS)
+AM_CFLAGS = $(OPENMP_CFLAGS) $(PTHREAD_CFLAGS)
+AM_LDFLAGS = $(OPENMP_CFLAGS) $(TESTPROGS_EXTRA_LDFLAGS) $(PTHREAD_LDFLAGS)
+LDADD = libutils.la $(top_builddir)/pixman/libpixman-1.la -lm $(PNG_LIBS) $(PTHREAD_LIBS)
AM_CPPFLAGS = -I$(top_srcdir)/pixman -I$(top_builddir)/pixman $(PNG_CFLAGS)
libutils_la_SOURCES = $(libutils_sources) $(libutils_headers)
diff --git a/test/Makefile.sources b/test/Makefile.sources
index 2fabdb5..2ae5d9f 100644
--- a/test/Makefile.sources
+++ b/test/Makefile.sources
@@ -13,6 +13,7 @@ TESTPROGRAMS = \
infinite-loop \
trap-crasher \
alpha-loop \
+ thread-test \
scaling-crash-test \
scaling-helpers-test \
gradient-crash-test \
diff --git a/test/thread-test.c b/test/thread-test.c
new file mode 100644
index 0000000..f24c31d
--- /dev/null
+++ b/test/thread-test.c
@@ -0,0 +1,197 @@
+#include <config.h>
+
+#ifndef HAVE_PTHREADS
+
+int main ()
+{
+ printf ("Skipped thread-test - pthreads not supported\n");
+ return 0;
+}
+
+#else
+
+#include <stdlib.h>
+#include <pthread.h>
+#include "utils.h"
+
+typedef struct
+{
+ int thread_no;
+ uint32_t *dst_buf;
+} info_t;
+
+static const pixman_op_t operators[] =
+{
+ PIXMAN_OP_SRC,
+ PIXMAN_OP_OVER,
+ PIXMAN_OP_ADD,
+ PIXMAN_OP_CLEAR,
+ PIXMAN_OP_SRC,
+ PIXMAN_OP_DST,
+ PIXMAN_OP_OVER,
+ PIXMAN_OP_OVER_REVERSE,
+ PIXMAN_OP_IN,
+ PIXMAN_OP_IN_REVERSE,
+ PIXMAN_OP_OUT,
+ PIXMAN_OP_OUT_REVERSE,
+ PIXMAN_OP_ATOP,
+ PIXMAN_OP_ATOP_REVERSE,
+ PIXMAN_OP_XOR,
+ PIXMAN_OP_ADD,
+ PIXMAN_OP_SATURATE,
+ PIXMAN_OP_DISJOINT_CLEAR,
+ PIXMAN_OP_DISJOINT_SRC,
+ PIXMAN_OP_DISJOINT_DST,
+ PIXMAN_OP_DISJOINT_OVER,
+ PIXMAN_OP_DISJOINT_OVER_REVERSE,
+ PIXMAN_OP_DISJOINT_IN,
+ PIXMAN_OP_DISJOINT_IN_REVERSE,
+ PIXMAN_OP_DISJOINT_OUT,
+ PIXMAN_OP_DISJOINT_OUT_REVERSE,
+ PIXMAN_OP_DISJOINT_ATOP,
+ PIXMAN_OP_DISJOINT_ATOP_REVERSE,
+ PIXMAN_OP_DISJOINT_XOR,
+ PIXMAN_OP_CONJOINT_CLEAR,
+ PIXMAN_OP_CONJOINT_SRC,
+ PIXMAN_OP_CONJOINT_DST,
+ PIXMAN_OP_CONJOINT_OVER,
+ PIXMAN_OP_CONJOINT_OVER_REVERSE,
+ PIXMAN_OP_CONJOINT_IN,
+ PIXMAN_OP_CONJOINT_IN_REVERSE,
+ PIXMAN_OP_CONJOINT_OUT,
+ PIXMAN_OP_CONJOINT_OUT_REVERSE,
+ PIXMAN_OP_CONJOINT_ATOP,
+ PIXMAN_OP_CONJOINT_ATOP_REVERSE,
+ PIXMAN_OP_CONJOINT_XOR,
+ PIXMAN_OP_MULTIPLY,
+ PIXMAN_OP_SCREEN,
+ PIXMAN_OP_OVERLAY,
+ PIXMAN_OP_DARKEN,
+ PIXMAN_OP_LIGHTEN,
+ PIXMAN_OP_COLOR_DODGE,
+ PIXMAN_OP_COLOR_BURN,
+ PIXMAN_OP_HARD_LIGHT,
+ PIXMAN_OP_DIFFERENCE,
+ PIXMAN_OP_EXCLUSION,
+};
+
+static const pixman_format_code_t formats[] =
+{
+ PIXMAN_a8r8g8b8,
+ PIXMAN_r5g6b5,
+ PIXMAN_a8,
+ PIXMAN_a4,
+ PIXMAN_a1,
+ PIXMAN_b5g6r5,
+ PIXMAN_r8g8b8a8,
+ PIXMAN_a4r4g4b4
+};
+
+#define N_ROUNDS 8192
+
+#define RAND_ELT(arr) \
+ arr[prng_rand() % ARRAY_LENGTH (arr)]
+
+#define DEST_WIDTH (7)
+
+static void *
+thread (void *data)
+{
+ info_t *info = data;
+ uint32_t crc32 = 0x0;
+ uint32_t src_buf[64];
+ pixman_image_t *dst_img, *src_img;
+ int i;
+
+ prng_srand (info->thread_no);
+
+ for (i = 0; i < N_ROUNDS; ++i)
+ {
+ pixman_op_t op;
+ int rand1, rand2;
+
+ prng_randmemset (info->dst_buf, DEST_WIDTH * sizeof (uint32_t), 0);
+ prng_randmemset (src_buf, sizeof (src_buf), 0);
+
+ src_img = pixman_image_create_bits (
+ RAND_ELT (formats), 4, 4, src_buf, 16);
+ dst_img = pixman_image_create_bits (
+ RAND_ELT (formats), DEST_WIDTH, 1, info->dst_buf,
+ DEST_WIDTH * sizeof (uint32_t));
+
+ image_endian_swap (src_img);
+ image_endian_swap (dst_img);
+
+ rand2 = prng_rand() % 4;
+ rand1 = prng_rand() % 4;
+ op = RAND_ELT (operators);
+
+ pixman_image_composite32 (
+ op,
+ src_img, NULL, dst_img,
+ rand1, rand2, 0, 0, 0, 0, DEST_WIDTH, 1);
+
+ crc32 = compute_crc32_for_image (crc32, dst_img);
+
+ pixman_image_unref (src_img);
+ pixman_image_unref (dst_img);
+ }
+
+ return (void *)(uintptr_t)crc32;
+}
+
+static inline uint32_t
+byteswap32 (uint32_t x)
+{
+ return ((x & ((uint32_t)0xFF << 24)) >> 24) |
+ ((x & ((uint32_t)0xFF << 16)) >> 8) |
+ ((x & ((uint32_t)0xFF << 8)) << 8) |
+ ((x & ((uint32_t)0xFF << 0)) << 24);
+}
+
+int
+main (void)
+{
+ uint32_t dest[16 * DEST_WIDTH];
+ info_t info[16] = { { 0 } };
+ pthread_t threads[16];
+ void *retvals[16];
+ uint32_t crc32s[16], crc32;
+ int i;
+
+ for (i = 0; i < 16; ++i)
+ {
+ info[i].thread_no = i;
+ info[i].dst_buf = &dest[i * DEST_WIDTH];
+ }
+
+ for (i = 0; i < 16; ++i)
+ pthread_create (&threads[i], NULL, thread, &info[i]);
+
+ for (i = 0; i < 16; ++i)
+ pthread_join (threads[i], &retvals[i]);
+
+ for (i = 0; i < 16; ++i)
+ {
+ crc32s[i] = (uintptr_t)retvals[i];
+
+ if (is_little_endian())
+ crc32s[i] = byteswap32 (crc32s[i]);
+ }
+
+ crc32 = compute_crc32 (0, crc32s, sizeof crc32s);
+
+#define EXPECTED 0xFD497D8D
+
+ if (crc32 != EXPECTED)
+ {
+ printf ("thread-test failed. Got checksum 0x%08X, expected 0x%08X\n",
+ crc32, EXPECTED);
+ return 1;
+ }
+
+ return 0;
+}
+
+#endif
+
commit 65829504073425362fc56995a1dcc8cc464b751a
Author: Søren Sandmann Pedersen <ssp at redhat.com>
Date: Sat Sep 28 01:03:55 2013 -0400
Rename HAVE_PTHREAD_SETSPECIFIC to HAVE_PTHREADS
The test for pthread_setspecific() can be used as a general test for
whether pthreads are available, so rename the variable from
HAVE_PTHREAD_SETSPECIFIC to HAVE_PTHREADS and run the test even when
better support for thread local variables is available.
However, the pthread arguments are still only added to CFLAGS and
LDFLAGS when pthread_setspecific() is used for thread local variables.
V2: AC_SUBST(PTHREAD_CFLAGS)
diff --git a/configure.ac b/configure.ac
index 263c63e..2dd4776 100644
--- a/configure.ac
+++ b/configure.ac
@@ -961,37 +961,38 @@ main ()
]]))
AC_DEFUN([PIXMAN_CHECK_PTHREAD],[dnl
- if test "z$support_for_pthread_setspecific" != "zyes"; then
+ if test "z$support_for_pthreads" != "zyes"; then
PIXMAN_LINK_WITH_ENV(
[$1], [pthread_test_program],
[PTHREAD_CFLAGS="$CFLAGS"
PTHREAD_LIBS="$LIBS"
PTHREAD_LDFLAGS="$LDFLAGS"
- support_for_pthread_setspecific=yes])
+ support_for_pthreads=yes])
fi
])
-if test $ac_cv_tls = none ; then
- support_for_pthread_setspecific=no
+support_for_pthreads=no
- AC_MSG_CHECKING(for pthread_setspecific)
+AC_MSG_CHECKING(for pthreads)
- PIXMAN_CHECK_PTHREAD([CFLAGS="-pthread"; LDFLAGS="-pthread"])
- PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LIBS="-lpthread"])
- PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LDFLAGS="-lroot"])
+PIXMAN_CHECK_PTHREAD([CFLAGS="-pthread"; LDFLAGS="-pthread"])
+PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LIBS="-lpthread"])
+PIXMAN_CHECK_PTHREAD([CFLAGS="-D_REENTRANT"; LDFLAGS="-lroot"])
- if test $support_for_pthread_setspecific = yes; then
- CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
- AC_DEFINE([HAVE_PTHREAD_SETSPECIFIC], [], [Whether pthread_setspecific() is supported])
+if test $support_for_pthreads = yes; then
+ AC_DEFINE([HAVE_PTHREADS], [], [Whether pthreads is supported])
+ if test $ac_cv_tls = none ; then
+ CFLAGS="$CFLAGS $PTHREAD_CFLAGS"
fi
-
- AC_MSG_RESULT($support_for_pthread_setspecific);
fi
+AC_MSG_RESULT($support_for_pthreads)
+
AC_SUBST(TOOLCHAIN_SUPPORTS__THREAD)
-AC_SUBST(HAVE_PTHREAD_SETSPECIFIC)
+AC_SUBST(HAVE_PTHREADS)
AC_SUBST(PTHREAD_LDFLAGS)
AC_SUBST(PTHREAD_LIBS)
+AC_SUBST(PTHREAD_CFLAGS)
dnl =====================================
dnl __attribute__((constructor))
diff --git a/pixman/pixman-compiler.h b/pixman/pixman-compiler.h
index 9b190b4..2489adc 100644
--- a/pixman/pixman-compiler.h
+++ b/pixman/pixman-compiler.h
@@ -178,7 +178,7 @@
# define PIXMAN_GET_THREAD_LOCAL(name) \
(&name)
-#elif defined(HAVE_PTHREAD_SETSPECIFIC)
+#elif defined(HAVE_PTHREADS)
#include <pthread.h>
More information about the xorg-commit
mailing list