pixman: Branch 'master' - 2 commits
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Apr 18 04:01:29 UTC 2025
pixman/pixman-arma64-neon-asm.S | 95 ++++++++++++++----------------------
pixman/pixman-arma64-neon-asm.h | 21 +++-----
test/meson.build | 1 +
test/neg-stride-test.c | 105 ++++++++++++++++++++++++++++++++++++++++
4 files changed, 152 insertions(+), 70 deletions(-)
New commits:
commit 106323bc15c7eb89a033cf8ff38f52bca8a4b53c
Author: Joel May <git at jmay.us>
Date: Wed Apr 9 17:54:25 2025 -0700
Fix arm64 advanced prefetcher
https://gitlab.freedesktop.org/pixman/pixman/-/issues/119
When the advanced preloader reached the end of a scanline, it advanced
the `PF_SRC`, `PF_DST`, and `PF_MASK` addresses by 1 instead of
advancing to the next scanline.
Most of the time this meant the prefetcher was simply prefetching from
the wrong place in the image. But when the stride is negative, it could
result in a segfault when trying to read memory beyond the end of the
image buffer.
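In rough C terms, the drift looks like this (a sketch with illustrative
names, not the actual macro registers):
```
#include <stdint.h>
#include <stdlib.h>

/* Sketch of the drift (illustrative names only).  With a negative
 * stride, the logical first scanline sits at the end of the allocation,
 * so a cursor that creeps forward one byte per row ends up past the
 * buffer instead of walking back through it. */
static uint8_t *
sketch_prefetch_drift (uint8_t *buf, int height, int stride_bytes)
{
    /* Start of the logical first row when stride_bytes < 0. */
    uint8_t *pf_src = buf + (size_t) (height - 1) * abs (stride_bytes);
    int y;

    for (y = 0; y < height; y++)
        pf_src += 1;    /* buggy update: should step by stride_bytes */

    return pf_src;      /* ~height bytes past the first row: out of
                           bounds once height exceeds |stride_bytes| */
}
```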
I have fixed this by combining the bitshift and add instructions to
directly update the address register by the correct amount before
prefetching.
My fix results in the same behavior as the code in
`pixman/pixman-arm-neon-asm.h`:
```
PF ldrbge, DUMMY, [PF_SRC, SRC_STRIDE, lsl #src_bpp_shift]!
```
This instruction adds the offset `SRC_STRIDE << src_bpp_shift` to
`PF_SRC` (pre-indexed, with write-back) and then reads the byte at the
updated `PF_SRC` into `DUMMY`. (The `ge` condition has been replaced
with a separate branch instruction in aarch64.)
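In C terms, the pre-indexed write-back form behaves roughly like this (a
sketch with illustrative names; `DUMMY` exists only to force the memory
access):
```
#include <stdint.h>

/* Rough C equivalent of the pre-indexed, write-back load (illustrative
 * names).  The cursor moves by a whole scanline first, then the load at
 * the new address pulls that scanline toward the cache. */
static uint8_t *
prefetch_next_scanline (uint8_t *pf_src, int src_stride, int src_bpp_shift)
{
    volatile uint8_t dummy;

    pf_src += src_stride * (1 << src_bpp_shift); /* write-back update  */
    dummy = *pf_src;                             /* touch a byte there */
    (void) dummy;
    return pf_src;
}
```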
I also cleaned up a couple of other cases where instructions were
redundant.
diff --git a/pixman/pixman-arma64-neon-asm.S b/pixman/pixman-arma64-neon-asm.S
index 7329d4b..519ff5b 100644
--- a/pixman/pixman-arma64-neon-asm.S
+++ b/pixman/pixman-arma64-neon-asm.S
@@ -305,16 +305,14 @@
mov v28.d[0], v14.d[0]
mov v29.d[0], v14.d[1]
PF ble, 10f
- PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb, DUMMY, [PF_SRC, DUMMY]
- PF add, PF_SRC, PF_SRC, #1
+ PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC]
10:
raddhn v20.8b, v10.8h, v17.8h
raddhn v23.8b, v11.8h, v19.8h
PF ble, 10f
- PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb, DUMMY, [PF_DST, DUMMY]
- PF add, PF_DST, PF_SRC, #1
+ PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST]
10:
raddhn v22.8b, v12.8h, v18.8h
st1 {v14.8h}, [DST_W], #16
@@ -497,9 +495,8 @@ generate_composite_function \
ushll v14.8h, v2.8b, #7
sli v14.8h, v14.8h, #1
PF ble, 10f
- PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb, DUMMY, [PF_SRC, DUMMY]
- PF add, PF_SRC, PF_SRC, #1
+ PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC]
10:
ushll v9.8h, v0.8b, #7
sli v9.8h, v9.8h, #1
@@ -585,12 +582,10 @@ generate_composite_function \
10:
uqadd v28.8b, v0.8b, v4.8b
PF ble, 10f
- PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb, DUMMY, [PF_SRC, DUMMY]
- PF add, PF_SRC, PF_SRC, #1
- PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb, DUMMY, [PF_DST, DUMMY]
- PF add, PF_DST, PF_DST, #1
+ PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC]
+ PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST]
10:
uqadd v29.8b, v1.8b, v5.8b
uqadd v30.8b, v2.8b, v6.8b
@@ -631,12 +626,10 @@ generate_composite_function \
10:
uqadd v28.8b, v0.8b, v4.8b
PF ble, 10f
- PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb, DUMMY, [PF_SRC, DUMMY]
- PF add, PF_SRC, PF_SRC, #1
- PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb, DUMMY, [PF_DST, DUMMY]
- PF add, PF_DST, PF_DST, #1
+ PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC]
+ PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST]
10:
uqadd v29.8b, v1.8b, v5.8b
uqadd v30.8b, v2.8b, v6.8b
@@ -719,15 +712,13 @@ generate_composite_function_single_scanline \
10:
umull v9.8h, v22.8b, v5.8b
PF ble, 10f
- PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb, DUMMY, [PF_SRC, DUMMY]
- PF add, PF_SRC, PF_SRC, #1
+ PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC]
10:
umull v10.8h, v22.8b, v6.8b
PF ble, 10f
- PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb, DUMMY, [PF_DST, DUMMY]
- PF add, PF_DST, PF_DST, #1
+ PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST]
10:
umull v11.8h, v22.8b, v7.8b
.endm
@@ -793,15 +784,13 @@ generate_composite_function_single_scanline \
10:
umull v9.8h, v22.8b, v5.8b
PF ble, 10f
- PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb, DUMMY, [PF_SRC, DUMMY]
- PF add, PF_SRC, PF_SRC, #1
+ PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC]
10:
umull v10.8h, v22.8b, v6.8b
PF ble, 10f
- PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb, DUMMY, [PF_DST, DUMMY]
- PF add, PF_DST, PF_DST, #1
+ PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST]
10:
umull v11.8h, v22.8b, v7.8b
.endm
@@ -886,9 +875,8 @@ generate_composite_function_single_scanline \
PF subs, PF_CTL, PF_CTL, #0x10
umull v11.8h, v24.8b, v7.8b
PF ble, 10f
- PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb, DUMMY, [PF_DST, DUMMY]
- PF add, PF_DST, PF_DST, #1
+ PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST]
10:
st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
.endm
@@ -950,9 +938,8 @@ generate_composite_function \
umull v9.8h, v22.8b, v5.8b
umull v10.8h, v22.8b, v6.8b
PF blt, 10f
- PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb, DUMMY, [PF_DST, DUMMY]
- PF add, PF_DST, PF_DST, #1
+ PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST]
10:
umull v11.8h, v22.8b, v7.8b
.endm
@@ -1436,9 +1423,8 @@ generate_composite_function \
10:
umull v11.8h, v24.8b, v3.8b
PF ble, 10f
- PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift
- PF ldrsb, DUMMY, [PF_MASK, DUMMY]
- PF add, PF_MASK, PF_MASK, #1
+ PF add, PF_MASK, PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift
+ PF ldrsb, DUMMY, [PF_MASK]
10:
st4 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
ursra v8.8h, v8.8h, #8
@@ -1517,9 +1503,8 @@ generate_composite_function \
10:
umull v3.8h, v27.8b, v16.8b
PF ble, 10f
- PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift
- PF ldrsb, DUMMY, [PF_MASK, DUMMY]
- PF add, PF_MASK, PF_MASK, #1
+ PF add, PF_MASK, PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift
+ PF ldrsb, DUMMY, [PF_MASK]
10:
st1 {v28.8b, v29.8b, v30.8b, v31.8b}, [DST_W], #32
ursra v0.8h, v0.8h, #8
@@ -1628,15 +1613,13 @@ generate_composite_function \
10:
umull v19.8h, v24.8b, v11.8b
PF ble, 10f
- PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb, DUMMY, [PF_DST, DUMMY]
- PF add, PF_DST, PF_DST, #1
+ PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST]
10:
uqadd v28.8b, v0.8b, v28.8b
PF ble, 10f
- PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift
- PF ldrsb, DUMMY, [PF_MASK, DUMMY]
- PF add, PF_MASK, PF_MASK, #1
+ PF add, PF_MASK, PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift
+ PF ldrsb, DUMMY, [PF_MASK]
10:
uqadd v29.8b, v1.8b, v29.8b
uqadd v30.8b, v2.8b, v30.8b
@@ -2699,9 +2682,8 @@ generate_composite_function \
PF sub, PF_X, PF_X, ORIG_W
PF subs, PF_CTL, PF_CTL, #0x10
PF ble, 10f
- PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb, DUMMY, [PF_SRC, DUMMY]
- PF add, PF_SRC, PF_SRC, #1
+ PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC]
10:
.endm
@@ -2768,9 +2750,8 @@ generate_composite_function \
PF sub, PF_X, PF_X, ORIG_W
PF subs, PF_CTL, PF_CTL, #0x10
PF ble, 10f
- PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb, DUMMY, [PF_SRC, DUMMY]
- PF add, PF_SRC, PF_SRC, #1
+ PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC]
10:
.endm
diff --git a/pixman/pixman-arma64-neon-asm.h b/pixman/pixman-arma64-neon-asm.h
index ec3d76f..516ebb8 100644
--- a/pixman/pixman-arma64-neon-asm.h
+++ b/pixman/pixman-arma64-neon-asm.h
@@ -474,25 +474,21 @@
PF lsl, DUMMY, PF_X, #mask_bpp_shift
PF prfm, PREFETCH_MODE, [PF_MASK, DUMMY]
.endif
- PF ble, 71f
+ PF ble, 72f
PF sub, PF_X, PF_X, ORIG_W
PF subs, PF_CTL, PF_CTL, #0x10
-71:
PF ble, 72f
.if src_bpp_shift >= 0
- PF lsl, DUMMY, SRC_STRIDE, #src_bpp_shift
- PF ldrsb, DUMMY, [PF_SRC, DUMMY]
- PF add, PF_SRC, PF_SRC, #1
+ PF add, PF_SRC, PF_SRC, SRC_STRIDE, lsl #src_bpp_shift
+ PF ldrsb, DUMMY, [PF_SRC]
.endif
.if dst_r_bpp != 0
- PF lsl, DUMMY, DST_STRIDE, #dst_bpp_shift
- PF ldrsb, DUMMY, [PF_DST, DUMMY]
- PF add, PF_DST, PF_DST, #1
+ PF add, PF_DST, PF_DST, DST_STRIDE, lsl #dst_bpp_shift
+ PF ldrsb, DUMMY, [PF_DST]
.endif
.if mask_bpp_shift >= 0
- PF lsl, DUMMY, MASK_STRIDE, #mask_bpp_shift
- PF ldrsb, DUMMY, [PF_MASK, DUMMY]
- PF add, PF_MASK, PF_MASK, #1
+ PF add, PF_MASK, PF_MASK, MASK_STRIDE, lsl #mask_bpp_shift
+ PF ldrsb, DUMMY, [PF_MASK]
.endif
72:
.endif
@@ -858,8 +854,7 @@
PF mov, PF_DST, DST_R
PF mov, PF_MASK, MASK
/* PF_CTL = \prefetch_distance | ((h - 1) << 4) */
- PF lsl, DUMMY, H, #4
- PF mov, PF_CTL, DUMMY
+ PF lsl, PF_CTL, H, #4
PF add, PF_CTL, PF_CTL, #(\prefetch_distance - 0x10)
\init
commit fdbb0d944c78b2952bc99235c5bafc60011925a8
Author: Joel May <git at jmay.us>
Date: Thu Apr 10 10:12:11 2025 -0700
Test case for compositing with a negative stride
The aarch64 advanced prefetcher has a bug where it can read past the
end of the image buffers. Typically this causes no ill effects (beyond
making the attempted prefetch useless), because it is simply prefetching
junk data and discarding it. Where it causes problems (i.e., a segfault)
is when it tries to read memory that is not readable, such as a memory
mapping with no read permission.
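The idea behind `fence_malloc` is roughly the following (a minimal
sketch of the technique, not pixman's actual implementation): the
buffer is placed flush against a `PROT_NONE` guard page, so any read
past its end faults immediately.
```
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>

/* Minimal sketch of the fence_malloc idea (not pixman's actual
 * implementation): right-align the buffer against a PROT_NONE guard
 * page so that any read past its end faults instead of silently
 * returning junk. */
static void *
guarded_alloc (size_t len)
{
    size_t   page   = (size_t) sysconf (_SC_PAGESIZE);
    size_t   padded = (len + page - 1) & ~(page - 1);
    uint8_t *base   = mmap (NULL, padded + page, PROT_READ | PROT_WRITE,
                            MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

    if (base == MAP_FAILED)
        return NULL;

    /* Revoke all access to the trailing guard page. */
    if (mprotect (base + padded, page, PROT_NONE) != 0)
        return NULL;

    /* The last byte of the returned buffer abuts the guard page. */
    return base + padded - len;
}
```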
To expose this specific bug, we need a test case with a negative stride
and enough height that `last_scanline_ptr + height` runs past the end of
the image buffer (this expression is approximate and omits some small
terms). Using `fence_malloc`, we can catch the problem as a segfault
when the prefetch attempts to read memory beyond the end of the image
buffer.
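Plugging in the test's numbers (a sketch; the constants mirror `info`
in the new test below):
```
#include <assert.h>

/* Overrun estimate with the test's numbers (1000 rows, stride -16
 * bytes), mirroring the approximation described above. */
int
main (void)
{
    int height   = 1000, stride = -16;
    int size     = height * -stride;  /* 16000-byte allocation          */
    int last_row = size + stride;     /* offset 15984: last scanline    */
    int drifted  = last_row + height; /* ~16984: where the buggy cursor
                                         ends up                        */

    assert (drifted > size);          /* i.e. beyond the fence page */
    return 0;
}
```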
diff --git a/test/meson.build b/test/meson.build
index 47dd33c..e652956 100644
--- a/test/meson.build
+++ b/test/meson.build
@@ -51,6 +51,7 @@ tests = [
'scaling-test',
'composite',
'tolerance-test',
+ 'neg-stride-test',
]
# Remove/update this once thread-test.c supports threading methods
diff --git a/test/neg-stride-test.c b/test/neg-stride-test.c
new file mode 100644
index 0000000..ed10479
--- /dev/null
+++ b/test/neg-stride-test.c
@@ -0,0 +1,105 @@
+/*
+ * Test program, which tests negative strides in compositing with fence_malloc
+ * to check for out-of-bounds memory access.
+ */
+#include "utils.h"
+
+#if FENCE_MALLOC_ACTIVE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+typedef struct
+{
+ int width;
+ int height;
+ int stride;
+ pixman_format_code_t format;
+} image_info_t;
+
+typedef struct
+{
+ pixman_op_t op;
+
+ image_info_t src;
+ image_info_t dest;
+
+ int src_x;
+ int src_y;
+ int dest_x;
+ int dest_y;
+ int width;
+ int height;
+} composite_info_t;
+
+const composite_info_t info =
+{
+ PIXMAN_OP_SRC,
+ { 16, 1000, -16, PIXMAN_a8r8g8b8 },
+ { 16, 1000, -16, PIXMAN_a8r8g8b8 },
+ 0, 0,
+ 0, 0,
+ 16, 1000
+};
+
+static pixman_image_t *
+make_image (const image_info_t *info, char **buf)
+{
+ int size = info->height * abs (info->stride);
+ char *data = fence_malloc (size);
+ int i;
+
+ for (i = 0; i < size; ++i)
+ data[i] = (i % 255) ^ (((i % 16) << 4) | (i & 0xf0));
+
+ *buf = data;
+ if (info->stride < 0)
+ /* Move to the start of the last scanline. */
+ data += size + info->stride;
+
+ return pixman_image_create_bits (info->format,
+ info->width,
+ info->height,
+ (uint32_t *)data,
+ info->stride);
+}
+
+static void
+test_composite (const composite_info_t *info)
+{
+ char *src_buf;
+ char *dest_buf;
+ pixman_image_t *src = make_image (&info->src, &src_buf);
+ pixman_image_t *dest = make_image (&info->dest, &dest_buf);
+
+ pixman_image_composite (PIXMAN_OP_SRC, src, NULL, dest,
+ info->src_x, info->src_y,
+ 0, 0,
+ info->dest_x, info->dest_y,
+ info->width, info->height);
+
+ fence_free (src_buf);
+ fence_free (dest_buf);
+ pixman_image_unref (src);
+ pixman_image_unref (dest);
+}
+
+int
+main (int argc, char **argv)
+{
+ test_composite (&info);
+
+ return 0;
+}
+
+#else /* FENCE_MALLOC_ACTIVE */
+
+int
+main (int argc, char **argv)
+{
+ /* Automake return code for test SKIP. */
+ return 77;
+}
+
+#endif /* FENCE_MALLOC_ACTIVE */