xf86-video-intel: 2 commits - src/sna/blt.c src/sna/compiler.h src/sna/sna_present.c

Tue Apr 5 17:09:43 UTC 2016

 src/sna/blt.c         |   42 ++++++++++++++++++++++++++----------------
 src/sna/compiler.h    |    5 +++++
 src/sna/sna_present.c |   14 ++++++++++++++
 3 files changed, 45 insertions(+), 16 deletions(-)

New commits:
commit 65dc4176d84e1d1764a6fa1edc972aa7f1dcd2ba
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 5 17:53:55 2016 +0100

    sna/present: Prevent reporting an incomplete event
    
    If we cancel a flip, we may try to restore the current mode and this may
    flush the partial flip (in a multi-monitor setup). We report the completed
    event back to present and free the event info. Then we report the error
    back to present, and free the event info a second time. Chaos and
    corruption ensue.
    
    Reported-and-tested-by: Christoph Haag <haagch at frickel.club>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=94829
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_present.c b/src/sna/sna_present.c
index e08afe5..727fa87 100644
--- a/src/sna/sna_present.c
+++ b/src/sna/sna_present.c
@@ -45,6 +45,7 @@ struct sna_present_event {
 	uint64_t *event_id;
 	uint64_t target_msc;
 	int n_event_id;
+	bool queued;
 };
 
 static void sna_present_unflip(ScreenPtr screen, uint64_t event_id);
@@ -260,6 +261,7 @@ static bool sna_present_queue(struct sna_present_event *info,
 		if (!sna_fake_vblank(info))
 			return false;
 	} else {
+		info->queued = true;
 		if (info->target_msc - last_msc == 1) {
 			sna_crtc_set_vblank(info->crtc);
 			info->crtc = mark_crtc(info->crtc);
@@ -334,6 +336,11 @@ sna_present_vblank_handler(struct drm_event_vblank *event)
 	struct sna_present_event *info = to_present_event(event->user_data);
 	xf86CrtcPtr crtc = info->crtc;
 
+	if (!info->queued) {
+		DBG(("%s: arrived unexpectedly early (not queued)\n", __FUNCTION__));
+		return;
+	}
+
 	vblank_complete(info,
 			ust64(event->tv_sec, event->tv_usec),
 			sna_crtc_record_event(unmask_crtc(crtc), event));
@@ -406,6 +413,7 @@ sna_present_queue_vblank(RRCrtcPtr crtc, uint64_t event_id, uint64_t msc)
 	info->event_id[0] = event_id;
 	info->n_event_id = 1;
 	list_add_tail(&info->link, &tmp->link);
+	info->queued = false;
 
 	if (!sna_present_queue(info, swap->msc)) {
 		list_del(&info->link);
@@ -569,6 +577,10 @@ present_flip_handler(struct drm_event_vblank *event, void *data)
 
 	DBG(("%s(sequence=%d): event=%lld\n", __FUNCTION__, event->sequence, (long long)info->event_id[0]));
 	assert(info->n_event_id == 1);
+	if (!info->queued) {
+		DBG(("%s: arrived unexpectedly early (not queued)\n", __FUNCTION__));
+		return;
+	}
 
 	if (info->crtc == NULL) {
 		swap.tv_sec = event->tv_sec;
@@ -621,6 +633,7 @@ flip(struct sna *sna,
 	info->event_id[0] = event_id;
 	info->n_event_id = 1;
 	info->target_msc = target_msc;
+	info->queued = false;
 
 	if (!sna_page_flip(sna, bo, present_flip_handler, info)) {
 		DBG(("%s: pageflip failed\n", __FUNCTION__));
@@ -628,6 +641,7 @@ flip(struct sna *sna,
 		return FALSE;
 	}
 
+	info->queued = true;
 	if (info->crtc)
 		sna_crtc_set_vblank(info->crtc);
 	return TRUE;
commit bb5194eebd72e828a46f504d91f1ecd5b5804f57
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Apr 5 06:51:46 2016 +0100

    sna: Add alignment hints to tiled memcpy
    
    Telling the compiler the known alignment should improve the memcpy
    operation, but only has a small impact today (a few bytes/instructions
    per function).
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/blt.c b/src/sna/blt.c
index 2dae9c2..39e4149 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -258,7 +258,8 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
 			if (dst_x & tile_mask) {
 				const unsigned x = (dst_x & tile_mask) * cpp;
 				const unsigned len = min(tile_width - x, w);
-				memcpy(tile_row + x, src, len);
+				memcpy(assume_misaligned(tile_row + x, tile_width, x),
+				       src, len);
 
 				tile_row += tile_size;
 				src = (const uint8_t *)src + len;
@@ -266,13 +267,14 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
 			}
 		}
 		while (w >= tile_width) {
-			memcpy(tile_row, src, tile_width);
+			memcpy(assume_aligned(tile_row, tile_width),
+			       src, tile_width);
 
 			tile_row += tile_size;
 			src = (const uint8_t *)src + tile_width;
 			w -= tile_width;
 		}
-		memcpy(tile_row, src, w);
+		memcpy(assume_aligned(tile_row, tile_width), src, w);
 		src = (const uint8_t *)src + src_stride + w;
 		dst_y++;
 	}
@@ -314,7 +316,9 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
 			if (src_x & tile_mask) {
 				const unsigned x = (src_x & tile_mask) * cpp;
 				const unsigned len = min(tile_width - x, w);
-				memcpy(dst, tile_row + x, len);
+				/* Source starts x bytes into the tile row; hint describes tile_row + x. */
+				memcpy(dst, assume_misaligned(tile_row + x, tile_width, x),
+				       len);
 
 				tile_row += tile_size;
 				dst = (uint8_t *)dst + len;
@@ -322,13 +326,15 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
 			}
 		}
 		while (w >= tile_width) {
-			memcpy(dst, tile_row, tile_width);
+			memcpy(dst,
+			       assume_aligned(tile_row, tile_width),
+			       tile_width);
 
 			tile_row += tile_size;
 			dst = (uint8_t *)dst + tile_width;
 			w -= tile_width;
 		}
-		memcpy(dst, tile_row, w);
+		memcpy(dst, assume_aligned(tile_row, tile_width), w);
 		dst = (uint8_t *)dst + dst_stride + w;
 		src_y++;
 	}
@@ -379,7 +385,8 @@ memcpy_to_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
 				tile_row + \
 				(dx >> tile_pixels) * tile_size + \
 				(dx & tile_mask) * cpp; \
-			memcpy((char *)dst + swizzle(offset), src_row, 64); \
+			memcpy(assume_aligned((char *)dst+swizzle(offset),64), \
+			       src_row, 64); \
 			src_row += 64; \
 			x -= 64; \
 			dx += swizzle_pixels; \
@@ -389,7 +396,7 @@ memcpy_to_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
 				tile_row + \
 				(dx >> tile_pixels) * tile_size + \
 				(dx & tile_mask) * cpp; \
-			memcpy((char *)dst + swizzle(offset), src_row, x); \
+			memcpy(assume_aligned((char *)dst + swizzle(offset), 64), src_row, x); \
 		} \
 	} \
 }
@@ -439,7 +446,7 @@ memcpy_from_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
 				tile_row + \
 				(sx >> tile_pixels) * tile_size + \
 				(sx & tile_mask) * cpp; \
-			memcpy(dst_row, (const char *)src + swizzle(offset), 64); \
+			memcpy(dst_row, assume_aligned((const char *)src + swizzle(offset), 64), 64); \
 			dst_row += 64; \
 			x -= 64; \
 			sx += swizzle_pixels; \
@@ -449,7 +456,7 @@ memcpy_from_tiled_x__##swizzle (const void *src, void *dst, int bpp, \
 				tile_row + \
 				(sx >> tile_pixels) * tile_size + \
 				(sx & tile_mask) * cpp; \
-			memcpy(dst_row, (const char *)src + swizzle(offset), x); \
+			memcpy(dst_row, assume_aligned((const char *)src + swizzle(offset), 64), x); \
 		} \
 	} \
 }
@@ -510,7 +517,7 @@ memcpy_to_tiled_x__gen2(const void *src, void *dst, int bpp,
 			if (dst_x & tile_mask) {
 				const unsigned x = (dst_x & tile_mask) * cpp;
 				const unsigned len = min(tile_width - x, w);
-				memcpy(tile_row + x, src, len);
+				memcpy(assume_misaligned(tile_row + x, tile_width, x), src, len);
 
 				tile_row += tile_size;
 				src = (const uint8_t *)src + len;
@@ -518,13 +525,14 @@ memcpy_to_tiled_x__gen2(const void *src, void *dst, int bpp,
 			}
 		}
 		while (w >= tile_width) {
-			memcpy(tile_row, src, tile_width);
+			memcpy(assume_aligned(tile_row, tile_width),
+			       src, tile_width);
 
 			tile_row += tile_size;
 			src = (const uint8_t *)src + tile_width;
 			w -= tile_width;
 		}
-		memcpy(tile_row, src, w);
+		memcpy(assume_aligned(tile_row, tile_width), src, w);
 		src = (const uint8_t *)src + src_stride + w;
 		dst_y++;
 	}
@@ -566,7 +574,7 @@ memcpy_from_tiled_x__gen2(const void *src, void *dst, int bpp,
 			if (src_x & tile_mask) {
 				const unsigned x = (src_x & tile_mask) * cpp;
 				const unsigned len = min(tile_width - x, w);
-				memcpy(dst, tile_row + x, len);
+				memcpy(dst, assume_misaligned(tile_row + x, tile_width, x), len);
 
 				tile_row += tile_size;
 				dst = (uint8_t *)dst + len;
@@ -574,13 +582,15 @@ memcpy_from_tiled_x__gen2(const void *src, void *dst, int bpp,
 			}
 		}
 		while (w >= tile_width) {
-			memcpy(dst, tile_row, tile_width);
+			memcpy(dst,
+			       assume_aligned(tile_row, tile_width),
+			       tile_width);
 
 			tile_row += tile_size;
 			dst = (uint8_t *)dst + tile_width;
 			w -= tile_width;
 		}
-		memcpy(dst, tile_row, w);
+		memcpy(dst, assume_aligned(tile_row, tile_width), w);
 		dst = (uint8_t *)dst + dst_stride + w;
 		src_y++;
 	}
diff --git a/src/sna/compiler.h b/src/sna/compiler.h
index c723137..a1634b3 100644
--- a/src/sna/compiler.h
+++ b/src/sna/compiler.h
@@ -71,6 +71,11 @@
 
 #if HAS_GCC(4, 7)
 #define avx2 fast __attribute__((target("avx2,avx,sse4.2,sse2,fpmath=sse")))
+#define assume_aligned(ptr, align) __builtin_assume_aligned((ptr), (align))
+#define assume_misaligned(ptr, align, offset) __builtin_assume_aligned((ptr), (align), (offset))
+#else
+#define assume_aligned(ptr, align) (ptr)
+#define assume_misaligned(ptr, align, offset) (ptr)
 #endif
 
 #if HAS_GCC(4, 5) && defined(__OPTIMIZE__)