xf86-video-intel: 4 commits - src/compat-api.h src/sna/sna_accel.c src/sna/sna_display.c src/sna/sna.h src/sna/sna_present.c src/sna/sna_trapezoids_imprecise.c src/sna/sna_trapezoids_mono.c src/sna/sna_trapezoids_precise.c

Chris Wilson ickle at kemper.freedesktop.org
Fri Feb 6 14:28:58 PST 2015


 src/compat-api.h                   |   11 +
 src/sna/sna.h                      |   13 +
 src/sna/sna_accel.c                |   41 ++--
 src/sna/sna_display.c              |   18 +-
 src/sna/sna_present.c              |   15 -
 src/sna/sna_trapezoids_imprecise.c |  327 +++++++++++++-----------------------
 src/sna/sna_trapezoids_mono.c      |   73 ++++++--
 src/sna/sna_trapezoids_precise.c   |  329 +++++++++++++------------------------
 8 files changed, 364 insertions(+), 463 deletions(-)

New commits:
commit c7517c4fe2c3025b08fa72ed81886b9544d008ba
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Feb 6 22:16:26 2015 +0000

    sna/present: Restrict pending flip check to enqueueing
    
    Present checks for a flip-compatible Pixmap prior to deciding whether to
    queue the flip after a pending flip, and at that point we do not want to
    reject a potential flip due to outstanding flips. That restriction only
    applies when we request the kernel to perform the flip, so move the
    check to the corresponding position.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_present.c b/src/sna/sna_present.c
index 90d08b5..96d43c8 100644
--- a/src/sna/sna_present.c
+++ b/src/sna/sna_present.c
@@ -213,11 +213,6 @@ check_flip__crtc(struct sna *sna,
 		return false;
 	}
 
-	if (sna->mode.flip_active) {
-		DBG(("%s: flips still pending\n", __FUNCTION__));
-		return false;
-	}
-
 	return true;
 }
 
@@ -414,6 +409,7 @@ sna_present_flip(RRCrtcPtr crtc,
 		 PixmapPtr pixmap,
 		 Bool sync_flip)
 {
+	struct sna *sna = to_sna_from_pixmap(pixmap);
 	struct kgem_bo *bo;
 
 	DBG(("%s(pipe=%d, event=%lld, msc=%lld, pixmap=%ld, sync?=%d)\n",
@@ -423,12 +419,17 @@ sna_present_flip(RRCrtcPtr crtc,
 	     (long long)target_msc,
 	     pixmap->drawable.serialNumber, sync_flip));
 
-	if (!check_flip__crtc(to_sna_from_pixmap(pixmap), crtc)) {
+	if (!check_flip__crtc(sna, crtc)) {
 		DBG(("%s: flip invalid for CRTC\n", __FUNCTION__));
 		return FALSE;
 	}
 
-	assert(to_sna_from_pixmap(pixmap)->present.unflip == 0);
+	if (sna->mode.flip_active) {
+		DBG(("%s: flips still pending\n", __FUNCTION__));
+		return false;
+	}
+
+	assert(sna->present.unflip == 0);
 
 	bo = get_flip_bo(pixmap);
 	if (bo == NULL) {
commit 934733e7c7e027e0bc73a6c102962b745ee3737a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Feb 6 15:59:09 2015 +0000

    sna: Separate per-CRTC client bo from the TearFree cache
    
    Using the same member as both an override and a cache in different
    modes leads unsurprisingly to confusion.
    
    Fixes regression from
    commit 3cfde9f04362a858ddfe7990109147e32eed516c
    Author: Chris Wilson <chris at chris-wilson.co.uk>
    Date:   Tue Jan 13 13:50:08 2015 +0000
    
        sna: Only instantiate the frontbuffer on the GPU if used
    
    Reported-by: Chris Bainbridge <chris.bainbridge at gmail.com>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89007
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c
index fa2970f..080ee19 100644
--- a/src/sna/sna_display.c
+++ b/src/sna/sna_display.c
@@ -119,7 +119,7 @@ struct sna_crtc {
 	struct drm_mode_modeinfo kmode;
 	PixmapPtr slave_pixmap;
 	DamagePtr slave_damage;
-	struct kgem_bo *bo, *shadow_bo, *client_bo;
+	struct kgem_bo *bo, *shadow_bo, *client_bo, *cache_bo;
 	struct sna_cursor *cursor;
 	unsigned int last_cursor_size;
 	uint32_t offset;
@@ -7208,7 +7208,7 @@ void sna_mode_redisplay(struct sna *sna)
 					RegionNull(&new_damage);
 					RegionCopy(&new_damage, &damage);
 
-					bo = sna_crtc->client_bo;
+					bo = sna_crtc->cache_bo;
 					if (bo == NULL) {
 						damage.extents = crtc->bounds;
 						damage.data = NULL;
@@ -7254,7 +7254,7 @@ void sna_mode_redisplay(struct sna *sna)
 
 							sna_crtc->bo = bo;
 							sna_crtc->bo->active_scanout++;
-							sna_crtc->client_bo = NULL;
+							sna_crtc->cache_bo = NULL;
 						} else {
 							DBG(("%s: flip [fb=%d] on crtc %d [%d, pipe=%d] failed - %d\n",
 							     __FUNCTION__, arg.fb_id, i, sna_crtc->id, sna_crtc->pipe, errno));
@@ -7267,7 +7267,7 @@ void sna_mode_redisplay(struct sna *sna)
 							sna_crtc_redisplay__fallback(crtc, &damage, sna_crtc->bo);
 
 							kgem_bo_destroy(&sna->kgem, bo);
-							sna_crtc->client_bo = NULL;
+							sna_crtc->cache_bo = NULL;
 						}
 					} else {
 						sna->mode.flip_active++;
@@ -7279,7 +7279,7 @@ void sna_mode_redisplay(struct sna *sna)
 						sna_crtc->flip_bo->active_scanout++;
 						sna_crtc->flip_serial = sna_crtc->mode_serial;
 
-						sna_crtc->client_bo = kgem_bo_reference(sna_crtc->bo);
+						sna_crtc->cache_bo = kgem_bo_reference(sna_crtc->bo);
 
 						DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n",
 						     __FUNCTION__, sna_crtc->id, sna_crtc->flip_bo->handle, sna_crtc->flip_bo->active_scanout, sna_crtc->flip_serial));
@@ -7355,7 +7355,7 @@ void sna_mode_redisplay(struct sna *sna)
 				damage.extents = crtc->bounds;
 				damage.data = NULL;
 
-				bo = sna_crtc->client_bo;
+				bo = sna_crtc->cache_bo;
 				if (bo == NULL)
 					bo = kgem_create_2d(&sna->kgem,
 							    crtc->mode.HDisplay,
@@ -7392,7 +7392,7 @@ void sna_mode_redisplay(struct sna *sna)
 
 						sna_crtc->bo = kgem_bo_reference(bo);
 						sna_crtc->bo->active_scanout++;
-						sna_crtc->client_bo = kgem_bo_reference(bo);
+						sna_crtc->cache_bo = kgem_bo_reference(bo);
 					} else {
 						BoxRec box;
 						DrawableRec tmp;
@@ -7422,7 +7422,7 @@ disable1:
 						}
 
 						kgem_bo_destroy(&sna->kgem, bo);
-						sna_crtc->client_bo = NULL;
+						sna_crtc->cache_bo = NULL;
 					}
 					continue;
 				}
@@ -7436,7 +7436,7 @@ disable1:
 				sna_crtc->flip_serial = sna_crtc->mode_serial;
 				sna_crtc->flip_pending = true;
 
-				sna_crtc->client_bo = kgem_bo_reference(sna_crtc->bo);
+				sna_crtc->cache_bo = kgem_bo_reference(sna_crtc->bo);
 				DBG(("%s: recording flip on CRTC:%d handle=%d, active_scanout=%d, serial=%d\n",
 				     __FUNCTION__, sna_crtc->id, sna_crtc->flip_bo->handle, sna_crtc->flip_bo->active_scanout, sna_crtc->flip_serial));
 			} else {
commit 5ed83687722f7238bfc0307022183710d4d3a70f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Feb 6 13:41:16 2015 +0000

    sna/trapezoids: Use incremental region clipping for spans
    
    Within a span, we have the advantage of knowing that we only need to
    intersect one box with the clip region, and that box has monotonically
    increasing y. This avoids having to compute RegionIntersect for every
    span element which was very slow (e.g. libreoffice).
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/compat-api.h b/src/compat-api.h
index c9c76eb..aa93bee 100644
--- a/src/compat-api.h
+++ b/src/compat-api.h
@@ -137,6 +137,17 @@ region_rects(const RegionRec *r)
 	return r->data ? (const BoxRec *)(r->data + 1) :  &r->extents;
 }
 
+inline static void
+region_get_boxes(const RegionRec *r, const BoxRec **s, const BoxRec **e)
+{
+	int n;
+	if (r->data)
+		*s = region_boxptr(r), n = r->data->numRects;
+	else
+		*s = &r->extents, n = 1;
+	*e = *s + n;
+}
+
 #ifndef INCLUDE_LEGACY_REGION_DEFINES
 #define RegionCreate(r, s) REGION_CREATE(NULL, r, s)
 #define RegionBreak(r) REGION_BREAK(NULL, r)
diff --git a/src/sna/sna.h b/src/sna/sna.h
index 3f01d55..076c7be 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -1189,6 +1189,19 @@ box_intersect(BoxPtr a, const BoxRec *b)
 	return true;
 }
 
+const BoxRec *
+__find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y);
+inline static const BoxRec *
+find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y)
+{
+    if (begin->y2 > y)
+	    return begin;
+    if (end->y1 <= y)
+	    return end;
+
+    return __find_clip_box_for_y(begin, end, y);
+}
+
 unsigned sna_cpu_detect(void);
 char *sna_cpu_features_to_string(unsigned features, char *line);
 
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 6aa4b27..6c516ad 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -7162,8 +7162,8 @@ sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 			   copy, 0, NULL);
 }
 
-static const BoxRec *
-find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y)
+const BoxRec *
+__find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y)
 {
     const BoxRec *mid;
 
@@ -7183,9 +7183,9 @@ find_clip_box_for_y(const BoxRec *begin, const BoxRec *end, int16_t y)
 	 * will return @mid, which is then known to be the
 	 * correct answer.
 	 */
-	return find_clip_box_for_y(begin, mid, y);
+	return __find_clip_box_for_y(begin, mid, y);
     else
-	return find_clip_box_for_y(mid, end, y);
+	return __find_clip_box_for_y(mid, end, y);
 }
 
 struct sna_fill_spans {
diff --git a/src/sna/sna_trapezoids_imprecise.c b/src/sna/sna_trapezoids_imprecise.c
index fc631f2..60c1387 100644
--- a/src/sna/sna_trapezoids_imprecise.c
+++ b/src/sna/sna_trapezoids_imprecise.c
@@ -1687,31 +1687,28 @@ struct span_thread {
 #define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
 struct span_thread_boxes {
 	const struct sna_composite_spans_op *op;
+	const BoxRec *clip_start, *clip_end;
 	int num_boxes;
 	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
 };
 
-static void span_thread_add_boxes(struct sna *sna, void *data,
-				  const BoxRec *box, int count, float alpha)
+static void span_thread_add_box(struct sna *sna, void *data,
+				const BoxRec *box, float alpha)
 {
 	struct span_thread_boxes *b = data;
 
 	__DBG(("%s: adding %d boxes with alpha=%f\n",
 	       __FUNCTION__, count, alpha));
 
-	assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES);
-	if (unlikely(b->num_boxes + count > SPAN_THREAD_MAX_BOXES)) {
-		DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count));
-		assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
+	if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) {
+		DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes));
 		b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes);
 		b->num_boxes = 0;
 	}
 
-	do {
-		b->boxes[b->num_boxes].box = *box++;
-		b->boxes[b->num_boxes].alpha = alpha;
-		b->num_boxes++;
-	} while (--count);
+	b->boxes[b->num_boxes].box = *box++;
+	b->boxes[b->num_boxes].alpha = alpha;
+	b->num_boxes++;
 	assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
 }
 
@@ -1737,7 +1734,7 @@ span_thread_box(struct sna *sna,
 		}
 	}
 
-	span_thread_add_boxes(sna, op, box, 1, AREA_TO_ALPHA(coverage));
+	span_thread_add_box(sna, op, box, AREA_TO_ALPHA(coverage));
 }
 
 static void
@@ -1747,35 +1744,28 @@ span_thread_clipped_box(struct sna *sna,
 			const BoxRec *box,
 			int coverage)
 {
-	pixman_region16_t region;
+	struct span_thread_boxes *b = (struct span_thread_boxes *)op;
+	const BoxRec *c;
 
 	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2,
 	       AREA_TO_ALPHA(coverage)));
 
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	if (region_num_rects(&region)) {
-		struct span_thread_boxes *b = (struct span_thread_boxes *)op;
-
-		if (region.data == NULL && b->num_boxes) {
-			struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1];
-			if (bb->box.x1 == region.extents.x1 &&
-			    bb->box.x2 == region.extents.x2 &&
-			    bb->box.y2 == region.extents.y1 &&
-			    bb->alpha == AREA_TO_ALPHA(coverage)) {
-				bb->box.y2 = region.extents.y2;
-				__DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2));
-				goto out;
-			}
-		}
+	b->clip_start =
+		find_clip_box_for_y(b->clip_start, b->clip_end, box->y1);
 
-		span_thread_add_boxes(sna, op,
-				      region_rects(&region),
-				      region_num_rects(&region),
-				      AREA_TO_ALPHA(coverage));
+	c = b->clip_start;
+	while (c != b->clip_end) {
+		BoxRec clipped;
+
+		if (box->y2 <= c->y1)
+			break;
+
+		clipped = *box;
+		if (!box_intersect(&clipped, c++))
+			continue;
+
+		span_thread_add_box(sna, op, &clipped, AREA_TO_ALPHA(coverage));
 	}
-out:
-	pixman_region_fini(&region);
 }
 
 static span_func_t
@@ -1806,6 +1796,16 @@ thread_choose_span(struct sna_composite_spans_op *tmp,
 	return span;
 }
 
+inline static void
+span_thread_boxes_init(struct span_thread_boxes *boxes,
+		       const struct sna_composite_spans_op *op,
+		       const RegionRec *clip)
+{
+	boxes->op = op;
+	region_get_boxes(clip, &boxes->clip_start, &boxes->clip_end);
+	boxes->num_boxes = 0;
+}
+
 static void
 span_thread(void *arg)
 {
@@ -1818,8 +1818,7 @@ span_thread(void *arg)
 	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
 		return;
 
-	boxes.op = thread->op;
-	boxes.num_boxes = 0;
+	span_thread_boxes_init(&boxes, thread->op, thread->clip);
 
 	y1 = thread->extents.y1 - thread->draw_y;
 	y2 = thread->extents.y2 - thread->draw_y;
@@ -2219,6 +2218,52 @@ static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v)
 	} while (--h);
 }
 
+struct clipped_span {
+	span_func_t span;
+	const BoxRec *clip_start, *clip_end;
+};
+
+static void
+tor_blt_clipped(struct sna *sna,
+		struct sna_composite_spans_op *op,
+		pixman_region16_t *clip,
+		const BoxRec *box,
+		int coverage)
+{
+	struct clipped_span *cs = (struct clipped_span *)clip;
+	const BoxRec *c;
+
+	cs->clip_start =
+		find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1);
+
+	c = cs->clip_start;
+	while (c != cs->clip_end) {
+		BoxRec clipped;
+
+		if (box->y2 <= c->y1)
+			break;
+
+		clipped = *box;
+		if (!box_intersect(&clipped, c++))
+			continue;
+
+		cs->span(sna, op, NULL, &clipped, coverage);
+	}
+}
+
+inline static span_func_t
+clipped_span(struct clipped_span *cs,
+	     span_func_t span,
+	     const RegionRec *clip)
+{
+	if (clip->data) {
+		cs->span = span;
+		region_get_boxes(clip, &cs->clip_start, &cs->clip_end);
+		span = tor_blt_clipped;
+	}
+	return span;
+}
+
 static void
 tor_blt_src(struct sna *sna,
 	    struct sna_composite_spans_op *op,
@@ -2232,25 +2277,6 @@ tor_blt_src(struct sna *sna,
 }
 
 static void
-tor_blt_src_clipped(struct sna *sna,
-		    struct sna_composite_spans_op *op,
-		    pixman_region16_t *clip,
-		    const BoxRec *box,
-		    int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		tor_blt_src(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
-
-static void
 tor_blt_in(struct sna *sna,
 	   struct sna_composite_spans_op *op,
 	   pixman_region16_t *clip,
@@ -2282,25 +2308,6 @@ tor_blt_in(struct sna *sna,
 }
 
 static void
-tor_blt_in_clipped(struct sna *sna,
-		   struct sna_composite_spans_op *op,
-		   pixman_region16_t *clip,
-		   const BoxRec *box,
-		   int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		tor_blt_in(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
-
-static void
 tor_blt_add(struct sna *sna,
 	    struct sna_composite_spans_op *op,
 	    pixman_region16_t *clip,
@@ -2339,25 +2346,6 @@ tor_blt_add(struct sna *sna,
 }
 
 static void
-tor_blt_add_clipped(struct sna *sna,
-		    struct sna_composite_spans_op *op,
-		    pixman_region16_t *clip,
-		    const BoxRec *box,
-		    int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		tor_blt_add(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
-
-static void
 tor_blt_lerp32(struct sna *sna,
 	       struct sna_composite_spans_op *op,
 	       pixman_region16_t *clip,
@@ -2412,25 +2400,6 @@ tor_blt_lerp32(struct sna *sna,
 	}
 }
 
-static void
-tor_blt_lerp32_clipped(struct sna *sna,
-		       struct sna_composite_spans_op *op,
-		       pixman_region16_t *clip,
-		       const BoxRec *box,
-		       int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		tor_blt_lerp32(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
-
 struct pixman_inplace {
 	pixman_image_t *image, *source, *mask;
 	uint32_t color;
@@ -2460,24 +2429,6 @@ pixmask_span_solid(struct sna *sna,
 			       pi->dx + box->x1, pi->dy + box->y1,
 			       box->x2 - box->x1, box->y2 - box->y1);
 }
-static void
-pixmask_span_solid__clipped(struct sna *sna,
-			    struct sna_composite_spans_op *op,
-			    pixman_region16_t *clip,
-			    const BoxRec *box,
-			    int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		pixmask_span_solid(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
 
 static void
 pixmask_span(struct sna *sna,
@@ -2500,24 +2451,6 @@ pixmask_span(struct sna *sna,
 			       pi->dx + box->x1, pi->dy + box->y1,
 			       box->x2 - box->x1, box->y2 - box->y1);
 }
-static void
-pixmask_span__clipped(struct sna *sna,
-		      struct sna_composite_spans_op *op,
-		      pixman_region16_t *clip,
-		      const BoxRec *box,
-		      int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		pixmask_span(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
 
 struct inplace_x8r8g8b8_thread {
 	xTrapezoid *traps;
@@ -2536,6 +2469,7 @@ static void inplace_x8r8g8b8_thread(void *arg)
 	struct inplace_x8r8g8b8_thread *thread = arg;
 	struct tor tor;
 	span_func_t span;
+	struct clipped_span clipped;
 	RegionPtr clip;
 	int y1, y2, n;
 
@@ -2566,12 +2500,11 @@ static void inplace_x8r8g8b8_thread(void *arg)
 		inplace.stride = pixmap->devKind;
 		inplace.color = thread->color;
 
-		if (clip->data)
-			span = tor_blt_lerp32_clipped;
-		else
-			span = tor_blt_lerp32;
+		span = clipped_span(&clipped, tor_blt_lerp32, clip);
 
-		tor_render(NULL, &tor, (void*)&inplace, clip, span, false);
+		tor_render(NULL, &tor,
+			   (void*)&inplace, (void*)&clipped,
+			   span, false);
 	} else if (thread->is_solid) {
 		struct pixman_inplace pi;
 
@@ -2584,12 +2517,11 @@ static void inplace_x8r8g8b8_thread(void *arg)
 						     1, 1, pi.bits, 0);
 		pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
 
-		if (clip->data)
-			span = pixmask_span_solid__clipped;
-		else
-			span = pixmask_span_solid;
+		span = clipped_span(&clipped, pixmask_span_solid, clip);
 
-		tor_render(NULL, &tor, (void*)&pi, clip, span, false);
+		tor_render(NULL, &tor,
+			   (void*)&pi, (void *)&clipped,
+			   span, false);
 
 		pixman_image_unref(pi.source);
 		pixman_image_unref(pi.image);
@@ -2608,12 +2540,11 @@ static void inplace_x8r8g8b8_thread(void *arg)
 		pi.bits = pixman_image_get_data(pi.mask);
 		pi.op = thread->op;
 
-		if (clip->data)
-			span = pixmask_span__clipped;
-		else
-			span = pixmask_span;
+		span = clipped_span(&clipped, pixmask_span, clip);
 
-		tor_render(NULL, &tor, (void*)&pi, clip, span, false);
+		tor_render(NULL, &tor,
+			   (void*)&pi, (void *)&clipped,
+			   span, false);
 
 		pixman_image_unref(pi.mask);
 		pixman_image_unref(pi.source);
@@ -2727,6 +2658,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 	if (num_threads == 1) {
 		struct tor tor;
 		span_func_t span;
+		struct clipped_span clipped;
 
 		if (!tor_init(&tor, &region.extents, 2*ntrap))
 			return true;
@@ -2752,17 +2684,15 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 			inplace.stride = pixmap->devKind;
 			inplace.color = color;
 
-			if (dst->pCompositeClip->data)
-				span = tor_blt_lerp32_clipped;
-			else
-				span = tor_blt_lerp32;
+			span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip);
 
 			DBG(("%s: render inplace op=%d, color=%08x\n",
 			     __FUNCTION__, op, color));
 
 			if (sigtrap_get() == 0) {
-				tor_render(NULL, &tor, (void*)&inplace,
-					   dst->pCompositeClip, span, false);
+				tor_render(NULL, &tor,
+					   (void*)&inplace, (void*)&clipped,
+					   span, false);
 				sigtrap_put();
 			}
 		} else if (is_solid) {
@@ -2777,15 +2707,12 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 							     1, 1, pi.bits, 0);
 			pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
 
-			if (dst->pCompositeClip->data)
-				span = pixmask_span_solid__clipped;
-			else
-				span = pixmask_span_solid;
+			span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip);
 
 			if (sigtrap_get() == 0) {
-				tor_render(NULL, &tor, (void*)&pi,
-					   dst->pCompositeClip, span,
-					   false);
+				tor_render(NULL, &tor,
+					   (void*)&pi, (void*)&clipped,
+					   span, false);
 				sigtrap_put();
 			}
 
@@ -2806,15 +2733,12 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 			pi.bits = pixman_image_get_data(pi.mask);
 			pi.op = op;
 
-			if (dst->pCompositeClip->data)
-				span = pixmask_span__clipped;
-			else
-				span = pixmask_span;
+			span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip);
 
 			if (sigtrap_get() == 0) {
-				tor_render(NULL, &tor, (void*)&pi,
-					   dst->pCompositeClip, span,
-					   false);
+				tor_render(NULL, &tor,
+					   (void*)&pi, (void*)&clipped,
+					   span, false);
 				sigtrap_put();
 			}
 
@@ -2876,9 +2800,9 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 
 struct inplace_thread {
 	xTrapezoid *traps;
-	RegionPtr clip;
 	span_func_t span;
 	struct inplace inplace;
+	struct clipped_span clipped;
 	BoxRec extents;
 	int dx, dy;
 	int draw_x, draw_y;
@@ -2903,8 +2827,9 @@ static void inplace_thread(void *arg)
 		tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
 	}
 
-	tor_render(NULL, &tor, (void*)&thread->inplace,
-		   thread->clip, thread->span, thread->unbounded);
+	tor_render(NULL, &tor,
+		   (void*)&thread->inplace, (void*)&thread->clipped,
+		   thread->span, thread->unbounded);
 
 	tor_fini(&tor);
 }
@@ -2918,6 +2843,7 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
 				 bool fallback)
 {
 	struct inplace inplace;
+	struct clipped_span clipped;
 	span_func_t span;
 	PixmapPtr pixmap;
 	struct sna_pixmap *priv;
@@ -3034,21 +2960,12 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
 	     region.extents.x2, region.extents.y2));
 
 	if (op == PictOpSrc) {
-		if (dst->pCompositeClip->data)
-			span = tor_blt_src_clipped;
-		else
-			span = tor_blt_src;
+		span = tor_blt_src;
 	} else if (op == PictOpIn) {
-		if (dst->pCompositeClip->data)
-			span = tor_blt_in_clipped;
-		else
-			span = tor_blt_in;
+		span = tor_blt_in;
 	} else {
 		assert(op == PictOpAdd);
-		if (dst->pCompositeClip->data)
-			span = tor_blt_add_clipped;
-		else
-			span = tor_blt_add;
+		span = tor_blt_add;
 	}
 
 	DBG(("%s: move-to-cpu\n", __FUNCTION__));
@@ -3066,6 +2983,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
 	inplace.stride = pixmap->devKind;
 	inplace.opacity = color >> 24;
 
+	span = clipped_span(&clipped, span, dst->pCompositeClip);
+
 	num_threads = 1;
 	if ((flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
 		num_threads = sna_use_threads(region.extents.x2 - region.extents.x1,
@@ -3086,8 +3005,9 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
 		}
 
 		if (sigtrap_get() == 0) {
-			tor_render(NULL, &tor, (void*)&inplace,
-				   dst->pCompositeClip, span, unbounded);
+			tor_render(NULL, &tor,
+				   (void*)&inplace, (void *)&clipped,
+				   span, unbounded);
 			sigtrap_put();
 		}
 
@@ -3104,8 +3024,8 @@ imprecise_trapezoid_span_inplace(struct sna *sna,
 		threads[0].traps = traps;
 		threads[0].ntrap = ntrap;
 		threads[0].inplace = inplace;
+		threads[0].clipped = clipped;
 		threads[0].extents = region.extents;
-		threads[0].clip = dst->pCompositeClip;
 		threads[0].span = span;
 		threads[0].unbounded = unbounded;
 		threads[0].dx = dx;
@@ -3736,8 +3656,7 @@ tristrip_thread(void *arg)
 	if (!tor_init(&tor, &thread->extents, 2*thread->count))
 		return;
 
-	boxes.op = thread->op;
-	boxes.num_boxes = 0;
+	span_thread_boxes_init(&boxes, thread->op, thread->clip);
 
 	cw = 0; ccw = 1;
 	polygon_add_line(tor.polygon,
diff --git a/src/sna/sna_trapezoids_mono.c b/src/sna/sna_trapezoids_mono.c
index 808703a..29cb58f 100644
--- a/src/sna/sna_trapezoids_mono.c
+++ b/src/sna/sna_trapezoids_mono.c
@@ -72,6 +72,7 @@ struct mono {
 	struct sna *sna;
 	struct sna_composite_op op;
 	pixman_region16_t clip;
+	const BoxRec *clip_start, *clip_end;
 
 	fastcall void (*span)(struct mono *, int, int, BoxPtr);
 
@@ -474,6 +475,34 @@ mono_span__fast(struct mono *c, int x1, int x2, BoxPtr box)
 	c->op.box(c->sna, &c->op, box);
 }
 
+fastcall static void
+mono_span__clipped(struct mono *c, int x1, int x2, BoxPtr box)
+{
+	const BoxRec *b;
+
+	__DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));
+
+	c->clip_start =
+		find_clip_box_for_y(c->clip_start, c->clip_end, box->y1);
+
+	b = c->clip_start;
+	while (b != c->clip_end) {
+		BoxRec clipped;
+
+		if (box->y2 <= b->y1)
+			break;
+
+		clipped.x1 = x1;
+		clipped.x2 = x2;
+		clipped.y1 = box->y1;
+		clipped.y2 = box->y2;
+		if (!box_intersect(&clipped, b++))
+			continue;
+
+		c->op.box(c->sna, &c->op, &clipped);
+	}
+}
+
 struct mono_span_thread_boxes {
 	const struct sna_composite_op *op;
 #define MONO_SPAN_MAX_BOXES (8192/sizeof(BoxRec))
@@ -482,40 +511,45 @@ struct mono_span_thread_boxes {
 };
 
 inline static void
-thread_mono_span_add_boxes(struct mono *c, const BoxRec *box, int count)
+thread_mono_span_add_box(struct mono *c, const BoxRec *box)
 {
 	struct mono_span_thread_boxes *b = c->op.priv;
 
-	assert(count > 0 && count <= MONO_SPAN_MAX_BOXES);
-	if (unlikely(b->num_boxes + count > MONO_SPAN_MAX_BOXES)) {
+	if (unlikely(b->num_boxes == MONO_SPAN_MAX_BOXES)) {
 		b->op->thread_boxes(c->sna, b->op, b->boxes, b->num_boxes);
 		b->num_boxes = 0;
 	}
 
-	memcpy(b->boxes + b->num_boxes, box, count*sizeof(BoxRec));
-	b->num_boxes += count;
+	b->boxes[b->num_boxes++] = *box;
 	assert(b->num_boxes <= MONO_SPAN_MAX_BOXES);
 }
 
 fastcall static void
 thread_mono_span_clipped(struct mono *c, int x1, int x2, BoxPtr box)
 {
-	pixman_region16_t region;
+	const BoxRec *b;
 
 	__DBG(("%s [%d, %d]\n", __FUNCTION__, x1, x2));
 
-	box->x1 = x1;
-	box->x2 = x2;
+	c->clip_start =
+		find_clip_box_for_y(c->clip_start, c->clip_end, box->y1);
 
-	assert(c->clip.data);
+	b = c->clip_start;
+	while (b != c->clip_end) {
+		BoxRec clipped;
 
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, &c->clip);
-	if (region_num_rects(&region))
-		thread_mono_span_add_boxes(c,
-					   region_rects(&region),
-					   region_num_rects(&region));
-	pixman_region_fini(&region);
+		if (box->y2 <= b->y1)
+			break;
+
+		clipped.x1 = x1;
+		clipped.x2 = x2;
+		clipped.y1 = box->y1;
+		clipped.y2 = box->y2;
+		if (!box_intersect(&clipped, b++))
+			continue;
+
+		thread_mono_span_add_box(c, &clipped);
+	}
 }
 
 fastcall static void
@@ -525,7 +559,7 @@ thread_mono_span(struct mono *c, int x1, int x2, BoxPtr box)
 
 	box->x1 = x1;
 	box->x2 = x2;
-	thread_mono_span_add_boxes(c, box, 1);
+	thread_mono_span_add_box(c, box);
 }
 
 inline static void
@@ -717,6 +751,7 @@ mono_span_thread(void *arg)
 		if (RegionNil(&mono.clip))
 			return;
 	}
+	region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end);
 
 	boxes.op = thread->op;
 	boxes.num_boxes = 0;
@@ -891,9 +926,12 @@ mono_trapezoids_span_converter(struct sna *sna,
 
 	if (mono.clip.data == NULL && mono.op.damage == NULL)
 		mono.span = mono_span__fast;
+	else if (mono.clip.data != NULL && mono.op.damage == NULL)
+		mono.span = mono_span__clipped;
 	else
 		mono.span = mono_span;
 
+	region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end);
 	mono_render(&mono);
 	mono.op.done(mono.sna, &mono.op);
 	mono_fini(&mono);
@@ -939,6 +977,7 @@ mono_trapezoids_span_converter(struct sna *sna,
 					       mono.clip.extents.x2 - mono.clip.extents.x1,
 					       mono.clip.extents.y2 - mono.clip.extents.y1,
 					       COMPOSITE_PARTIAL, memset(&mono.op, 0, sizeof(mono.op)))) {
+			region_get_boxes(&mono.clip, &mono.clip_start, &mono.clip_end);
 			mono_render(&mono);
 			mono.op.done(mono.sna, &mono.op);
 		}
diff --git a/src/sna/sna_trapezoids_precise.c b/src/sna/sna_trapezoids_precise.c
index 53f61d1..e7ea433 100644
--- a/src/sna/sna_trapezoids_precise.c
+++ b/src/sna/sna_trapezoids_precise.c
@@ -1635,31 +1635,28 @@ struct span_thread {
 #define SPAN_THREAD_MAX_BOXES (8192/sizeof(struct sna_opacity_box))
 struct span_thread_boxes {
 	const struct sna_composite_spans_op *op;
+	const BoxRec *clip_start, *clip_end;
 	int num_boxes;
 	struct sna_opacity_box boxes[SPAN_THREAD_MAX_BOXES];
 };
 
-static void span_thread_add_boxes(struct sna *sna, void *data,
-				  const BoxRec *box, int count, float alpha)
+static void span_thread_add_box(struct sna *sna, void *data,
+				const BoxRec *box, float alpha)
 {
 	struct span_thread_boxes *b = data;
 
 	__DBG(("%s: adding %d boxes with alpha=%f\n",
 	       __FUNCTION__, count, alpha));
 
-	assert(count > 0 && count <= SPAN_THREAD_MAX_BOXES);
-	if (unlikely(b->num_boxes + count > SPAN_THREAD_MAX_BOXES)) {
-		DBG(("%s: flushing %d boxes, adding %d\n", __FUNCTION__, b->num_boxes, count));
-		assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
+	if (unlikely(b->num_boxes == SPAN_THREAD_MAX_BOXES)) {
+		DBG(("%s: flushing %d boxes\n", __FUNCTION__, b->num_boxes));
 		b->op->thread_boxes(sna, b->op, b->boxes, b->num_boxes);
 		b->num_boxes = 0;
 	}
 
-	do {
-		b->boxes[b->num_boxes].box = *box++;
-		b->boxes[b->num_boxes].alpha = alpha;
-		b->num_boxes++;
-	} while (--count);
+	b->boxes[b->num_boxes].box = *box++;
+	b->boxes[b->num_boxes].alpha = alpha;
+	b->num_boxes++;
 	assert(b->num_boxes <= SPAN_THREAD_MAX_BOXES);
 }
 
@@ -1685,7 +1682,7 @@ span_thread_box(struct sna *sna,
 		}
 	}
 
-	span_thread_add_boxes(sna, op, box, 1, AREA_TO_FLOAT(coverage));
+	span_thread_add_box(sna, op, box, AREA_TO_FLOAT(coverage));
 }
 
 static void
@@ -1695,35 +1692,28 @@ span_thread_clipped_box(struct sna *sna,
 			const BoxRec *box,
 			int coverage)
 {
-	pixman_region16_t region;
+	struct span_thread_boxes *b = (struct span_thread_boxes *)op;
+	const BoxRec *c;
 
 	__DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2,
 	       AREA_TO_FLOAT(coverage)));
 
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	if (region_num_rects(&region)) {
-		struct span_thread_boxes *b = (struct span_thread_boxes *)op;
-
-		if (region.data == NULL && b->num_boxes) {
-			struct sna_opacity_box *bb = &b->boxes[b->num_boxes-1];
-			if (bb->box.x1 == region.extents.x1 &&
-			    bb->box.x2 == region.extents.x2 &&
-			    bb->box.y2 == region.extents.y1 &&
-			    bb->alpha == AREA_TO_FLOAT(coverage)) {
-				bb->box.y2 = region.extents.y2;
-				__DBG(("%s: contracted double row: %d -> %d\n", __func__, bb->box.y1, bb->box.y2));
-				goto out;
-			}
-		}
+	b->clip_start =
+		find_clip_box_for_y(b->clip_start, b->clip_end, box->y1);
 
-		span_thread_add_boxes(sna, op,
-				      region_rects(&region),
-				      region_num_rects(&region),
-				      AREA_TO_FLOAT(coverage));
+	c = b->clip_start;
+	while (c != b->clip_end) {
+		BoxRec clipped;
+
+		if (box->y2 <= c->y1)
+			break;
+
+		clipped = *box;
+		if (!box_intersect(&clipped, c++))
+			continue;
+
+		span_thread_add_box(sna, op, &clipped, AREA_TO_FLOAT(coverage));
 	}
-out:
-	pixman_region_fini(&region);
 }
 
 static span_func_t
@@ -1741,7 +1731,7 @@ thread_choose_span(struct sna_composite_spans_op *tmp,
 
 	assert(!is_mono(dst, maskFormat));
 	assert(tmp->thread_boxes);
-	DBG(("%s: clipped? %d\n", __FUNCTION__, clip->data != NULL));
+	DBG(("%s: clipped? %d x %d\n", __FUNCTION__, clip->data != NULL, region_num_rects(clip)));
 	if (clip->data)
 		span = span_thread_clipped_box;
 	else
@@ -1750,6 +1740,17 @@ thread_choose_span(struct sna_composite_spans_op *tmp,
 	return span;
 }
 
+inline static void /* Prepare a span_thread_boxes: bind the op, capture the clip box range, start with no boxes. */
+span_thread_boxes_init(struct span_thread_boxes *boxes,
+		       const struct sna_composite_spans_op *op,
+		       const RegionRec *clip)
+{
+	boxes->op = op;
+	boxes->clip_start = region_rects(clip); /* first clip box, as returned by region_rects() */
+	boxes->clip_end = boxes->clip_start + region_num_rects(clip); /* one past the last clip box */
+	boxes->num_boxes = 0; /* nothing accumulated yet */
+}
+
 static void
 span_thread(void *arg)
 {
@@ -1762,8 +1763,7 @@ span_thread(void *arg)
 	if (!tor_init(&tor, &thread->extents, 2*thread->ntrap))
 		return;
 
-	boxes.op = thread->op;
-	boxes.num_boxes = 0;
+	span_thread_boxes_init(&boxes, thread->op, thread->clip);
 
 	y1 = thread->extents.y1 - thread->draw_y;
 	y2 = thread->extents.y2 - thread->draw_y;
@@ -2212,6 +2212,52 @@ static force_inline uint8_t coverage_opacity(int coverage, uint8_t opacity)
 	return opacity == 255 ? coverage : mul_8_8(coverage, opacity);
 }
 
+struct clipped_span { /* state that tor_blt_clipped() receives through its clip argument */
+	span_func_t span; /* span function called for every clipped box */
+	const BoxRec *clip_start, *clip_end; /* [clip_start, clip_end) clip boxes left to scan; clip_start is advanced by tor_blt_clipped() */
+};
+
+static void /* Intersect one span box with the remaining clip boxes and forward each non-empty piece to cs->span. */
+tor_blt_clipped(struct sna *sna,
+		struct sna_composite_spans_op *op,
+		pixman_region16_t *clip, /* really a struct clipped_span *, see the cast below */
+		const BoxRec *box,
+		int coverage)
+{
+	struct clipped_span *cs = (struct clipped_span *)clip; /* callers must pass a clipped_span here, not a region */
+	const BoxRec *c;
+
+	cs->clip_start =
+		find_clip_box_for_y(cs->clip_start, cs->clip_end, box->y1); /* NOTE(review): presumably skips clip boxes ending above box->y1 -- confirm in helper */
+
+	c = cs->clip_start;
+	while (c != cs->clip_end) {
+		BoxRec clipped;
+
+		if (box->y2 <= c->y1) /* clip box starts at or below the span's bottom edge: stop (assumes boxes sorted by y -- TODO confirm) */
+			break;
+
+		clipped = *box; /* work on a copy so *box stays intact for the next clip box */
+		if (!box_intersect(&clipped, c++)) /* c advances either way; presumably clips in place and returns false when empty -- verify */
+			continue;
+
+		cs->span(sna, op, NULL, &clipped, coverage); /* the wrapped span function gets a NULL clip */
+	}
+}
+
+inline static span_func_t /* Returns span unchanged when clip->data is NULL; otherwise fills in cs and returns tor_blt_clipped. */
+clipped_span(struct clipped_span *cs,
+	     span_func_t span,
+	     const RegionRec *clip)
+{
+	if (clip->data) { /* NOTE(review): presumably non-NULL data means a multi-box clip region -- confirm */
+		cs->span = span; /* remember the unclipped span function for tor_blt_clipped() to call */
+		region_get_boxes(clip, &cs->clip_start, &cs->clip_end); /* helper defined elsewhere; receives pointers to both range ends */
+		span = tor_blt_clipped; /* caller must then pass cs (cast) as the clip argument */
+	}
+	return span;
+}
+
 static void _tor_blt_src(struct inplace *in, const BoxRec *box, uint8_t v)
 {
 	uint8_t *ptr = in->ptr;
@@ -2247,25 +2293,6 @@ tor_blt_src(struct sna *sna,
 }
 
 static void
-tor_blt_src_clipped(struct sna *sna,
-		    struct sna_composite_spans_op *op,
-		    pixman_region16_t *clip,
-		    const BoxRec *box,
-		    int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		tor_blt_src(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
-
-static void
 tor_blt_in(struct sna *sna,
 	   struct sna_composite_spans_op *op,
 	   pixman_region16_t *clip,
@@ -2297,25 +2324,6 @@ tor_blt_in(struct sna *sna,
 }
 
 static void
-tor_blt_in_clipped(struct sna *sna,
-		   struct sna_composite_spans_op *op,
-		   pixman_region16_t *clip,
-		   const BoxRec *box,
-		   int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		tor_blt_in(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
-
-static void
 tor_blt_add(struct sna *sna,
 	    struct sna_composite_spans_op *op,
 	    pixman_region16_t *clip,
@@ -2354,25 +2362,6 @@ tor_blt_add(struct sna *sna,
 }
 
 static void
-tor_blt_add_clipped(struct sna *sna,
-		    struct sna_composite_spans_op *op,
-		    pixman_region16_t *clip,
-		    const BoxRec *box,
-		    int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		tor_blt_add(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
-
-static void
 tor_blt_lerp32(struct sna *sna,
 	       struct sna_composite_spans_op *op,
 	       pixman_region16_t *clip,
@@ -2425,25 +2414,6 @@ tor_blt_lerp32(struct sna *sna,
 	}
 }
 
-static void
-tor_blt_lerp32_clipped(struct sna *sna,
-		       struct sna_composite_spans_op *op,
-		       pixman_region16_t *clip,
-		       const BoxRec *box,
-		       int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		tor_blt_lerp32(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
-
 struct pixman_inplace {
 	pixman_image_t *image, *source, *mask;
 	uint32_t color;
@@ -2471,24 +2441,6 @@ pixmask_span_solid(struct sna *sna,
 			       pi->dx + box->x1, pi->dy + box->y1,
 			       box->x2 - box->x1, box->y2 - box->y1);
 }
-static void
-pixmask_span_solid__clipped(struct sna *sna,
-			    struct sna_composite_spans_op *op,
-			    pixman_region16_t *clip,
-			    const BoxRec *box,
-			    int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		pixmask_span_solid(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
 
 static void
 pixmask_span(struct sna *sna,
@@ -2509,24 +2461,6 @@ pixmask_span(struct sna *sna,
 			       pi->dx + box->x1, pi->dy + box->y1,
 			       box->x2 - box->x1, box->y2 - box->y1);
 }
-static void
-pixmask_span__clipped(struct sna *sna,
-		      struct sna_composite_spans_op *op,
-		      pixman_region16_t *clip,
-		      const BoxRec *box,
-		      int coverage)
-{
-	pixman_region16_t region;
-	int n;
-
-	pixman_region_init_rects(&region, box, 1);
-	RegionIntersect(&region, &region, clip);
-	n = region_num_rects(&region);
-	box = region_rects(&region);
-	while (n--)
-		pixmask_span(sna, op, NULL, box++, coverage);
-	pixman_region_fini(&region);
-}
 
 struct inplace_x8r8g8b8_thread {
 	xTrapezoid *traps;
@@ -2545,6 +2479,7 @@ static void inplace_x8r8g8b8_thread(void *arg)
 	struct inplace_x8r8g8b8_thread *thread = arg;
 	struct tor tor;
 	span_func_t span;
+	struct clipped_span clipped;
 	RegionPtr clip;
 	int y1, y2, n;
 
@@ -2575,12 +2510,11 @@ static void inplace_x8r8g8b8_thread(void *arg)
 		inplace.stride = pixmap->devKind;
 		inplace.color = thread->color;
 
-		if (clip->data)
-			span = tor_blt_lerp32_clipped;
-		else
-			span = tor_blt_lerp32;
+		span = clipped_span(&clipped, tor_blt_lerp32, clip);
 
-		tor_render(NULL, &tor, (void*)&inplace, clip, span, false);
+		tor_render(NULL, &tor,
+			   (void*)&inplace, (void *)&clipped,
+			   span, false);
 	} else if (thread->is_solid) {
 		struct pixman_inplace pi;
 
@@ -2593,10 +2527,7 @@ static void inplace_x8r8g8b8_thread(void *arg)
 						     1, 1, pi.bits, 0);
 		pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
 
-		if (clip->data)
-			span = pixmask_span_solid__clipped;
-		else
-			span = pixmask_span_solid;
+		span = clipped_span(&clipped, pixmask_span_solid, clip);
 
 		tor_render(NULL, &tor, (void*)&pi, clip, span, false);
 
@@ -2617,12 +2548,11 @@ static void inplace_x8r8g8b8_thread(void *arg)
 		pi.bits = pixman_image_get_data(pi.mask);
 		pi.op = thread->op;
 
-		if (clip->data)
-			span = pixmask_span__clipped;
-		else
-			span = pixmask_span;
+		span = clipped_span(&clipped, pixmask_span, clip);
 
-		tor_render(NULL, &tor, (void*)&pi, clip, span, false);
+		tor_render(NULL, &tor,
+			   (void*)&pi, (void *)&clipped,
+			   span, false);
 
 		pixman_image_unref(pi.mask);
 		pixman_image_unref(pi.source);
@@ -2741,6 +2671,7 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 	if (num_threads == 1) {
 		struct tor tor;
 		span_func_t span;
+		struct clipped_span clipped;
 
 		if (!tor_init(&tor, &region.extents, 2*ntrap))
 			return true;
@@ -2766,17 +2697,14 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 			inplace.stride = pixmap->devKind;
 			inplace.color = color;
 
-			if (dst->pCompositeClip->data)
-				span = tor_blt_lerp32_clipped;
-			else
-				span = tor_blt_lerp32;
-
+			span = clipped_span(&clipped, tor_blt_lerp32, dst->pCompositeClip);
 			DBG(("%s: render inplace op=%d, color=%08x\n",
 			     __FUNCTION__, op, color));
 
 			if (sigtrap_get() == 0) {
-				tor_render(NULL, &tor, (void*)&inplace,
-					   dst->pCompositeClip, span, false);
+				tor_render(NULL, &tor,
+					   (void*)&inplace, (void*)&clipped,
+					   span, false);
 				sigtrap_put();
 			}
 		} else if (is_solid) {
@@ -2791,15 +2719,11 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 							     1, 1, pi.bits, 0);
 			pixman_image_set_repeat(pi.source, PIXMAN_REPEAT_NORMAL);
 
-			if (dst->pCompositeClip->data)
-				span = pixmask_span_solid__clipped;
-			else
-				span = pixmask_span_solid;
-
+			span = clipped_span(&clipped, pixmask_span_solid, dst->pCompositeClip);
 			if (sigtrap_get() == 0) {
-				tor_render(NULL, &tor, (void*)&pi,
-					   dst->pCompositeClip, span,
-					   false);
+				tor_render(NULL, &tor,
+					   (void*)&pi, (void*)&clipped,
+					    span, false);
 				sigtrap_put();
 			}
 
@@ -2820,15 +2744,11 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 			pi.bits = pixman_image_get_data(pi.mask);
 			pi.op = op;
 
-			if (dst->pCompositeClip->data)
-				span = pixmask_span__clipped;
-			else
-				span = pixmask_span;
-
+			span = clipped_span(&clipped, pixmask_span, dst->pCompositeClip);
 			if (sigtrap_get() == 0) {
-				tor_render(NULL, &tor, (void*)&pi,
-					   dst->pCompositeClip, span,
-					   false);
+				tor_render(NULL, &tor,
+					   (void*)&pi, (void *)&clipped,
+					   span, false);
 				sigtrap_put();
 			}
 
@@ -2890,9 +2810,9 @@ trapezoid_span_inplace__x8r8g8b8(CARD8 op,
 
 struct inplace_thread {
 	xTrapezoid *traps;
-	RegionPtr clip;
 	span_func_t span;
 	struct inplace inplace;
+	struct clipped_span clipped;
 	BoxRec extents;
 	int dx, dy;
 	int draw_x, draw_y;
@@ -2917,8 +2837,9 @@ static void inplace_thread(void *arg)
 		tor_add_trapezoid(&tor, &thread->traps[n], thread->dx, thread->dy);
 	}
 
-	tor_render(NULL, &tor, (void*)&thread->inplace,
-		   thread->clip, thread->span, thread->unbounded);
+	tor_render(NULL, &tor, 
+		   (void*)&thread->inplace, (void*)&thread->clipped,
+		   thread->span, thread->unbounded);
 
 	tor_fini(&tor);
 }
@@ -2932,6 +2853,7 @@ precise_trapezoid_span_inplace(struct sna *sna,
 			       bool fallback)
 {
 	struct inplace inplace;
+	struct clipped_span clipped;
 	span_func_t span;
 	PixmapPtr pixmap;
 	struct sna_pixmap *priv;
@@ -3049,21 +2971,12 @@ precise_trapezoid_span_inplace(struct sna *sna,
 	     dst->pCompositeClip->data != NULL));
 
 	if (op == PictOpSrc) {
-		if (dst->pCompositeClip->data)
-			span = tor_blt_src_clipped;
-		else
-			span = tor_blt_src;
+		span = tor_blt_src;
 	} else if (op == PictOpIn) {
-		if (dst->pCompositeClip->data)
-			span = tor_blt_in_clipped;
-		else
-			span = tor_blt_in;
+		span = tor_blt_in;
 	} else {
 		assert(op == PictOpAdd);
-		if (dst->pCompositeClip->data)
-			span = tor_blt_add_clipped;
-		else
-			span = tor_blt_add;
+		span = tor_blt_add;
 	}
 
 	DBG(("%s: move-to-cpu(dst)\n", __FUNCTION__));
@@ -3081,6 +2994,8 @@ precise_trapezoid_span_inplace(struct sna *sna,
 	inplace.stride = pixmap->devKind;
 	inplace.opacity = color >> 24;
 
+	span = clipped_span(&clipped, span, dst->pCompositeClip);
+
 	num_threads = 1;
 	if (!NO_GPU_THREADS &&
 	    (flags & COMPOSITE_SPANS_RECTILINEAR) == 0)
@@ -3103,8 +3018,9 @@ precise_trapezoid_span_inplace(struct sna *sna,
 		}
 
 		if (sigtrap_get() == 0) {
-			tor_render(NULL, &tor, (void*)&inplace,
-				   dst->pCompositeClip, span, unbounded);
+			tor_render(NULL, &tor,
+				   (void*)&inplace, (void *)&clipped,
+				   span, unbounded);
 			sigtrap_put();
 		}
 
@@ -3122,7 +3038,7 @@ precise_trapezoid_span_inplace(struct sna *sna,
 		threads[0].ntrap = ntrap;
 		threads[0].inplace = inplace;
 		threads[0].extents = region.extents;
-		threads[0].clip = dst->pCompositeClip;
+		threads[0].clipped = clipped;
 		threads[0].span = span;
 		threads[0].unbounded = unbounded;
 		threads[0].dx = dx;
@@ -3345,8 +3261,7 @@ tristrip_thread(void *arg)
 	if (!tor_init(&tor, &thread->extents, 2*thread->count))
 		return;
 
-	boxes.op = thread->op;
-	boxes.num_boxes = 0;
+	span_thread_boxes_init(&boxes, thread->op, thread->clip);
 
 	cw = 0; ccw = 1;
 	polygon_add_line(tor.polygon,
commit e0d9953e3b9cb159cce825fd473c4248efdd8d12
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Feb 6 12:27:48 2015 +0000

    sna: Only do COW creation on read-only migration if forced
    
    For reads, we can use the existing clone -- except if the migration is
    required to return a unique GPU handle to the pixmap.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index b4115d7..6aa4b27 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1866,7 +1866,9 @@ sna_pixmap_undo_cow(struct sna *sna, struct sna_pixmap *priv, unsigned flags)
 	assert(priv->gpu_bo == cow->bo);
 	assert(cow->refcnt);
 
-	if (flags && (flags & MOVE_WRITE) == 0 && IS_COW_OWNER(priv->cow))
+	if (flags && /* flags == 0 => force decouple */
+	    (flags & MOVE_WRITE) == 0 &&
+	    (((flags & __MOVE_FORCE) == 0) || IS_COW_OWNER(priv->cow)))
 		return true;
 
 	if (!IS_COW_OWNER(priv->cow))
@@ -3218,6 +3220,7 @@ __sna_pixmap_for_gpu(struct sna *sna, PixmapPtr pixmap, unsigned flags)
 {
 	struct sna_pixmap *priv;
 
+	assert(flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE));
 	if ((flags & __MOVE_FORCE) == 0 && wedged(sna))
 		return NULL;
 
@@ -3296,12 +3299,14 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
 	if (priv->cow) {
 		unsigned cow = flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE);
 
+		assert(cow);
+
 		if ((flags & MOVE_READ) == 0) {
 			if (priv->gpu_damage) {
 				r.extents = *box;
 				r.data = NULL;
 				if (!region_subsumes_damage(&r, priv->gpu_damage))
-					cow |= MOVE_READ;
+					cow |= MOVE_READ | __MOVE_FORCE;
 			}
 		} else {
 			if (priv->cpu_damage) {
@@ -3312,13 +3317,11 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
 			}
 		}
 
-		if (cow) {
-			if (!sna_pixmap_undo_cow(sna, priv, cow))
-				return NULL;
+		if (!sna_pixmap_undo_cow(sna, priv, cow))
+			return NULL;
 
-			if (priv->gpu_bo == NULL)
-				sna_damage_destroy(&priv->gpu_damage);
-		}
+		if (priv->gpu_bo == NULL)
+			sna_damage_destroy(&priv->gpu_damage);
 	}
 
 	if (sna_damage_is_all(&priv->gpu_damage,
@@ -3536,7 +3539,8 @@ sna_drawable_use_bo(DrawablePtr drawable, unsigned flags, const BoxRec *box,
 	}
 
 	if (priv->cow) {
-		unsigned cow = MOVE_WRITE | MOVE_READ;
+		unsigned cow = MOVE_WRITE | MOVE_READ | __MOVE_FORCE;
+		assert(cow);
 
 		if (flags & IGNORE_DAMAGE) {
 			if (priv->gpu_damage) {
@@ -4130,15 +4134,14 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
 
 	if (priv->cow) {
 		unsigned cow = flags & (MOVE_READ | MOVE_WRITE | __MOVE_FORCE);
+		assert(cow);
 		if (flags & MOVE_READ && priv->cpu_damage)
 			cow |= MOVE_WRITE;
-		if (cow) {
-			if (!sna_pixmap_undo_cow(sna, priv, cow))
-				return NULL;
+		if (!sna_pixmap_undo_cow(sna, priv, cow))
+			return NULL;
 
-			if (priv->gpu_bo == NULL)
-				sna_damage_destroy(&priv->gpu_damage);
-		}
+		if (priv->gpu_bo == NULL)
+			sna_damage_destroy(&priv->gpu_damage);
 	}
 
 	if (sna_damage_is_all(&priv->gpu_damage,


More information about the xorg-commit mailing list