xf86-video-intel: 5 commits - src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_render.h src/sna/sna_trapezoids.c

Chris Wilson ickle at kemper.freedesktop.org
Sun Mar 11 12:55:24 PDT 2012


 src/sna/kgem.c           |    6 
 src/sna/kgem.h           |    1 
 src/sna/sna_accel.c      |   67 +++-
 src/sna/sna_blt.c        |   31 +-
 src/sna/sna_render.h     |    1 
 src/sna/sna_trapezoids.c |  687 ++++++++++++++++++++++++++++++++++++++++++++---
 6 files changed, 736 insertions(+), 57 deletions(-)

New commits:
commit 8136bc5e113ae06c30def3c91b1615e5fab8af44
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Sun Mar 11 19:45:55 2012 +0000

    sna: Make the maximum BLT pitch assertions consistent
    
    The maximum permissible BLT pitch value is 32767, so make the assertions
    match...
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=47206
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index d70e30e..eb8dbf8 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -118,6 +118,7 @@ static bool sna_blt_fill_init(struct sna *sna,
 {
 	struct kgem *kgem = &sna->kgem;
 
+	assert(kgem_bo_can_blt (kgem, bo));
 	assert(bo->tiling != I915_TILING_Y);
 	blt->bo[0] = bo;
 
@@ -127,7 +128,7 @@ static bool sna_blt_fill_init(struct sna *sna,
 		blt->cmd |= BLT_DST_TILED;
 		blt->br13 >>= 2;
 	}
-	assert(blt->br13 < MAXSHORT);
+	assert(blt->br13 <= MAXSHORT);
 
 	if (alu == GXclear)
 		pixel = 0;
@@ -258,6 +259,9 @@ static Bool sna_blt_copy_init(struct sna *sna,
 {
 	struct kgem *kgem = &sna->kgem;
 
+	assert(kgem_bo_can_blt (kgem, src));
+	assert(kgem_bo_can_blt (kgem, dst));
+
 	blt->bo[0] = src;
 	blt->bo[1] = dst;
 
@@ -270,14 +274,14 @@ static Bool sna_blt_copy_init(struct sna *sna,
 		blt->cmd |= BLT_SRC_TILED;
 		blt->pitch[0] >>= 2;
 	}
-	assert(blt->pitch[0] < MAXSHORT);
+	assert(blt->pitch[0] <= MAXSHORT);
 
 	blt->pitch[1] = dst->pitch;
 	if (kgem->gen >= 40 && dst->tiling) {
 		blt->cmd |= BLT_DST_TILED;
 		blt->pitch[1] >>= 2;
 	}
-	assert(blt->pitch[1] < MAXSHORT);
+	assert(blt->pitch[1] <= MAXSHORT);
 
 	blt->overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
 	blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1];
@@ -308,6 +312,9 @@ static Bool sna_blt_alpha_fixup_init(struct sna *sna,
 {
 	struct kgem *kgem = &sna->kgem;
 
+	assert(kgem_bo_can_blt (kgem, src));
+	assert(kgem_bo_can_blt (kgem, dst));
+
 	blt->bo[0] = src;
 	blt->bo[1] = dst;
 
@@ -317,14 +324,14 @@ static Bool sna_blt_alpha_fixup_init(struct sna *sna,
 		blt->cmd |= BLT_SRC_TILED;
 		blt->pitch[0] >>= 2;
 	}
-	assert(blt->pitch[0] < MAXSHORT);
+	assert(blt->pitch[0] <= MAXSHORT);
 
 	blt->pitch[1] = dst->pitch;
 	if (kgem->gen >= 40 && dst->tiling) {
 		blt->cmd |= BLT_DST_TILED;
 		blt->pitch[1] >>= 2;
 	}
-	assert(blt->pitch[1] < MAXSHORT);
+	assert(blt->pitch[1] <= MAXSHORT);
 
 	blt->overwrites = 1;
 	blt->br13 = (0xfc << 16) | blt->pitch[1];
@@ -1829,6 +1836,8 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
 	uint32_t br13, cmd, *b;
 	bool overwrites;
 
+	assert(kgem_bo_can_blt (kgem, bo));
+
 	DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__,
 	     box->x1, box->y1, box->x2, box->y2));
 
@@ -1841,7 +1850,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
 		cmd |= BLT_DST_TILED;
 		br13 >>= 2;
 	}
-	assert(br13 < MAXSHORT);
+	assert(br13 <= MAXSHORT);
 
 	br13 |= fill_ROP[alu] << 16;
 	switch (bpp) {
@@ -1954,7 +1963,7 @@ Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
 		cmd |= 1 << 11;
 		br13 >>= 2;
 	}
-	assert(br13 < MAXSHORT);
+	assert(br13 <= MAXSHORT);
 
 	br13 |= 1<<31 | fill_ROP[alu] << 16;
 	switch (bpp) {
@@ -2105,7 +2114,7 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
 		cmd |= BLT_DST_TILED;
 		br13 >>= 2;
 	}
-	assert(br13 < MAXSHORT);
+	assert(br13 <= MAXSHORT);
 
 	br13 |= copy_ROP[alu] << 16;
 	switch (bpp) {
commit 989615493608525fc252e4e94ac7259cba0741f5
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 9 22:44:16 2012 +0000

    sna: Feed fallback mono trapezoids through the mono rasteriser
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 8420730..d70e30e 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -631,6 +631,12 @@ sna_rgba_for_color(uint32_t color, int depth)
 	return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8);
 }
 
+uint32_t
+sna_rgba_to_color(uint32_t rgba, uint32_t format)
+{
+	return color_convert(rgba, PICT_a8r8g8b8, format);
+}
+
 static uint32_t
 get_pixel(PicturePtr picture)
 {
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 71a6fc5..e6015af 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -482,6 +482,7 @@ sna_render_get_gradient(struct sna *sna,
 			PictGradient *pattern);
 
 uint32_t sna_rgba_for_color(uint32_t color, int depth);
+uint32_t sna_rgba_to_color(uint32_t rgba, uint32_t format);
 Bool sna_picture_is_solid(PicturePtr picture, uint32_t *color);
 
 void no_render_init(struct sna *sna);
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 4493331..3e2802e 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -3681,6 +3681,262 @@ tor_blt_add_clipped_mono(struct sna *sna,
 		tor_blt_add_clipped(sna, op, clip, box, FAST_SAMPLES_XY);
 }
 
+struct mono_inplace_composite {
+	pixman_image_t *src, *dst;
+	int dx, dy;
+	int sx, sy;
+	int op;
+};
+struct mono_inplace_fill {
+	uint32_t *data, stride;
+	uint32_t color;
+	int bpp;
+};
+
+fastcall static void
+mono_inplace_fill_box(struct sna *sna,
+		      const struct sna_composite_op *op,
+		      const BoxRec *box)
+{
+	struct mono_inplace_fill *fill = op->priv;
+
+	DBG(("(%s: (%d, %d)x(%d, %d):%08x\n",
+	     __FUNCTION__,
+	     box->x1, box->y1,
+	     box->x2 - box->x1,
+	     box->y2 - box->y1,
+	     fill->color));
+	pixman_fill(fill->data, fill->stride, fill->bpp,
+		    box->x1, box->y1,
+		    box->x2 - box->x1,
+		    box->y2 - box->y1,
+		    fill->color);
+}
+
+static void
+mono_inplace_fill_boxes(struct sna *sna,
+			const struct sna_composite_op *op,
+			const BoxRec *box, int nbox)
+{
+	struct mono_inplace_fill *fill = op->priv;
+
+	do {
+		DBG(("(%s: (%d, %d)x(%d, %d):%08x\n",
+		     __FUNCTION__,
+		     box->x1, box->y1,
+		     box->x2 - box->x1,
+		     box->y2 - box->y1,
+		     fill->color));
+		pixman_fill(fill->data, fill->stride, fill->bpp,
+			    box->x1, box->y1,
+			    box->x2 - box->x1,
+			    box->y2 - box->y1,
+			    fill->color);
+		box++;
+	} while (--nbox);
+}
+
+fastcall static void
+mono_inplace_composite_box(struct sna *sna,
+			   const struct sna_composite_op *op,
+			   const BoxRec *box)
+{
+	struct mono_inplace_composite *c = op->priv;
+
+	pixman_image_composite(c->op, c->src, NULL, c->dst,
+			       box->x1 + c->sx, box->y1 + c->sy,
+			       0, 0,
+			       box->x1 + c->dx, box->y1 + c->dy,
+			       box->x2 - box->x1,
+			       box->y2 - box->y1);
+}
+
+static void
+mono_inplace_composite_boxes(struct sna *sna,
+			     const struct sna_composite_op *op,
+			     const BoxRec *box, int nbox)
+{
+	struct mono_inplace_composite *c = op->priv;
+
+	do {
+		pixman_image_composite(c->op, c->src, NULL, c->dst,
+				       box->x1 + c->sx, box->y1 + c->sy,
+				       0, 0,
+				       box->x1 + c->dx, box->y1 + c->dy,
+				       box->x2 - box->x1,
+				       box->y2 - box->y1);
+		box++;
+	} while (--nbox);
+}
+
+static bool
+trapezoid_span_mono_inplace(CARD8 op,
+			    PicturePtr src,
+			    PicturePtr dst,
+			    INT16 src_x, INT16 src_y,
+			    int ntrap, xTrapezoid *traps)
+{
+	struct mono mono;
+	union {
+		struct mono_inplace_fill fill;
+		struct mono_inplace_composite composite;
+	} inplace;
+	int was_clear;
+	int x, y, n;
+
+	trapezoids_bounds(ntrap, traps, &mono.clip.extents);
+	if (mono.clip.extents.y1 >= mono.clip.extents.y2 ||
+	    mono.clip.extents.x1 >= mono.clip.extents.x2)
+		return true;
+
+	DBG(("%s: extents (%d, %d), (%d, %d)\n",
+	     __FUNCTION__,
+	     mono.clip.extents.x1, mono.clip.extents.y1,
+	     mono.clip.extents.x2, mono.clip.extents.y2));
+
+	if (!sna_compute_composite_region(&mono.clip,
+					  src, NULL, dst,
+					  src_x, src_y,
+					  0, 0,
+					  mono.clip.extents.x1, mono.clip.extents.y1,
+					  mono.clip.extents.x2 - mono.clip.extents.x1,
+					  mono.clip.extents.y2 - mono.clip.extents.y1)) {
+		DBG(("%s: trapezoids do not intersect drawable clips\n",
+		     __FUNCTION__)) ;
+		return true;
+	}
+
+	DBG(("%s: clipped extents (%d, %d), (%d, %d)\n",
+	     __FUNCTION__,
+	     mono.clip.extents.x1, mono.clip.extents.y1,
+	     mono.clip.extents.x2, mono.clip.extents.y2));
+
+	was_clear = sna_drawable_is_clear(dst->pDrawable);
+	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &mono.clip,
+					     MOVE_WRITE | MOVE_READ))
+		return true;
+
+	mono.sna = to_sna_from_drawable(dst->pDrawable);
+	if (!mono_init(&mono, 2*ntrap))
+		return false;
+
+	mono.op.damage = NULL;
+
+	x = dst->pDrawable->x;
+	y = dst->pDrawable->y;
+
+	for (n = 0; n < ntrap; n++) {
+		if (!xTrapezoidValid(&traps[n]))
+			continue;
+
+		if (pixman_fixed_to_int(traps[n].top) + y >= mono.clip.extents.y2 ||
+		    pixman_fixed_to_int(traps[n].bottom) + y < mono.clip.extents.y1)
+			continue;
+
+		mono_add_line(&mono, x, y,
+			      traps[n].top, traps[n].bottom,
+			      &traps[n].left.p1, &traps[n].left.p2, 1);
+		mono_add_line(&mono, x, y,
+			      traps[n].top, traps[n].bottom,
+			      &traps[n].right.p1, &traps[n].right.p2, -1);
+	}
+
+	if (sna_picture_is_solid(src, &inplace.fill.color) &&
+	    (op == PictOpSrc || op == PictOpClear ||
+	     (op == PictOpOver && inplace.fill.color >> 24 == 0xff))) {
+		PixmapPtr pixmap;
+		int16_t dx, dy;
+		uint8_t *ptr;
+
+unbounded_pass:
+		pixmap = get_drawable_pixmap(dst->pDrawable);
+		get_drawable_deltas(dst->pDrawable, pixmap, &dx, &dy);
+
+		ptr = pixmap->devPrivate.ptr;
+		ptr += dy * pixmap->devKind + dx * pixmap->drawable.bitsPerPixel / 8;
+		inplace.fill.data = (uint32_t *)ptr;
+		inplace.fill.stride = pixmap->devKind / sizeof(uint32_t);
+		inplace.fill.bpp = pixmap->drawable.bitsPerPixel;
+
+		if (op == PictOpClear)
+			inplace.fill.color = 0;
+		else if (dst->format != PICT_a8r8g8b8)
+			inplace.fill.color = sna_rgba_to_color(inplace.fill.color, dst->format);
+
+		DBG(("%s: fill %x\n", __FUNCTION__, inplace.fill.color));
+
+		mono.op.priv = &inplace.fill;
+		mono.op.box = mono_inplace_fill_box;
+		mono.op.boxes = mono_inplace_fill_boxes;
+
+		op = 0;
+	} else {
+		inplace.composite.dst = image_from_pict(dst, FALSE,
+							&inplace.composite.dx,
+							&inplace.composite.dy);
+		inplace.composite.src = image_from_pict(src, FALSE,
+							&inplace.composite.sx,
+							&inplace.composite.sy);
+		inplace.composite.sx +=
+			src_x - pixman_fixed_to_int(traps[0].left.p1.x),
+		inplace.composite.sy +=
+			src_y - pixman_fixed_to_int(traps[0].left.p1.y),
+		inplace.composite.op = op;
+
+		mono.op.priv = &inplace.composite;
+		mono.op.box = mono_inplace_composite_box;
+		mono.op.boxes = mono_inplace_composite_boxes;
+	}
+	mono_render(&mono);
+	mono_fini(&mono);
+
+	if (op) {
+		free_pixman_pict(src, inplace.composite.src);
+		free_pixman_pict(dst, inplace.composite.dst);
+
+		if (!was_clear && !operator_is_bounded(op)) {
+			xPointFixed p1, p2;
+
+			DBG(("%s: unbounded fixup\n", __FUNCTION__));
+
+			if (!mono_init(&mono, 2+2*ntrap))
+				return false;
+
+			p1.y = mono.clip.extents.y1 * pixman_fixed_1;
+			p2.y = mono.clip.extents.y2 * pixman_fixed_1;
+
+			p1.x = mono.clip.extents.x1 * pixman_fixed_1;
+			p2.x = mono.clip.extents.x1 * pixman_fixed_1;
+			mono_add_line(&mono, 0, 0, p1.y, p2.y, &p1, &p2, -1);
+
+			p1.x = mono.clip.extents.x2 * pixman_fixed_1;
+			p2.x = mono.clip.extents.x2 * pixman_fixed_1;
+			mono_add_line(&mono, 0, 0, p1.y, p2.y, &p1, &p2, 1);
+
+			for (n = 0; n < ntrap; n++) {
+				if (!xTrapezoidValid(&traps[n]))
+					continue;
+
+				if (pixman_fixed_to_int(traps[n].top) + x >= mono.clip.extents.y2 ||
+				    pixman_fixed_to_int(traps[n].bottom) + y < mono.clip.extents.y1)
+					continue;
+
+				mono_add_line(&mono, x, y,
+					      traps[n].top, traps[n].bottom,
+					      &traps[n].left.p1, &traps[n].left.p2, 1);
+				mono_add_line(&mono, x, y,
+					      traps[n].top, traps[n].bottom,
+					      &traps[n].right.p1, &traps[n].right.p2, -1);
+			}
+
+			op = PictOpClear;
+			goto unbounded_pass;
+		}
+	}
+
+	return true;
+}
+
 static bool
 trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 		       PictFormatPtr maskFormat, INT16 src_x, INT16 src_y,
@@ -3713,7 +3969,24 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 		return false;
 	}
 
-	if (dst->format != PICT_a8 || !sna_picture_is_solid(src, &color)) {
+	if (!fallback && is_gpu(dst->pDrawable)) {
+		DBG(("%s: fallback -- can not perform operation in place, destination busy\n",
+		     __FUNCTION__));
+
+		return false;
+	}
+
+	if (is_mono(dst, maskFormat))
+		return trapezoid_span_mono_inplace(op, src, dst,
+						   src_x, src_y, ntrap, traps);
+
+	if (!sna_picture_is_solid(src, &color)) {
+		DBG(("%s: fallback -- can not perform operation in place, requires solid source\n",
+		     __FUNCTION__));
+		return false;
+	}
+
+	if (dst->format != PICT_a8) {
 		DBG(("%s: fallback -- can not perform operation in place, format=%x\n",
 		     __FUNCTION__, dst->format));
 		return false;
@@ -3744,8 +4017,6 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 		     __FUNCTION__, op));
 		return false;
 	}
-	if (!fallback && is_gpu(dst->pDrawable))
-		return false;
 
 	DBG(("%s: format=%x, op=%d, color=%x\n",
 	     __FUNCTION__, dst->format, op, color));
commit 552e4fbd2c25eb5ab0ae77e11f5f8ba2fdb29daa
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 9 20:02:44 2012 +0000

    sna/traps: Add a fast path for narrow masks
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 5773d66..db579d0 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3785,6 +3785,12 @@ done:
 	return kgem_create_proxy(&bo->base, offset, size);
 }
 
+bool kgem_buffer_is_inplace(struct kgem_bo *_bo)
+{
+	struct kgem_partial_bo *bo = (struct kgem_partial_bo *)_bo->proxy;
+	return bo->write & KGEM_BUFFER_WRITE_INPLACE;
+}
+
 struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
 				      int width, int height, int bpp,
 				      uint32_t flags,
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 6c31f33..dff8bb2 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -503,6 +503,7 @@ struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
 				      int width, int height, int bpp,
 				      uint32_t flags,
 				      void **ret);
+bool kgem_buffer_is_inplace(struct kgem_bo *bo);
 void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *bo);
 
 void kgem_bo_clear_scanout(struct kgem *kgem, struct kgem_bo *bo);
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 3429438..3619101 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2007,17 +2007,10 @@ sna_pixmap_create_upload(ScreenPtr screen,
 		pixmap = sna->freed_pixmap;
 		sna->freed_pixmap = NULL;
 
-		pixmap->usage_hint = CREATE_PIXMAP_USAGE_SCRATCH;
 		pixmap->drawable.serialNumber = NEXT_SERIAL_NUMBER;
 		pixmap->refcnt = 1;
-
-		DBG(("%s: serial=%ld, usage=%d\n",
-		     __FUNCTION__,
-		     pixmap->drawable.serialNumber,
-		     pixmap->usage_hint));
 	} else {
-		pixmap = create_pixmap(sna, screen, 0, 0, depth,
-				       CREATE_PIXMAP_USAGE_SCRATCH);
+		pixmap = create_pixmap(sna, screen, 0, 0, depth, 0);
 		if (!pixmap)
 			return NullPixmap;
 
@@ -2035,8 +2028,7 @@ sna_pixmap_create_upload(ScreenPtr screen,
 
 	priv->gpu_bo = kgem_create_buffer_2d(&sna->kgem,
 					     width, height, bpp,
-					     flags,
-					     &ptr);
+					     flags, &ptr);
 	if (!priv->gpu_bo) {
 		free(priv);
 		fbDestroyPixmap(pixmap);
@@ -2058,6 +2050,15 @@ sna_pixmap_create_upload(ScreenPtr screen,
 	pixmap->devKind = priv->gpu_bo->pitch;
 	pixmap->devPrivate.ptr = ptr;
 
+	pixmap->usage_hint = 0;
+	if (!kgem_buffer_is_inplace(priv->gpu_bo))
+		pixmap->usage_hint = 1;
+
+	DBG(("%s: serial=%ld, usage=%d\n",
+	     __FUNCTION__,
+	     pixmap->drawable.serialNumber,
+	     pixmap->usage_hint));
+
 	return pixmap;
 }
 
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 8c6cf34..4493331 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -1409,6 +1409,342 @@ tor_render(struct sna *sna,
 	}
 }
 
+static void
+inplace_row(struct active_list *active, uint8_t *row, int width)
+{
+	struct edge *left = active->head.next;
+
+	assert(active->is_vertical);
+
+	while (&active->tail != left) {
+		struct edge *right;
+		int winding = left->dir;
+		grid_scaled_x_t lfx, rfx;
+		int lix, rix;
+
+		left->height_left -= FAST_SAMPLES_Y;
+		if (!left->height_left) {
+			left->prev->next = left->next;
+			left->next->prev = left->prev;
+		}
+
+		right = left->next;
+		do {
+			right->height_left -= FAST_SAMPLES_Y;
+			if (!right->height_left) {
+				right->prev->next = right->next;
+				right->next->prev = right->prev;
+			}
+
+			winding += right->dir;
+			if (0 == winding)
+				break;
+
+			right = right->next;
+		} while (1);
+
+		if (left->x.quo < 0) {
+			lix = lfx = 0;
+		} else if (left->x.quo > width * FAST_SAMPLES_X) {
+			lix = width;
+			lfx = 0;
+		} else
+			FAST_SAMPLES_X_TO_INT_FRAC(left->x.quo, lix, lfx);
+
+		if (right->x.quo < 0) {
+			rix = rfx = 0;
+		} else if (right->x.quo > width * FAST_SAMPLES_X) {
+			rix = width;
+			rfx = 0;
+		} else
+			FAST_SAMPLES_X_TO_INT_FRAC(right->x.quo, rix, rfx);
+		if (lix == rix) {
+			if (rfx != lfx)
+				row[lix] += (rfx-lfx) * 256 / FAST_SAMPLES_X;
+		} else {
+			if (lfx == 0)
+				row[lix] = 0xff;
+			else
+				row[lix] += 256 - lfx * 256 / FAST_SAMPLES_X;
+
+			if (rfx)
+				row[rix] += rfx * 256 / FAST_SAMPLES_X;
+
+			if (rix > ++lix) {
+				rix -= lix;
+#if 0
+				if (rix == 1)
+					row[lix] = 0xff;
+				else
+					memset(row+lix, 0xff, rix);
+#else
+				while (rix && lix & 3)
+					row[lix++] = 0xff, rix--;
+				while (rix > 4) {
+					*(uint32_t *)(row+lix) = 0xffffffff;
+					lix += 4;
+					rix -= 4;
+				}
+				if (rix & 2) {
+					*(uint16_t *)(row+lix) = 0xffff;
+					lix += 2;
+				}
+				if (rix & 1)
+					row[lix] = 0xff;
+#endif
+			}
+		}
+
+		left = right->next;
+	}
+}
+
+static inline uint8_t clip255(int x)
+{
+	if (x > 255)
+		return 255;
+
+	return x;
+}
+
+inline static void
+inplace_subrow(struct active_list *active, int8_t *row,
+	       int width, int *min, int *max)
+{
+	struct edge *edge = active->head.next;
+	grid_scaled_x_t prev_x = INT_MIN;
+	int winding = 0, xstart = INT_MIN;
+
+	while (&active->tail != edge) {
+		struct edge *next = edge->next;
+
+		winding += edge->dir;
+		if (0 == winding) {
+			if (edge->x.quo >= FAST_SAMPLES_X * width) {
+				*max = width;
+			} else if (edge->next->x.quo != edge->x.quo) {
+				grid_scaled_x_t fx;
+				int ix;
+
+				xstart = edge->x.quo;
+				FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
+				row[ix++] -= FAST_SAMPLES_X - fx;
+				if (ix < width)
+					row[ix] -= fx;
+
+				if (ix > *max)
+					*max = ix;
+
+				xstart = INT_MIN;
+			}
+		} else if (xstart < 0) {
+			grid_scaled_x_t fx;
+			int ix;
+
+			xstart = MAX(edge->x.quo, 0);
+			FAST_SAMPLES_X_TO_INT_FRAC(xstart, ix, fx);
+			if (ix < *min)
+				*min = ix;
+
+			row[ix++] += FAST_SAMPLES_X - fx;
+			row[ix] += fx;
+		}
+
+		if (--edge->height_left) {
+			if (!edge->vertical) {
+				edge->x.quo += edge->dxdy.quo;
+				edge->x.rem += edge->dxdy.rem;
+				if (edge->x.rem >= 0) {
+					++edge->x.quo;
+					edge->x.rem -= edge->dy;
+				}
+			}
+
+			if (edge->x.quo < prev_x) {
+				struct edge *pos = edge->prev;
+				pos->next = next;
+				next->prev = pos;
+				do {
+					pos = pos->prev;
+				} while (edge->x.quo < pos->x.quo);
+				pos->next->prev = edge;
+				edge->next = pos->next;
+				edge->prev = pos;
+				pos->next = edge;
+			} else
+				prev_x = edge->x.quo;
+		} else {
+			edge->prev->next = next;
+			next->prev = edge->prev;
+		}
+
+		edge = next;
+	}
+}
+
+inline static void
+inplace_end_subrows(struct active_list *active, uint8_t *row,
+		    int8_t *buf, int width)
+{
+	int cover = 0;
+
+	while (width > 4) {
+		uint32_t dw;
+		int v;
+
+		dw = *(uint32_t *)buf;
+		buf += 4;
+
+		if (dw == 0){
+			v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+			v -= v >> 8;
+			v |= v << 8;
+			dw = v | v << 16;
+		} else if (dw) {
+			cover += (int8_t)(dw & 0xff);
+			assert(cover >= 0);
+			v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+			v -= v >> 8;
+			dw >>= 8;
+			dw |= v << 24;
+
+			cover += (int8_t)(dw & 0xff);
+			assert(cover >= 0);
+			v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+			v -= v >> 8;
+			dw >>= 8;
+			dw |= v << 24;
+
+			cover += (int8_t)(dw & 0xff);
+			assert(cover >= 0);
+			v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+			v -= v >> 8;
+			dw >>= 8;
+			dw |= v << 24;
+
+			cover += (int8_t)(dw & 0xff);
+			assert(cover >= 0);
+			v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+			v -= v >> 8;
+			dw >>= 8;
+			dw |= v << 24;
+		}
+
+		*(uint32_t *)row = dw;
+		row += 4;
+
+		width -= 4;
+	}
+
+	while (width--) {
+		int v;
+
+		cover += *buf++;
+		assert(cover >= 0);
+
+		v = cover * 256 / (FAST_SAMPLES_X * FAST_SAMPLES_Y);
+		v -= v >> 8;
+		*row++ = v;
+	}
+}
+
+#define TOR_INPLACE_SIZE 128
+static void
+tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
+{
+	int i, j, h = converter->ymax;
+	struct polygon *polygon = converter->polygon;
+	struct active_list *active = converter->active;
+	struct edge *buckets[FAST_SAMPLES_Y] = { 0 };
+	uint8_t *row = scratch->devPrivate.ptr;
+	int stride = scratch->devKind;
+	int width = scratch->drawable.width;
+
+	__DBG(("%s: mono=%d, buf=%d\n", __FUNCTION__, mono, buf));
+	assert(!mono);
+
+	/* Render each pixel row. */
+	for (i = 0; i < h; i = j) {
+		int do_full_step = 0;
+		void *ptr = buf ?: row;
+
+		j = i + 1;
+
+		/* Determine if we can ignore this row or use the full pixel
+		 * stepper. */
+		if (!polygon->y_buckets[i]) {
+			if (active->head.next == &active->tail) {
+				active->min_height = INT_MAX;
+				active->is_vertical = 1;
+				for (; j < h && !polygon->y_buckets[j]; j++)
+					;
+				__DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n",
+				       __FUNCTION__, i, j));
+
+				memset(row, 0, stride*(j-i));
+				row += stride*(j-i);
+				continue;
+			}
+
+			do_full_step = can_full_step(active);
+		}
+
+		__DBG(("%s: y=%d [%d], do_full_step=%d, new edges=%d, min_height=%d, vertical=%d\n",
+		       __FUNCTION__,
+		       i, i+ymin, do_full_step,
+		       polygon->y_buckets[i] != NULL,
+		       active->min_height,
+		       active->is_vertical));
+		if (do_full_step) {
+			memset(ptr, 0, width);
+			inplace_row(active, ptr, width);
+			if (row != ptr)
+				memcpy(row, ptr, width);
+
+			if (active->is_vertical) {
+				while (j < h &&
+				       polygon->y_buckets[j] == NULL &&
+				       active->min_height >= 2*FAST_SAMPLES_Y)
+				{
+					active->min_height -= FAST_SAMPLES_Y;
+					row += stride;
+					memcpy(row, ptr, width);
+					j++;
+				}
+				if (j != i + 1)
+					step_edges(active, j - (i + 1));
+
+				__DBG(("%s: vertical edges, full step (%d, %d)\n",
+				       __FUNCTION__,  i, j));
+			}
+		} else {
+			grid_scaled_y_t suby;
+			int min = width, max = 0;
+
+			fill_buckets(active, polygon->y_buckets[i], buckets);
+
+			/* Subsample this row. */
+			memset(ptr, 0, width);
+			for (suby = 0; suby < FAST_SAMPLES_Y; suby++) {
+				if (buckets[suby]) {
+					merge_edges(active, buckets[suby]);
+					buckets[suby] = NULL;
+				}
+
+				inplace_subrow(active, ptr, width, &min, &max);
+			}
+			memset(row, 0, min);
+			if (max > min)
+				inplace_end_subrows(active, row+min, (int8_t*)ptr+min, max-min);
+			if (max < width)
+				memset(row+max, 0, width-max);
+		}
+
+		active->min_height -= FAST_SAMPLES_Y;
+		row += stride;
+	}
+}
+
 struct mono_edge {
 	struct mono_edge *next, *prev;
 
@@ -1936,7 +2272,7 @@ trapezoids_bounds(int n, const xTrapezoid *t, BoxPtr box)
 		if (((x2 - t->right.p1.x) | (x2 - t->right.p2.x)) < 0) {
 			if (pixman_fixed_floor(t->right.p1.x) == pixman_fixed_floor(t->right.p2.x)) {
 				x2 = pixman_fixed_ceil(t->right.p1.x);
-			} else  {
+			} else {
 				if (t->right.p1.y == t->top)
 					fx1 = t->right.p1.x;
 				else
@@ -3007,7 +3343,6 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 			 int ntrap, xTrapezoid *traps)
 {
 	struct tor tor;
-	span_func_t span;
 	ScreenPtr screen = dst->pDrawable->pScreen;
 	PixmapPtr scratch;
 	PicturePtr mask;
@@ -3041,8 +3376,8 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
 		return true;
 
-	DBG(("%s: extents (%d, %d), (%d, %d)\n",
-	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
+	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
+	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
 
 	if (!sna_compute_composite_extents(&extents,
 					   src, NULL, dst,
@@ -3096,15 +3431,18 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 		tor_add_edge(&tor, &t, &t.right, -1);
 	}
 
-	if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp)
-		span = tor_blt_mask_mono;
-	else
-		span = tor_blt_mask;
-
-	tor_render(NULL, &tor,
-		   scratch->devPrivate.ptr,
-		   (void *)(intptr_t)scratch->devKind,
-		   span, true);
+	if (extents.x2 <= TOR_INPLACE_SIZE) {
+		uint8_t buf[TOR_INPLACE_SIZE];
+		tor_inplace(&tor, scratch, is_mono(dst, maskFormat),
+			    scratch->usage_hint ? NULL : buf);
+	} else {
+		tor_render(NULL, &tor,
+			   scratch->devPrivate.ptr,
+			   (void *)(intptr_t)scratch->devKind,
+			   is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
+			   true);
+	}
+	tor_fini(&tor);
 
 	mask = CreatePicture(0, &scratch->drawable,
 			     PictureMatchFormat(screen, 8, PICT_a8),
@@ -3119,7 +3457,6 @@ trapezoid_mask_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 				 extents.x2, extents.y2);
 		FreePicture(mask, 0);
 	}
-	tor_fini(&tor);
 
 	return true;
 }
@@ -3535,7 +3872,6 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 			int ntrap, xTrapezoid *traps)
 {
 	struct tor tor;
-	span_func_t span;
 	ScreenPtr screen = dst->pDrawable->pScreen;
 	PixmapPtr scratch;
 	PicturePtr mask;
@@ -3569,8 +3905,8 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 	if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2)
 		return true;
 
-	DBG(("%s: extents (%d, %d), (%d, %d)\n",
-	     __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2));
+	DBG(("%s: ntraps=%d, extents (%d, %d), (%d, %d)\n",
+	     __FUNCTION__, ntrap, extents.x1, extents.y1, extents.x2, extents.y2));
 
 	if (!sna_compute_composite_extents(&extents,
 					   src, NULL, dst,
@@ -3624,15 +3960,16 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 		tor_add_edge(&tor, &t, &t.right, -1);
 	}
 
-	if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp)
-		span = tor_blt_mask_mono;
-	else
-		span = tor_blt_mask;
-
-	tor_render(NULL, &tor,
-		   scratch->devPrivate.ptr,
-		   (void *)(intptr_t)scratch->devKind,
-		   span, true);
+	if (extents.x2 <= TOR_INPLACE_SIZE) {
+		tor_inplace(&tor, scratch, is_mono(dst, maskFormat), NULL);
+	} else {
+		tor_render(NULL, &tor,
+			   scratch->devPrivate.ptr,
+			   (void *)(intptr_t)scratch->devKind,
+			   is_mono(dst, maskFormat) ? tor_blt_mask_mono : tor_blt_mask,
+			   true);
+	}
+	tor_fini(&tor);
 
 	mask = CreatePicture(0, &scratch->drawable,
 			     PictureMatchFormat(screen, 8, PICT_a8),
@@ -3675,7 +4012,6 @@ trapezoid_span_fallback(CARD8 op, PicturePtr src, PicturePtr dst,
 done:
 		FreePicture(mask, 0);
 	}
-	tor_fini(&tor);
 
 	return true;
 }
commit 494edfaaacaae13adfa5e727c66a83cb2294d330
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 9 12:19:33 2012 +0000

    sna: Handle partial reads with a pending clear
    
    Skip the filling of the whole pixmap if we have a small read and we
    know the GPU bo is clear. Also choose to operate inplace on the GPU bo
    if we meet the usual criteria.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 5aad88b..3429438 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1257,13 +1257,6 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		return _sna_pixmap_move_to_cpu(pixmap, flags);
 	}
 
-	if (priv->clear) {
-		DBG(("%s: pending clear, moving whole pixmap\n", __FUNCTION__));
-		if (dx | dy)
-			RegionTranslate(region, -dx, -dy);
-		return _sna_pixmap_move_to_cpu(pixmap, flags | MOVE_READ);
-	}
-
 	if ((flags & MOVE_READ) == 0) {
 		DBG(("%s: no read, checking to see if we can stream the write into the GPU bo\n",
 		     __FUNCTION__));
@@ -1295,6 +1288,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 					sna_damage_add(&priv->gpu_damage,
 						       region);
 
+				priv->clear = false;
 				return true;
 			}
 		}
@@ -1333,6 +1327,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 			} else
 				sna_damage_add(&priv->gpu_damage, region);
 
+			priv->clear = false;
 			return true;
 		}
 	}
@@ -1354,12 +1349,20 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 			pixmap->devKind = priv->gpu_bo->pitch;
 			if (!DAMAGE_IS_ALL(priv->gpu_damage))
 				sna_damage_add(&priv->gpu_damage, region);
+			priv->clear = false;
 			return true;
 		}
 
 		priv->mapped = false;
 	}
 
+	if (priv->clear && flags & MOVE_WRITE) {
+		DBG(("%s: pending clear, moving whole pixmap for partial write\n", __FUNCTION__));
+		if (dx | dy)
+			RegionTranslate(region, -dx, -dy);
+		return _sna_pixmap_move_to_cpu(pixmap, flags | MOVE_READ);
+	}
+
 	if (priv->mapped) {
 		pixmap->devPrivate.ptr = NULL;
 		priv->mapped = false;
@@ -1372,6 +1375,35 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 	if (priv->gpu_bo == NULL)
 		goto done;
 
+	if (priv->clear) {
+		int n = REGION_NUM_RECTS(region);
+		BoxPtr box = REGION_RECTS(region);
+
+		DBG(("%s: pending clear, doing partial fill\n", __FUNCTION__));
+		if (priv->cpu_bo) {
+			DBG(("%s: syncing CPU bo\n", __FUNCTION__));
+			kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
+		}
+
+		do {
+			pixman_fill(pixmap->devPrivate.ptr,
+				    pixmap->devKind/sizeof(uint32_t),
+				    pixmap->drawable.bitsPerPixel,
+				    box->x1, box->y1,
+				    box->x2 - box->x1,
+				    box->y2 - box->y1,
+				    priv->clear_color);
+			box++;
+		} while (--n);
+
+		if (region->extents.x2 - region->extents.x1 > 1 ||
+		    region->extents.y2 - region->extents.y1 > 1) {
+			sna_damage_subtract(&priv->gpu_damage, region);
+			priv->clear = false;
+		}
+		goto done;
+	}
+
 	if ((flags & MOVE_READ) == 0) {
 		assert(flags & MOVE_WRITE);
 		sna_damage_subtract(&priv->gpu_damage, region);
commit bd62dc73dcdbab34aa5c83382e46c7315d554a1a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 9 10:37:34 2012 +0000

    sna/traps: Apply some simple but common operator reductions for clipmasks
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index e28c669..8c6cf34 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -3354,8 +3354,10 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 	struct inplace inplace;
 	span_func_t span;
 	PixmapPtr pixmap;
+	struct sna_pixmap *priv;
 	RegionRec region;
 	uint32_t color;
+	bool unbounded;
 	int16_t dst_x, dst_y;
 	int dx, dy;
 	int n;
@@ -3380,18 +3382,33 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 		return false;
 	}
 
+	pixmap = get_drawable_pixmap(dst->pDrawable);
+	priv = sna_pixmap(pixmap);
+	if (priv == NULL) {
+		DBG(("%s: fallback -- unattached\n", __FUNCTION__));
+		return false;
+	}
+
+	unbounded = false;
 	switch (op) {
 	case PictOpIn:
+		unbounded = true;
+		if (priv->clear && priv->clear_color == 0xff)
+			op = PictOpSrc;
+		break;
 	case PictOpAdd:
+		if (priv->clear && priv->clear_color == 0)
+			op = PictOpSrc;
+		break;
 	case PictOpSrc:
-		if (!fallback && is_gpu(dst->pDrawable))
-			return false;
 		break;
 	default:
 		DBG(("%s: fallback -- can not perform op [%d] in place\n",
 		     __FUNCTION__, op));
 		return false;
 	}
+	if (!fallback && is_gpu(dst->pDrawable))
+		return false;
 
 	DBG(("%s: format=%x, op=%d, color=%x\n",
 	     __FUNCTION__, dst->format, op, color));
@@ -3497,7 +3514,6 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 					     op == PictOpSrc ? MOVE_WRITE : MOVE_WRITE | MOVE_READ))
 		return true;
 
-	pixmap = get_drawable_pixmap(dst->pDrawable);
 	get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y);
 
 	inplace.ptr = pixmap->devPrivate.ptr;
@@ -3506,7 +3522,7 @@ trapezoid_span_inplace(CARD8 op, PicturePtr src, PicturePtr dst,
 	inplace.opacity = color >> 24;
 
 	tor_render(NULL, &tor, (void*)&inplace,
-		   dst->pCompositeClip, span, op == PictOpIn);
+		   dst->pCompositeClip, span, unbounded);
 
 	tor_fini(&tor);
 


More information about the xorg-commit mailing list