xf86-video-intel: 4 commits - src/sna/gen2_render.c src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_reg.h src/sna/sna_render.h

Chris Wilson ickle at kemper.freedesktop.org
Mon Mar 10 07:55:11 PDT 2014


 src/sna/gen2_render.c |   65 +++++++++++++++------------
 src/sna/sna_accel.c   |   75 ++++++++++++++++++++++---------
 src/sna/sna_blt.c     |  118 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/sna/sna_reg.h     |    1 
 src/sna/sna_render.h  |    5 ++
 5 files changed, 213 insertions(+), 51 deletions(-)

New commits:
commit 142f8461944b294dbc3fb0a7bf607bccf0bccc1f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Mar 10 15:20:16 2014 +0000

    sna/gen2: Tidy blend factor selection for the source
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 3de0a8f..18079a8 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -352,6 +352,18 @@ gen2_get_blend_factors(const struct sna_composite_op *op,
 
 
 	/* Get the source picture's channels into TBx_ARG1 */
+	if (op->src.is_solid)
+		cblend |= TB0C_ARG1_SEL_DIFFUSE;
+	else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
+		cblend |= TB0C_ARG1_SEL_TEXEL0;
+	else
+		cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT;	/* 0.0 */
+	if (op->src.is_solid)
+		ablend |= TB0A_ARG1_SEL_DIFFUSE;
+	else if (op->src.is_opaque)
+		ablend |= TB0A_ARG1_SEL_ONE;
+	else
+		ablend |= TB0A_ARG1_SEL_TEXEL0;
 	if ((op->has_component_alpha && gen2_blend_op[blend].src_alpha) ||
 	    op->dst.format == PICT_a8) {
 		/* Producing source alpha value, so the first set of channels
@@ -359,27 +371,7 @@ gen2_get_blend_factors(const struct sna_composite_op *op,
 		 * is a8, in which case src.G is what's written, and the other
 		 * channels are ignored.
 		 */
-		if (op->src.is_solid) {
-			ablend |= TB0A_ARG1_SEL_DIFFUSE;
-			cblend |= TB0C_ARG1_SEL_DIFFUSE | TB0C_ARG1_REPLICATE_ALPHA;
-		} else {
-			ablend |= TB0A_ARG1_SEL_TEXEL0;
-			cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA;
-		}
-	} else {
-		if (op->src.is_solid)
-			cblend |= TB0C_ARG1_SEL_DIFFUSE;
-		else if (PICT_FORMAT_RGB(op->src.pict_format) != 0)
-			cblend |= TB0C_ARG1_SEL_TEXEL0;
-		else
-			cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT;	/* 0.0 */
-		if (op->src.is_solid)
-			ablend |= TB0A_ARG1_SEL_DIFFUSE;
-		else if (op->src.is_opaque)
-			ablend |= TB0A_ARG1_SEL_ONE;
-		else
-			ablend |= TB0A_ARG1_SEL_TEXEL0;
-	}
+		cblend |= TB0C_ARG1_REPLICATE_ALPHA;
 
 	if (op->mask.bo) {
 		if (op->src.is_solid) {
@@ -599,6 +591,7 @@ static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op)
 	assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= MAX_3D_PITCH);
 	assert(sna->render.vertex_offset == 0);
 
+	assert(op->dst.bo->unique_id);
 	if (sna->render_state.gen2.target == op->dst.bo->unique_id) {
 		kgem_bo_mark_dirty(op->dst.bo);
 		return;
commit 0075c90d3192ba90ff1ae8a7b04bfc3ff4fccda7
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Mar 10 15:17:51 2014 +0000

    sna/gen2: Fix alpha blending with 8bit destination surfaces
    
    On gen2 (like gen3), 8-bit destination surfaces are read into the Green
    channel (and written to from the Green channel). Therefore the expected
    alpha blending must instead be converted to colour blending.
    
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=75818
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index da7049c..3de0a8f 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -425,14 +425,28 @@ static uint32_t gen2_get_blend_cntl(int op,
 	sblend = gen2_blend_op[op].src_blend;
 	dblend = gen2_blend_op[op].dst_blend;
 
-	/* If there's no dst alpha channel, adjust the blend op so that
-	 * we'll treat it as always 1.
-	 */
-	if (PICT_FORMAT_A(dst_format) == 0 && gen2_blend_op[op].dst_alpha) {
-		if (sblend == BLENDFACTOR_DST_ALPHA)
-			sblend = BLENDFACTOR_ONE;
-		else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
-			sblend = BLENDFACTOR_ZERO;
+	if (gen2_blend_op[op].dst_alpha) {
+		/* If there's no dst alpha channel, adjust the blend op so that
+		 * we'll treat it as always 1.
+		 */
+		if (PICT_FORMAT_A(dst_format) == 0) {
+			if (sblend == BLENDFACTOR_DST_ALPHA)
+				sblend = BLENDFACTOR_ONE;
+			else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
+				sblend = BLENDFACTOR_ZERO;
+		}
+
+		/* gen2 engine reads 8bit color buffer into green channel
+		 * in cases like color buffer blending etc., and also writes
+		 * back green channel.  So with dst_alpha blend we should use
+		 * color factor.
+		 */
+		if (dst_format == PICT_a8) {
+			if (sblend == BLENDFACTOR_DST_ALPHA)
+				sblend = BLENDFACTOR_DST_COLR;
+			else if (sblend == BLENDFACTOR_INV_DST_ALPHA)
+				sblend = BLENDFACTOR_INV_DST_COLR;
+		}
 	}
 
 	/* If the source alpha is being used, then we should only be in a case
commit f0b70ca660416af42be8b3f76f9e38a81dcac464
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 7 08:16:50 2014 +0000

    sna: Unroll finding bbox of points
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index aff7b34..7be43c7 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -8271,10 +8271,33 @@ sna_poly_point_extents(DrawablePtr drawable, GCPtr gc,
 			box_add_pt(&box, last.x, last.y);
 		}
 	} else {
-		while (--n) {
-			++pt;
-			box_add_pt(&box, pt->x, pt->y);
+		--n; ++pt;
+		while (n >= 8) {
+			box_add_pt(&box, pt[0].x, pt[0].y);
+			box_add_pt(&box, pt[1].x, pt[1].y);
+			box_add_pt(&box, pt[2].x, pt[2].y);
+			box_add_pt(&box, pt[3].x, pt[3].y);
+			box_add_pt(&box, pt[4].x, pt[4].y);
+			box_add_pt(&box, pt[5].x, pt[5].y);
+			box_add_pt(&box, pt[6].x, pt[6].y);
+			box_add_pt(&box, pt[7].x, pt[7].y);
+			pt += 8;
+			n -= 8;
+		}
+		if (n & 4) {
+			box_add_pt(&box, pt[0].x, pt[0].y);
+			box_add_pt(&box, pt[1].x, pt[1].y);
+			box_add_pt(&box, pt[2].x, pt[2].y);
+			box_add_pt(&box, pt[3].x, pt[3].y);
+			pt += 4;
+		}
+		if (n & 2) {
+			box_add_pt(&box, pt[0].x, pt[0].y);
+			box_add_pt(&box, pt[1].x, pt[1].y);
+			pt += 2;
 		}
+		if (n & 1)
+			box_add_pt(&box, pt[0].x, pt[0].y);
 	}
 	box.x2++;
 	box.y2++;
commit 928453b1bc5a059a87fbcdba9156d318a0310073
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 7 08:15:21 2014 +0000

    sna: Emit points using the BLT primitive when appropriate
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 6359de1..aff7b34 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -6586,10 +6586,14 @@ sna_poly_point__fill(DrawablePtr drawable, GCPtr gc,
 	DDXPointRec last;
 
 	DBG(("%s: count=%d\n", __FUNCTION__, n));
+	if (n == 0)
+		return;
 
 	last.x = drawable->x + data->dx;
 	last.y = drawable->y + data->dy;
-	while (n) {
+	if (op->points && mode != CoordModePrevious) {
+		op->points(data->sna, op, last.x, last.y, pt, n);
+	} else do {
 		BoxRec *b = box;
 		unsigned nbox = n;
 		if (nbox > ARRAY_SIZE(box))
@@ -6608,7 +6612,7 @@ sna_poly_point__fill(DrawablePtr drawable, GCPtr gc,
 			b++;
 		} while (--nbox);
 		op->boxes(data->sna, op, box, b - box);
-	}
+	} while (n);
 }
 
 static void
@@ -8177,26 +8181,30 @@ sna_poly_point_blt(DrawablePtr drawable,
 
 		assert_pixmap_contains_points(pixmap, pt, n, last.x, last.y);
 		sna_damage_add_points(damage, pt, n, last.x, last.y);
-		do {
-			unsigned nbox = n;
-			if (nbox > ARRAY_SIZE(box))
-				nbox = ARRAY_SIZE(box);
-			n -= nbox;
+		if (fill.points && mode != CoordModePrevious) {
+			fill.points(sna, &fill, last.x, last.y, pt, n);
+		} else {
 			do {
-				*(DDXPointRec *)b = *pt++;
+				unsigned nbox = n;
+				if (nbox > ARRAY_SIZE(box))
+					nbox = ARRAY_SIZE(box);
+				n -= nbox;
+				do {
+					*(DDXPointRec *)b = *pt++;
 
-				b->x1 += last.x;
-				b->y1 += last.y;
-				if (mode == CoordModePrevious)
-					last = *(DDXPointRec *)b;
+					b->x1 += last.x;
+					b->y1 += last.y;
+					if (mode == CoordModePrevious)
+						last = *(DDXPointRec *)b;
 
-				b->x2 = b->x1 + 1;
-				b->y2 = b->y1 + 1;
-				b++;
-			} while (--nbox);
-			fill.boxes(sna, &fill, box, b - box);
-			b = box;
-		} while (n);
+					b->x2 = b->x1 + 1;
+					b->y2 = b->y1 + 1;
+					b++;
+				} while (--nbox);
+				fill.boxes(sna, &fill, box, b - box);
+				b = box;
+			} while (n);
+		}
 	} else {
 		RegionPtr clip = gc->pCompositeClip;
 
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 4bbcdbc..26a0944 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -2862,6 +2862,123 @@ fastcall static void sna_blt_fill_op_boxes(struct sna *sna,
 	_sna_blt_fill_boxes(sna, &op->base.u.blt, box, nbox);
 }
 
+/*
+ * Build the 64-bit value written to the batch for a single pixel:
+ * the XY_PIXEL_BLT opcode OR'ed with the translated point, where the
+ * point (reinterpreted as one 32-bit word through a union) is shifted
+ * into bits 32..63.
+ *
+ * pt:     point to emit
+ * dx, dy: translation added to pt->x and pt->y before packing
+ *
+ * NOTE(review): assumes DDXPointRec is exactly 32 bits wide (two 16-bit
+ * coordinates) so that it overlays the uint32_t member -- confirm
+ * against the X server headers.
+ */
+static inline uint64_t pt_add(const DDXPointRec *pt, int16_t dx, int16_t dy)
+{
+	union {
+		DDXPointRec xy;
+		uint32_t bits;
+	} packed;
+	uint64_t coords;
+
+	packed.xy.x = pt->x + dx;
+	packed.xy.y = pt->y + dy;
+
+	coords = (uint64_t)packed.bits << 32;
+	return XY_PIXEL_BLT | coords;
+}
+
+/*
+ * Emit a list of points through the BLT engine, one XY_PIXEL_BLT
+ * command (two dwords, built by pt_add()) per point.
+ *
+ * sna:    driver state; its kgem batch receives the commands
+ * op:     fill operation whose blt state (bo, pixel, alu) is used
+ * dx, dy: translation added to every point
+ * p:      array of n points
+ * n:      number of points; the loop asserts that at least one point
+ *         is emitted per pass, so callers are expected to pass n > 0
+ *
+ * If the points do not all fit into the remaining batch space they are
+ * emitted in several passes, calling sna_blt_fill_begin() between
+ * passes (presumably to start a fresh batch and re-emit the fill
+ * setup -- confirm against sna_blt_fill_begin()).
+ */
+fastcall static void sna_blt_fill_op_points(struct sna *sna,
+					    const struct sna_fill_op *op,
+					    int16_t dx, int16_t dy,
+					    const DDXPointRec *p, int n)
+{
+	const struct sna_blt_state *blt = &op->base.u.blt;
+	struct kgem *kgem = &sna->kgem;
+
+	DBG(("%s: %08x x %d\n", __FUNCTION__, blt->pixel, n));
+
+	/* The cached fill state belongs to a different target bo. */
+	if (sna->blt_state.fill_bo != op->base.u.blt.bo[0]->unique_id) {
+		sna_blt_fill_begin(sna, blt);
+
+		sna->blt_state.fill_bo = blt->bo[0]->unique_id;
+		sna->blt_state.fill_pixel = blt->pixel;
+		sna->blt_state.fill_alu = blt->alu;
+	}
+
+	/* Need room for at least one point (2 dwords). */
+	if (!kgem_check_batch(kgem, 2))
+		sna_blt_fill_begin(sna, blt);
+
+	do {
+		uint32_t *b = kgem->batch + kgem->nbatch;
+		int n_this_time;
+
+		assert(sna->kgem.mode == KGEM_BLT);
+		/* Clamp this pass to what fits: each point takes 2 dwords. */
+		n_this_time = n;
+		if (2*n_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+			n_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 2;
+		assert(n_this_time);
+		n -= n_this_time;
+
+		/* Reserve the space up front; b still points at its start. */
+		kgem->nbatch += 2 * n_this_time;
+		assert(kgem->nbatch < kgem->surface);
+
+		/*
+		 * Unrolled emission: blocks of 8, then the 4/2/1 remainder
+		 * selected by the low bits of n_this_time.  b advances two
+		 * dwords per point, p advances one element per point.
+		 */
+		if ((dx|dy) == 0) {
+			/* No translation required. */
+			while (n_this_time >= 8) {
+				*((uint64_t *)b + 0) = pt_add(p+0, 0, 0);
+				*((uint64_t *)b + 1) = pt_add(p+1, 0, 0);
+				*((uint64_t *)b + 2) = pt_add(p+2, 0, 0);
+				*((uint64_t *)b + 3) = pt_add(p+3, 0, 0);
+				*((uint64_t *)b + 4) = pt_add(p+4, 0, 0);
+				*((uint64_t *)b + 5) = pt_add(p+5, 0, 0);
+				*((uint64_t *)b + 6) = pt_add(p+6, 0, 0);
+				*((uint64_t *)b + 7) = pt_add(p+7, 0, 0);
+				b += 16;
+				n_this_time -= 8;
+				p += 8;
+			}
+			if (n_this_time & 4) {
+				*((uint64_t *)b + 0) = pt_add(p+0, 0, 0);
+				*((uint64_t *)b + 1) = pt_add(p+1, 0, 0);
+				*((uint64_t *)b + 2) = pt_add(p+2, 0, 0);
+				*((uint64_t *)b + 3) = pt_add(p+3, 0, 0);
+				b += 8;
+				p += 4;
+			}
+			if (n_this_time & 2) {
+				*((uint64_t *)b + 0) = pt_add(p+0, 0, 0);
+				*((uint64_t *)b + 1) = pt_add(p+1, 0, 0);
+				b += 4;
+				p += 2;
+			}
+			if (n_this_time & 1)
+				*((uint64_t *)b + 0) = pt_add(p++, 0, 0);
+		} else {
+			while (n_this_time >= 8) {
+				*((uint64_t *)b + 0) = pt_add(p+0, dx, dy);
+				*((uint64_t *)b + 1) = pt_add(p+1, dx, dy);
+				*((uint64_t *)b + 2) = pt_add(p+2, dx, dy);
+				*((uint64_t *)b + 3) = pt_add(p+3, dx, dy);
+				*((uint64_t *)b + 4) = pt_add(p+4, dx, dy);
+				*((uint64_t *)b + 5) = pt_add(p+5, dx, dy);
+				*((uint64_t *)b + 6) = pt_add(p+6, dx, dy);
+				*((uint64_t *)b + 7) = pt_add(p+7, dx, dy);
+				b += 16;
+				n_this_time -= 8;
+				p += 8;
+			}
+			if (n_this_time & 4) {
+				*((uint64_t *)b + 0) = pt_add(p+0, dx, dy);
+				*((uint64_t *)b + 1) = pt_add(p+1, dx, dy);
+				*((uint64_t *)b + 2) = pt_add(p+2, dx, dy);
+				*((uint64_t *)b + 3) = pt_add(p+3, dx, dy);
+				b += 8;
+				/* Four points were consumed, so advance by four. */
+				p += 4;
+			}
+			if (n_this_time & 2) {
+				*((uint64_t *)b + 0) = pt_add(p+0, dx, dy);
+				*((uint64_t *)b + 1) = pt_add(p+1, dx, dy);
+				b += 4;
+				p += 2;
+			}
+			if (n_this_time & 1)
+				*((uint64_t *)b + 0) = pt_add(p++, dx, dy);
+		}
+
+		if (!n)
+			return;
+
+		/* More points remain: prepare for the next pass. */
+		sna_blt_fill_begin(sna, blt);
+	} while (1);
+}
+
 bool sna_blt_fill(struct sna *sna, uint8_t alu,
 		  struct kgem_bo *bo, int bpp,
 		  uint32_t pixel,
@@ -2886,6 +3003,7 @@ bool sna_blt_fill(struct sna *sna, uint8_t alu,
 	fill->blt   = sna_blt_fill_op_blt;
 	fill->box   = sna_blt_fill_op_box;
 	fill->boxes = sna_blt_fill_op_boxes;
+	fill->points = sna_blt_fill_op_points;
 	fill->done  =
 		(void (*)(struct sna *, const struct sna_fill_op *))nop_done;
 	return true;
diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h
index bda6ef6..92a1ae5 100644
--- a/src/sna/sna_reg.h
+++ b/src/sna/sna_reg.h
@@ -47,6 +47,7 @@
 #define XY_SETUP_BLT			(2<<29|0x01<<22)
 #define XY_SETUP_MONO_PATTERN_SL_BLT	(2<<29|0x11<<22)
 #define XY_SETUP_CLIP			(2<<29|0x03<<22|1)
+#define XY_PIXEL_BLT			(2<<29|0x24<<22)
 #define XY_SCANLINE_BLT			(2<<29|0x25<<22|1)
 #define XY_TEXT_IMMEDIATE_BLT		(2<<29|0x31<<22|(1<<16))
 #define XY_SRC_COPY_BLT_CMD		(2<<29|0x53<<22)
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 67848b9..325b7cc 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -199,6 +199,11 @@ struct sna_fill_op {
 			       const struct sna_fill_op *op,
 			       const BoxRec *box,
 			       int count);
+	fastcall void (*points)(struct sna *sna,
+			       const struct sna_fill_op *op,
+			       int16_t dx, int16_t dy,
+			       const DDXPointRec *points,
+			       int count);
 	void (*done)(struct sna *sna, const struct sna_fill_op *op);
 };
 


More information about the xorg-commit mailing list