xf86-video-intel: 4 commits - src/sna/gen3_render.c src/sna/sna_dri.c src/sna/sna_trapezoids.c

Chris Wilson ickle at kemper.freedesktop.org
Fri Mar 16 08:33:24 PDT 2012


 src/sna/gen3_render.c    |   42 ++++++++-
 src/sna/sna_dri.c        |  200 ++++++++++++++++++++++++++++++++++++++---------
 src/sna/sna_trapezoids.c |   63 ++++++++++----
 3 files changed, 243 insertions(+), 62 deletions(-)

New commits:
commit 63c0d10faee3c7cca050505c2e81c416119e57e9
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 16 14:51:53 2012 +0000

    sna/dri: Improve handling of non-front attachments for CopyRegion
    
    Confusion reigns between using the backing pixmap for the drawable for
    the front buffer, and a fake pixmap for the auxiliary buffers.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c
index ccaf40f..283b8db 100644
--- a/src/sna/sna_dri.c
+++ b/src/sna/sna_dri.c
@@ -394,9 +394,9 @@ static void set_bo(PixmapPtr pixmap, struct kgem_bo *bo)
 }
 
 static void
-sna_dri_copy(struct sna *sna, DrawablePtr draw, RegionPtr region,
-	     struct kgem_bo *dst_bo,struct kgem_bo *src_bo,
-	     bool sync)
+sna_dri_copy_to_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
+		      struct kgem_bo *dst_bo, struct kgem_bo *src_bo,
+		      bool sync)
 {
 	PixmapPtr pixmap = get_drawable_pixmap(draw);
 	pixman_region16_t clip;
@@ -477,18 +477,8 @@ sna_dri_copy(struct sna *sna, DrawablePtr draw, RegionPtr region,
 		boxes = &box;
 		n = 1;
 	}
-	/* It's important that this copy gets submitted before the
-	 * direct rendering client submits rendering for the next
-	 * frame, but we don't actually need to submit right now.  The
-	 * client will wait for the DRI2CopyRegion reply or the swap
-	 * buffer event before rendering, and we'll hit the flush
-	 * callback chain before those messages are sent.  We submit
-	 * our batch buffers from the flush callback chain so we know
-	 * that will happen before the client tries to render
-	 * again.
-	 */
 	sna->render.copy_boxes(sna, GXcopy,
-			       pixmap, src_bo, -draw->x, -draw->y,
+			       (PixmapPtr)draw, src_bo, -draw->x, -draw->y,
 			       pixmap, dst_bo, dx, dy,
 			       boxes, n);
 
@@ -505,6 +495,137 @@ sna_dri_copy(struct sna *sna, DrawablePtr draw, RegionPtr region,
 }
 
 static void
+sna_dri_copy_from_front(struct sna *sna, DrawablePtr draw, RegionPtr region,
+			struct kgem_bo *dst_bo, struct kgem_bo *src_bo,
+			bool sync)
+{
+	PixmapPtr pixmap = get_drawable_pixmap(draw);
+	pixman_region16_t clip;
+	BoxRec box, *boxes;
+	int16_t dx, dy;
+	int n;
+
+	box.x1 = draw->x;
+	box.y1 = draw->y;
+	box.x2 = draw->x + draw->width;
+	box.y2 = draw->y + draw->height;
+
+	if (region) {
+		pixman_region_translate(region, draw->x, draw->y);
+		pixman_region_init_rects(&clip, &box, 1);
+		pixman_region_intersect(&clip, &clip, region);
+		region = &clip;
+
+		if (!pixman_region_not_empty(region)) {
+			DBG(("%s: all clipped\n", __FUNCTION__));
+			return;
+		}
+	}
+
+	dx = dy = 0;
+	if (draw->type != DRAWABLE_PIXMAP) {
+		WindowPtr win = (WindowPtr)draw;
+
+		DBG(("%s: draw=(%d, %d), delta=(%d, %d), clip.extents=(%d, %d), (%d, %d)\n",
+		     __FUNCTION__, draw->x, draw->y,
+		     get_drawable_dx(draw), get_drawable_dy(draw),
+		     win->clipList.extents.x1, win->clipList.extents.y1,
+		     win->clipList.extents.x2, win->clipList.extents.y2));
+
+		if (region == NULL) {
+			pixman_region_init_rects(&clip, &box, 1);
+			region = &clip;
+		}
+
+		pixman_region_intersect(region, &win->clipList, region);
+		if (!pixman_region_not_empty(region)) {
+			DBG(("%s: all clipped\n", __FUNCTION__));
+			return;
+		}
+
+		get_drawable_deltas(draw, pixmap, &dx, &dy);
+	}
+
+	if (sna->kgem.gen >= 60)
+		kgem_set_mode(&sna->kgem, KGEM_RENDER);
+
+	if (region) {
+		boxes = REGION_RECTS(region);
+		n = REGION_NUM_RECTS(region);
+		assert(n);
+	} else {
+		pixman_region_init_rects(&clip, &box, 1);
+		region = &clip;
+		boxes = &box;
+		n = 1;
+	}
+	sna->render.copy_boxes(sna, GXcopy,
+			       pixmap, src_bo, dx, dy,
+			       (PixmapPtr)draw, dst_bo, -draw->x, -draw->y,
+			       boxes, n);
+
+	if (region == &clip)
+		pixman_region_fini(&clip);
+}
+
+static void
+sna_dri_copy(struct sna *sna, DrawablePtr draw, RegionPtr region,
+	     struct kgem_bo *dst_bo, struct kgem_bo *src_bo,
+	     bool sync)
+{
+	pixman_region16_t clip;
+	BoxRec box, *boxes;
+	int n;
+
+	box.x1 = 0;
+	box.y1 = 0;
+	box.x2 = draw->width;
+	box.y2 = draw->height;
+
+	if (region) {
+		pixman_region_init_rects(&clip, &box, 1);
+		pixman_region_intersect(&clip, &clip, region);
+		region = &clip;
+
+		if (!pixman_region_not_empty(region)) {
+			DBG(("%s: all clipped\n", __FUNCTION__));
+			return;
+		}
+
+		boxes = REGION_RECTS(region);
+		n = REGION_NUM_RECTS(region);
+		assert(n);
+	} else {
+		boxes = &box;
+		n = 1;
+	}
+
+	if (sna->kgem.gen >= 60) {
+		/* Sandybridge introduced a separate ring which it uses to
+		 * perform blits. Switching rendering between rings incurs
+		 * a stall as we wait upon the old ring to finish and
+		 * flush its render cache before we can proceed on with
+		 * the operation on the new ring.
+		 *
+		 * As this buffer, we presume, has just been written to by
+		 * the DRI client using the RENDER ring, we want to perform
+		 * our operation on the same ring, and ideally on the same
+		 * ring as we will flip from (which should be the RENDER ring
+		 * as well).
+		 */
+		kgem_set_mode(&sna->kgem, KGEM_RENDER);
+	}
+
+	sna->render.copy_boxes(sna, GXcopy,
+			       (PixmapPtr)draw, src_bo, 0, 0,
+			       (PixmapPtr)draw, dst_bo, 0, 0,
+			       boxes, n);
+
+	if (region == &clip)
+		pixman_region_fini(&clip);
+}
+
+static void
 sna_dri_copy_region(DrawablePtr draw,
 		    RegionPtr region,
 		    DRI2BufferPtr dst_buffer,
@@ -513,15 +634,20 @@ sna_dri_copy_region(DrawablePtr draw,
 	PixmapPtr pixmap = get_drawable_pixmap(draw);
 	struct sna *sna = to_sna_from_pixmap(pixmap);
 	struct kgem_bo *src, *dst;
+	void (*copy)(struct sna *, DrawablePtr, RegionPtr,
+		     struct kgem_bo *, struct kgem_bo *, bool) = sna_dri_copy;
 
-	if (dst_buffer->attachment == DRI2BufferFrontLeft)
+	if (dst_buffer->attachment == DRI2BufferFrontLeft) {
 		dst = sna_pixmap_set_dri(sna, pixmap);
-	else
+		copy = sna_dri_copy_to_front;
+	} else
 		dst = get_private(dst_buffer)->bo;
 
-	if (src_buffer->attachment == DRI2BufferFrontLeft)
+	if (src_buffer->attachment == DRI2BufferFrontLeft) {
 		src = sna_pixmap_set_dri(sna, pixmap);
-	else
+		assert(copy == sna_dri_copy);
+		copy = sna_dri_copy_from_front;
+	} else
 		src = get_private(src_buffer)->bo;
 
 	assert(dst != NULL);
@@ -534,13 +660,13 @@ sna_dri_copy_region(DrawablePtr draw,
 	DBG(("%s: src -- attachment=%d, name=%d, handle=%d\n",
 	     __FUNCTION__,
 	     src_buffer->attachment, src_buffer->name, src->handle));
-	DBG(("%s: clip (%d, %d), (%d, %d) x %d\n",
+	DBG(("%s: region (%d, %d), (%d, %d) x %d\n",
 	     __FUNCTION__,
 	     region->extents.x1, region->extents.y1,
 	     region->extents.x2, region->extents.y2,
 	     REGION_NUM_RECTS(region)));
 
-	sna_dri_copy(sna, draw, region, dst, src, false);
+	copy(sna, draw, region, dst, src, false);
 }
 
 #if DRI2INFOREC_VERSION >= 4
@@ -916,12 +1042,12 @@ static void sna_dri_vblank_handle(int fd,
 			info->old_front.bo = NULL;
 			return;
 		}
-		/* else fall through to exchange/blit */
+		/* else fall through to blit */
 	case DRI2_SWAP:
-		sna_dri_copy(sna, draw, NULL,
-			     get_private(info->front)->bo,
-			     get_private(info->back)->bo,
-			     true);
+		sna_dri_copy_to_front(sna, draw, NULL,
+				      get_private(info->front)->bo,
+				      get_private(info->back)->bo,
+				      true);
 	case DRI2_SWAP_THROTTLE:
 		DBG(("%s: %d complete, frame=%d tv=%d.%06d\n",
 		     __FUNCTION__, info->type, frame, tv_sec, tv_usec));
@@ -1451,10 +1577,10 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 			 DRI2SwapComplete(client, draw, 0, 0, 0, DRI2_BLIT_COMPLETE, func, data);
 		 }
 
-		 sna_dri_copy(sna, draw, NULL,
-			      get_private(front)->bo,
-			      get_private(back)->bo,
-			      true);
+		 sna_dri_copy_to_front(sna, draw, NULL,
+				       get_private(front)->bo,
+				       get_private(back)->bo,
+				       true);
 		 return TRUE;
 	}
 
@@ -1540,10 +1666,10 @@ sna_dri_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front,
 
 blit_fallback:
 	DBG(("%s -- blit\n", __FUNCTION__));
-	sna_dri_copy(sna, draw, NULL,
-		     get_private(front)->bo,
-		     get_private(back)->bo,
-		     true);
+	sna_dri_copy_to_front(sna, draw, NULL,
+			      get_private(front)->bo,
+			      get_private(back)->bo,
+			      false);
 	if (info)
 		sna_dri_frame_event_info_free(info);
 	DRI2SwapComplete(client, draw, 0, 0, 0, DRI2_BLIT_COMPLETE, func, data);
@@ -1584,10 +1710,10 @@ sna_dri_async_swap(ClientPtr client, DrawablePtr draw,
 blit:
 		DBG(("%s: unable to flip, so blit\n", __FUNCTION__));
 
-		sna_dri_copy(sna, draw, NULL,
-			     get_private(front)->bo,
-			     get_private(back)->bo,
-			     false);
+		sna_dri_copy_to_front(sna, draw, NULL,
+				      get_private(front)->bo,
+				      get_private(back)->bo,
+				      false);
 		DRI2SwapComplete(client, draw, 0, 0, 0,
 				 DRI2_BLIT_COMPLETE, func, data);
 		return FALSE;
commit 324a1dffdc7dd896224fab265bd6a9cf99d7587f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 16 13:07:00 2012 +0000

    sna/gen3: Micro-optimise solid span emission
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index c567d6b..0991a98 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -1803,7 +1803,7 @@ inline static int gen3_get_rectangles(struct sna *sna,
 
 start:
 	rem = vertex_space(sna);
-	if (op->floats_per_rect > rem) {
+	if (unlikely(op->floats_per_rect > rem)) {
 		DBG(("flushing vbo for %s: %d < %d\n",
 		     __FUNCTION__, rem, op->floats_per_rect));
 		rem = gen3_get_rectangles__flush(sna, op);
@@ -3194,6 +3194,33 @@ gen3_emit_composite_spans_primitive(struct sna *sna,
 }
 
 fastcall static void
+gen3_render_composite_spans_constant_box(struct sna *sna,
+					 const struct sna_composite_spans_op *op,
+					 const BoxRec *box, float opacity)
+{
+	float *v;
+	DBG(("%s: src=+(%d, %d), opacity=%f, dst=+(%d, %d), box=(%d, %d) x (%d, %d)\n",
+	     __FUNCTION__,
+	     op->base.src.offset[0], op->base.src.offset[1],
+	     opacity,
+	     op->base.dst.x, op->base.dst.y,
+	     box->x1, box->y1,
+	     box->x2 - box->x1,
+	     box->y2 - box->y1));
+
+	gen3_get_rectangles(sna, &op->base, 1);
+
+	v = sna->render.vertices + sna->render.vertex_used;
+	sna->render.vertex_used += 9;
+
+	v[0] = box->x2;
+	v[6] = v[3] = box->x1;
+	v[4] = v[1] = box->y2;
+	v[7] = box->y1;
+	v[8] = v[5] = v[2] = opacity;
+}
+
+fastcall static void
 gen3_render_composite_spans_box(struct sna *sna,
 				const struct sna_composite_spans_op *op,
 				const BoxRec *box, float opacity)
@@ -3328,6 +3355,9 @@ gen3_render_composite_spans(struct sna *sna,
 		tmp->base.mask.u.gen3.type = SHADER_OPACITY;
 
 	no_offset = tmp->base.dst.x == 0 && tmp->base.dst.y == 0;
+	tmp->box   = gen3_render_composite_spans_box;
+	tmp->boxes = gen3_render_composite_spans_boxes;
+	tmp->done  = gen3_render_composite_spans_done;
 	tmp->prim_emit = gen3_emit_composite_spans_primitive;
 	switch (tmp->base.src.u.gen3.type) {
 	case SHADER_NONE:
@@ -3338,7 +3368,11 @@ gen3_render_composite_spans(struct sna *sna,
 	case SHADER_BLACK:
 	case SHADER_WHITE:
 	case SHADER_CONSTANT:
-		tmp->prim_emit = no_offset ? gen3_emit_composite_spans_primitive_constant_no_offset : gen3_emit_composite_spans_primitive_constant;
+		if (no_offset) {
+			tmp->box = gen3_render_composite_spans_constant_box;
+			tmp->prim_emit = gen3_emit_composite_spans_primitive_constant_no_offset;
+		} else
+			tmp->prim_emit = gen3_emit_composite_spans_primitive_constant;
 		break;
 	case SHADER_LINEAR:
 	case SHADER_RADIAL:
@@ -3364,10 +3398,6 @@ gen3_render_composite_spans(struct sna *sna,
 		tmp->base.mask.u.gen3.type == SHADER_OPACITY;
 	tmp->base.floats_per_rect = 3 * tmp->base.floats_per_vertex;
 
-	tmp->box   = gen3_render_composite_spans_box;
-	tmp->boxes = gen3_render_composite_spans_boxes;
-	tmp->done  = gen3_render_composite_spans_done;
-
 	if (!kgem_check_bo(&sna->kgem,
 			   tmp->base.dst.bo, tmp->base.src.bo,
 			   NULL)) {
commit 47c47b85f6b7ef5cbc2192da9ef0cee91f6744ca
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 16 12:59:13 2012 +0000

    sna/traps: Micro-optimise span emission
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 4dab0f9..52be00b 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -1141,6 +1141,18 @@ tor_blt_span(struct sna *sna,
 }
 
 static void
+tor_blt_span__no_damage(struct sna *sna,
+			struct sna_composite_spans_op *op,
+			pixman_region16_t *clip,
+			const BoxRec *box,
+			int coverage)
+{
+	__DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage));
+
+	op->box(sna, op, box, AREA_TO_ALPHA(coverage));
+}
+
+static void
 tor_blt_span_clipped(struct sna *sna,
 		     struct sna_composite_spans_op *op,
 		     pixman_region16_t *clip,
@@ -3101,7 +3113,8 @@ project_trapezoid_onto_grid(const xTrapezoid *in,
 }
 
 static span_func_t
-choose_span(PicturePtr dst,
+choose_span(struct sna_composite_spans_op *tmp,
+	    PicturePtr dst,
 	    PictFormatPtr maskFormat,
 	    uint8_t op,
 	    RegionPtr clip)
@@ -3120,9 +3133,12 @@ choose_span(PicturePtr dst,
 				span = tor_blt_span_mono_clipped;
 		}
 	} else {
-		span = tor_blt_span;
 		if (REGION_NUM_RECTS(clip) > 1)
 			span = tor_blt_span_clipped;
+		else if (tmp->base.damage == NULL)
+			span = tor_blt_span__no_damage;
+		else
+			span = tor_blt_span;
 	}
 
 	return span;
@@ -3379,7 +3395,7 @@ trapezoid_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 	}
 
 	tor_render(sna, &tor, &tmp, &clip,
-		   choose_span(dst, maskFormat, op, &clip),
+		   choose_span(&tmp, dst, maskFormat, op, &clip),
 		   maskFormat && !operator_is_bounded(op));
 
 skip:
@@ -4669,7 +4685,7 @@ trap_span_converter(PicturePtr dst,
 	}
 
 	tor_render(sna, &tor, &tmp, clip,
-		   choose_span(dst, NULL, PictOpAdd, clip), false);
+		   choose_span(&tmp, dst, NULL, PictOpAdd, clip), false);
 
 skip:
 	tor_fini(&tor);
@@ -5196,7 +5212,7 @@ triangles_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 	}
 
 	tor_render(sna, &tor, &tmp, &clip,
-		   choose_span(dst, maskFormat, op, &clip),
+		   choose_span(&tmp, dst, maskFormat, op, &clip),
 		   maskFormat && !operator_is_bounded(op));
 
 skip:
@@ -5559,7 +5575,7 @@ tristrip_span_converter(CARD8 op, PicturePtr src, PicturePtr dst,
 	assert(tor.polygon->num_edges <= 2*count);
 
 	tor_render(sna, &tor, &tmp, &clip,
-		   choose_span(dst, maskFormat, op, &clip),
+		   choose_span(&tmp, dst, maskFormat, op, &clip),
 		   maskFormat && !operator_is_bounded(op));
 
 skip:
commit 3a26437f195bf3e5d5913f5552a0b838cafb9b2c
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Fri Mar 16 12:37:25 2012 +0000

    sna/traps: Tune nonzero_row
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index c1d42a7..4dab0f9 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -591,20 +591,31 @@ cell_list_add_subspan(struct cell_list *cells,
 		cell->uncovered_area += 2*(fx1-fx2);
 }
 
-static void
-cell_list_render_edge(struct cell_list *cells, struct edge *edge, int sign)
+inline static void
+cell_list_add_span(struct cell_list *cells,
+		   grid_scaled_x_t x1,
+		   grid_scaled_x_t x2)
 {
 	struct cell *cell;
-	grid_scaled_x_t fx;
-	int ix;
+	int ix1, fx1;
+	int ix2, fx2;
+
+	FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1);
+	FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2);
+
+	__DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__,
+	       x1, ix1, fx1, x2, ix2, fx2));
 
-	FAST_SAMPLES_X_TO_INT_FRAC(edge->x.quo, ix, fx);
+	cell = cell_list_find(cells, ix1);
+	if (ix1 != ix2) {
+		cell->uncovered_area += 2*fx1;
+		cell->covered_height += FAST_SAMPLES_Y;
 
-	/* We always know that ix1 is >= the cell list cursor in this
-	 * case due to the no-intersections precondition.  */
-	cell = cell_list_find(cells, ix);
-	cell->covered_height += sign*FAST_SAMPLES_Y;
-	cell->uncovered_area += sign*2*fx*FAST_SAMPLES_Y;
+		cell = cell_list_find(cells, ix2);
+		cell->uncovered_area -= 2*fx2;
+		cell->covered_height -= FAST_SAMPLES_Y;
+	} else
+		cell->uncovered_area += 2*(fx1-fx2);
 }
 
 static void
@@ -1054,9 +1065,7 @@ nonzero_row(struct active_list *active, struct cell_list *coverages)
 			right = right->next;
 		} while (1);
 
-		cell_list_render_edge(coverages, left, +1);
-		cell_list_render_edge(coverages, right, -1);
-
+		cell_list_add_span(coverages, left->x.quo, right->x.quo);
 		left = right->next;
 	}
 }


More information about the xorg-commit mailing list