xf86-video-intel: 4 commits - src/sna/sna_accel.c

Thu Jan 19 07:18:33 PST 2012

src/sna/sna_accel.c |  170 +++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 161 insertions(+), 9 deletions(-)

New commits:
commit dbc75532d5f6a10e9fc7cfe3634d5a0db7304a99
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 19 14:36:32 2012 +0000

    sna: Tweak move-to-cpu to ignore inplace hint if it's already on the CPU
    
    If we test the area to be drawn against the existing CPU damage and find
    it is already on the CPU, we may as well continue to utilize that
    damaged region.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 964ff65..9690cb0 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1170,6 +1170,9 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 
 	if (flags & MOVE_INPLACE_HINT &&
 	    priv->stride && priv->gpu_bo &&
+	    !(priv->cpu_damage &&
+	      sna_damage_contains_box__no_reduce(priv->cpu_damage,
+						 &region->extents)) &&
 	    sna_pixmap_move_area_to_gpu(pixmap, &region->extents)) {
 		assert(flags & MOVE_WRITE);
 		kgem_bo_submit(&sna->kgem, priv->gpu_bo);
commit 7ad4a0c9423ee7f4041173c428b07ac4af312fe1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 19 11:15:38 2012 +0000

    sna: Only use the blitter to emit wide spans if we cannot stream the updates
    
    If either the region is busy on the gpu or if we need to read the
    destination then we would incur penalties for trying to perform the
    operation through the GTT. However, if we are simply streaming pixels to
    an unbusy bo then we can do so inplace faster than computing the
    corresponding GPU commands and uploading them.
    
    Note: currently it is universally slower to use the GPU here (the
    computation of the spans is too slow). However, that is only according
    to micro-benchmarks; avoiding the readback is likely to be more
    efficient in practice.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 4645c68..964ff65 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -58,8 +58,8 @@
 #define FORCE_FALLBACK 0
 #define FORCE_FLUSH 0
 
-#define USE_SPANS 0
 #define USE_INPLACE 1
+#define USE_WIDE_SPANS 0 /* -1 force CPU, 1 force GPU */
 #define USE_ZERO_SPANS 1 /* -1 force CPU, 1 force GPU */
 #define USE_BO_FOR_SCRATCH_PIXMAP 1
 
@@ -5334,6 +5334,70 @@ use_zero_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
 	return ret;
 }
 
+/* Only use our spans code if the destination is busy and we can't perform
+ * the operation in place.
+ *
+ * Currently it looks to be faster to use the CPU for wide spans on all
+ * platforms, slow MI code. But that does not take into account the true
+ * cost of readback?
+ */
+inline static bool
+_use_wide_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
+{
+	PixmapPtr pixmap;
+	struct sna_pixmap *priv;
+	BoxRec area;
+	int16_t dx, dy;
+
+	if (USE_WIDE_SPANS)
+		return USE_WIDE_SPANS > 0;
+
+	if ((drawable_gc_flags(drawable, gc, false) & MOVE_INPLACE_HINT) == 0)
+		return TRUE;
+
+	/* XXX check for GPU stalls on the gc (stipple, tile, etc) */
+
+	pixmap = get_drawable_pixmap(drawable);
+	priv = sna_pixmap(pixmap);
+	if (priv == NULL)
+		return FALSE;
+
+	if (DAMAGE_IS_ALL(priv->cpu_damage))
+		return FALSE;
+
+	if (priv->stride == 0 || priv->gpu_bo == NULL)
+		return FALSE;
+
+	if (!kgem_bo_is_busy(priv->gpu_bo))
+		return FALSE;
+
+	if (DAMAGE_IS_ALL(priv->gpu_damage))
+		return TRUE;
+
+	if (priv->gpu_damage == NULL)
+		return FALSE;
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	area = *extents;
+	area.x1 += dx;
+	area.x2 += dx;
+	area.y1 += dy;
+	area.y2 += dy;
+	DBG(("%s extents (%d, %d), (%d, %d)\n", __FUNCTION__,
+	     area.x1, area.y1, area.x2, area.y2));
+
+	return sna_damage_contains_box(priv->gpu_damage,
+				       &area) != PIXMAN_REGION_OUT;
+}
+
+static bool
+use_wide_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
+{
+	bool ret = _use_wide_spans(drawable, gc, extents);
+	DBG(("%s? %d\n", __FUNCTION__, ret));
+	return ret;
+}
+
 static void
 sna_poly_line(DrawablePtr drawable, GCPtr gc,
 	      int mode, int n, DDXPointPtr pt)
@@ -5505,7 +5569,7 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
 	}
 
 spans_fallback:
-	if (USE_SPANS &&
+	if (use_wide_spans(drawable, gc, &region.extents) &&
 	    sna_drawable_use_gpu_bo(drawable, &region.extents, &damage)) {
 		DBG(("%s: converting line into spans\n", __FUNCTION__));
 		switch (gc->lineStyle) {
@@ -6377,7 +6441,7 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 
 	/* XXX Do we really want to base this decision on the amalgam ? */
 spans_fallback:
-	if (USE_SPANS &&
+	if (use_wide_spans(drawable, gc, &region.extents) &&
 	    sna_drawable_use_gpu_bo(drawable, &region.extents, &damage)) {
 		void (*line)(DrawablePtr, GCPtr, int, int, DDXPointPtr);
 		int i;
@@ -7076,7 +7140,8 @@ sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc)
 		goto fallback;
 
 	/* For "simple" cases use the miPolyArc to spans path */
-	if (USE_SPANS && arc_to_spans(gc, n) &&
+	if (use_wide_spans(drawable, gc, &region.extents) &&
+	    arc_to_spans(gc, n) &&
 	    sna_drawable_use_gpu_bo(drawable, &region.extents, &damage)) {
 		DBG(("%s: converting arcs into spans\n", __FUNCTION__));
 		/* XXX still around 10x slower for x11perf -ellipse */
commit 9db6b9fad808175184849f3030862ea115ef0708
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 19 11:54:14 2012 +0000

    sna: Also check for the inplace hint when migrating the whole pixmap
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index dd13eba..4645c68 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -866,6 +866,28 @@ skip_inplace_map:
 		goto done;
 	}
 
+	if (flags & MOVE_INPLACE_HINT &&
+	    priv->stride && priv->gpu_bo &&
+	    sna_pixmap_move_to_gpu(pixmap, flags)) {
+		assert(flags & MOVE_WRITE);
+		kgem_bo_submit(&sna->kgem, priv->gpu_bo);
+
+		DBG(("%s: operate inplace\n", __FUNCTION__));
+
+		pixmap->devPrivate.ptr =
+			kgem_bo_map(&sna->kgem, priv->gpu_bo);
+		if (pixmap->devPrivate.ptr != NULL) {
+			priv->mapped = true;
+			pixmap->devKind = priv->gpu_bo->pitch;
+			sna_damage_all(&priv->gpu_damage,
+				       pixmap->drawable.width,
+				       pixmap->drawable.height);
+			return true;
+		}
+
+		priv->mapped = false;
+	}
+
 	if (priv->mapped) {
 		pixmap->devPrivate.ptr = NULL;
 		priv->mapped = false;
@@ -1149,6 +1171,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 	if (flags & MOVE_INPLACE_HINT &&
 	    priv->stride && priv->gpu_bo &&
 	    sna_pixmap_move_area_to_gpu(pixmap, &region->extents)) {
+		assert(flags & MOVE_WRITE);
 		kgem_bo_submit(&sna->kgem, priv->gpu_bo);
 
 		DBG(("%s: operate inplace\n", __FUNCTION__));
commit d3f7d5d614748306cc376d1929da2bbc14494a6e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 19 11:15:38 2012 +0000

    sna: Only use the blitter to emit spans if we cannot stream the updates
    
    If either the region is busy on the gpu or if we need to read the
    destination then we would incur penalties for trying to perform the
    operation through the GTT. However, if we are simply streaming pixels to
    an unbusy bo then we can do so inplace faster than computing the
    corresponding GPU commands and uploading them.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 08889f8..dd13eba 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -60,7 +60,7 @@
 
 #define USE_SPANS 0
 #define USE_INPLACE 1
-#define USE_ZERO_SPANS 1
+#define USE_ZERO_SPANS 1 /* -1 force CPU, 1 force GPU */
 #define USE_BO_FOR_SCRATCH_PIXMAP 1
 
 #define MIGRATE_ALL 0
@@ -1148,8 +1148,6 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 
 	if (flags & MOVE_INPLACE_HINT &&
 	    priv->stride && priv->gpu_bo &&
-	    (DAMAGE_IS_ALL(priv->gpu_damage) ||
-	     region_inplace(sna, pixmap, region, priv)) &&
 	    sna_pixmap_move_area_to_gpu(pixmap, &region->extents)) {
 		kgem_bo_submit(&sna->kgem, priv->gpu_bo);
 
@@ -5250,6 +5248,69 @@ sna_poly_line_extents(DrawablePtr drawable, GCPtr gc,
 	return 1 | blt << 2 | clip << 1;
 }
 
+/* Only use our spans code if the destination is busy and we can't perform
+ * the operation in place.
+ *
+ * Currently it looks to be faster to use the GPU for zero spans on all
+ * platforms.
+ */
+inline static bool
+_use_zero_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
+{
+	PixmapPtr pixmap;
+	struct sna_pixmap *priv;
+	BoxRec area;
+	int16_t dx, dy;
+
+	if (USE_ZERO_SPANS)
+		return USE_ZERO_SPANS > 0;
+
+	if ((drawable_gc_flags(drawable, gc, false) & MOVE_INPLACE_HINT) == 0)
+		return TRUE;
+
+	/* XXX check for GPU stalls on the gc (stipple, tile, etc) */
+
+	pixmap = get_drawable_pixmap(drawable);
+	priv = sna_pixmap(pixmap);
+	if (priv == NULL)
+		return FALSE;
+
+	if (DAMAGE_IS_ALL(priv->cpu_damage))
+		return FALSE;
+
+	if (priv->stride == 0 || priv->gpu_bo == NULL)
+		return FALSE;
+
+	if (!kgem_bo_is_busy(priv->gpu_bo))
+		return FALSE;
+
+	if (DAMAGE_IS_ALL(priv->gpu_damage))
+		return TRUE;
+
+	if (priv->gpu_damage == NULL)
+		return FALSE;
+
+	get_drawable_deltas(drawable, pixmap, &dx, &dy);
+	area = *extents;
+	area.x1 += dx;
+	area.x2 += dx;
+	area.y1 += dy;
+	area.y2 += dy;
+	DBG(("%s extents (%d, %d), (%d, %d)\n", __FUNCTION__,
+	     area.x1, area.y1, area.x2, area.y2));
+
+	return sna_damage_contains_box(priv->gpu_damage,
+				       &area) != PIXMAN_REGION_OUT;
+}
+
+static bool
+use_zero_spans(DrawablePtr drawable, GCPtr gc, const BoxRec *extents)
+{
+	bool ret = _use_zero_spans(drawable, gc, extents);
+	DBG(("%s? %d\n", __FUNCTION__, ret));
+	return ret;
+}
+
 static void
 sna_poly_line(DrawablePtr drawable, GCPtr gc,
 	      int mode, int n, DDXPointPtr pt)
@@ -5339,7 +5400,7 @@ sna_poly_line(DrawablePtr drawable, GCPtr gc,
 					      &region.extents, flags & 2))
 				return;
 		} else { /* !rectilinear */
-			if (USE_ZERO_SPANS &&
+			if (use_zero_spans(drawable, gc, &region.extents) &&
 			    sna_drawable_use_gpu_bo(drawable,
 						    &region.extents,
 						    &damage) &&
@@ -6219,7 +6280,7 @@ sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg)
 						 &region.extents, flags & 2))
 				return;
 		} else {
-			if (USE_ZERO_SPANS &&
+			if (use_zero_spans(drawable, gc, &region.extents) &&
 			    sna_drawable_use_gpu_bo(drawable,
 						    &region.extents,
 						    &damage) &&