xf86-video-intel: 2 commits - src/sna/gen4_render.c src/sna/gen4_vertex.c

Chris Wilson ickle at kemper.freedesktop.org
Wed Sep 11 02:22:22 PDT 2013


 src/sna/gen4_render.c |   28 ----------------------------
 src/sna/gen4_vertex.c |   49 ++++++++++++++++++++++++++++++++-----------------
 2 files changed, 32 insertions(+), 45 deletions(-)

New commits:
commit 815caa9fc660a4eec15992a52b0f8ff9c3c71c55
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Sep 11 10:09:52 2013 +0100

    sna/gen4+: Rebalance creation of vbo during batch flushing
    
    We need to be careful not to copy too much data during the vertex flush
    or else that becomes the rate-limiting step. The goal here is to do the
    early flush to warm up the GPU, then transition to larger batches.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
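
    For readers skimming the diff: the change replaces the old "does it fit
    before the surface state?" test with a combined usage estimate and a fixed
    1024-dword threshold. Below is a minimal standalone sketch of that
    heuristic, assuming a simplified stand-in struct (fake_kgem and
    vertices_fit_inline are illustrative names, not the driver's API); the
    real logic lives in gen4_vertex_close() in the diff that follows.

/* Hedged sketch only: simplified stand-in types, not the SNA driver code. */
#include <stdbool.h>
#include <stdio.h>

struct fake_kgem {
	unsigned nbatch;     /* dwords of commands already emitted        */
	unsigned batch_size; /* total size of the batch buffer, in dwords */
	unsigned surface;    /* surface state grows down from this offset */
};

/* Small flushes (the early "warm-up" batches) keep their vertices inline
 * in the batch; anything larger is spilled to its own vbo so the memcpy
 * never becomes the rate-limiting step. */
static bool
vertices_fit_inline(const struct fake_kgem *kgem, unsigned vertex_used)
{
	unsigned size;

	size  = kgem->nbatch;                     /* commands used so far  */
	size += kgem->batch_size - kgem->surface; /* surface state used    */
	size += vertex_used;                      /* pending vertex dwords */

	return size <= 1024;
}

int main(void)
{
	struct fake_kgem kgem = { .nbatch = 100, .batch_size = 4096, .surface = 4000 };

	printf("warm-up flush inlined: %d\n", vertices_fit_inline(&kgem, 200));
	printf("big flush gets a vbo:  %d\n", !vertices_fit_inline(&kgem, 2000));
	return 0;
}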

diff --git a/src/sna/gen4_vertex.c b/src/sna/gen4_vertex.c
index adeed3c..3c4911a 100644
--- a/src/sna/gen4_vertex.c
+++ b/src/sna/gen4_vertex.c
@@ -182,9 +182,16 @@ void gen4_vertex_close(struct sna *sna)
 
 		}
 	} else {
-		if (sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
+		int size;
+
+		size  = sna->kgem.nbatch;
+		size += sna->kgem.batch_size - sna->kgem.surface;
+		size += sna->render.vertex_used;
+
+		if (size <= 1024) {
 			DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
 			     sna->render.vertex_used, sna->kgem.nbatch));
+			assert(sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface);
 			memcpy(sna->kgem.batch + sna->kgem.nbatch,
 			       sna->render.vertex_data,
 			       sna->render.vertex_used * 4);
@@ -192,31 +199,37 @@ void gen4_vertex_close(struct sna *sna)
 			bo = NULL;
 			sna->kgem.nbatch += sna->render.vertex_used;
 		} else {
-			sna->render.vbo = kgem_create_linear(&sna->kgem,
-							     256*1024, CREATE_GTT_MAP | CREATE_NO_RETIRE | CREATE_CACHED);
-			if (sna->render.vbo)
-				sna->render.vertices = kgem_bo_map(&sna->kgem, sna->render.vbo);
+			size = 256 * 1024;
+			do {
+				bo = kgem_create_linear(&sna->kgem, size,
+							CREATE_GTT_MAP | CREATE_NO_RETIRE | CREATE_NO_THROTTLE | CREATE_CACHED);
+			} while (bo == NULL && (size>>=1) > sizeof(float)*sna->render.vertex_used);
+
+			sna->render.vertices = NULL;
+			if (bo)
+				sna->render.vertices = kgem_bo_map(&sna->kgem, bo);
 			if (sna->render.vertices != NULL) {
-				int size;
+				DBG(("%s: new vbo: %d / %d\n", __FUNCTION__,
+				     sna->render.vertex_used, __kgem_bo_size(bo)/4));
 
-				assert(sizeof(float)*sna->render.vertex_used <=
-				       __kgem_bo_size(sna->render.vbo));
+				assert(sizeof(float)*sna->render.vertex_used <= __kgem_bo_size(bo));
 				memcpy(sna->render.vertices,
 				       sna->render.vertex_data,
 				       sizeof(float)*sna->render.vertex_used);
 
-				size = __kgem_bo_size(sna->render.vbo)/4;
+				size = __kgem_bo_size(bo)/4;
 				if (size >= UINT16_MAX)
 					size = UINT16_MAX - 1;
 
+				sna->render.vbo = bo;
 				sna->render.vertex_size = size;
-
-				bo = sna->render.vbo;
 			} else {
-				if (sna->render.vbo) {
-					kgem_bo_destroy(&sna->kgem, sna->render.vbo);
-					sna->render.vbo = NULL;
-				}
+				DBG(("%s: tmp vbo: %d\n", __FUNCTION__,
+				     sna->render.vertex_used));
+
+				if (bo)
+					kgem_bo_destroy(&sna->kgem, bo);
+
 				bo = kgem_create_linear(&sna->kgem,
 							4*sna->render.vertex_used,
 							CREATE_NO_THROTTLE);
@@ -226,8 +239,10 @@ void gen4_vertex_close(struct sna *sna)
 					kgem_bo_destroy(&sna->kgem, bo);
 					bo = NULL;
 				}
-				DBG(("%s: new vbo: %d\n", __FUNCTION__,
-				     sna->render.vertex_used));
+
+				assert(sna->render.vbo == NULL);
+				sna->render.vertices = sna->render.vertex_data;
+				sna->render.vertex_size = ARRAY_SIZE(sna->render.vertex_data);
 				free_bo = bo;
 			}
 		}
commit 0d05a69bd74dc02fb1360c9393de905b8842e1fe
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Sep 11 10:18:51 2013 +0100

    sna/gen4: Always try the BLT composite routines first
    
    Given how fragile the render operations are, the cost of switching from
    the slow render pipeline to the comparatively fast BLT (when possible) is
    always worth paying.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
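
    The functional effect is that the try_blt() pre-filter is removed, so
    with no mask the BLT backend is always given first refusal before falling
    back to the render pipeline. A rough sketch of the resulting control
    flow, using hypothetical stubs rather than the real sna_blt_composite()
    and gen4 entry points:

/* Hedged sketch only: stand-in stubs, not the driver's actual functions. */
#include <stdbool.h>
#include <stdio.h>

static bool blt_composite(void)    { return false; } /* e.g. op unsupported by BLT */
static bool render_composite(void) { return true;  }

static bool composite(bool has_mask)
{
	/* No up-front try_blt() filtering any more: if there is no mask,
	 * simply let the BLT backend decide whether it can take the op. */
	if (!has_mask && blt_composite())
		return true;

	return render_composite();	/* fall back to the 3D pipeline */
}

int main(void)
{
	printf("composite handled: %d\n", composite(false));
	return 0;
}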

diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index fb56d85..3158b58 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -1631,33 +1631,6 @@ gen4_composite_set_target(struct sna *sna,
 }
 
 static bool
-try_blt(struct sna *sna,
-	PicturePtr dst, PicturePtr src,
-	int width, int height)
-{
-	if (sna->kgem.mode != KGEM_RENDER) {
-		DBG(("%s: already performing BLT\n", __FUNCTION__));
-		return true;
-	}
-
-	if (too_large(width, height)) {
-		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
-		     __FUNCTION__, width, height));
-		return true;
-	}
-
-	if (too_large(dst->pDrawable->width, dst->pDrawable->height))
-		return true;
-
-	/* The blitter is much faster for solids */
-	if (sna_picture_is_solid(src, NULL))
-		return true;
-
-	/* is the source picture only in cpu memory e.g. a shm pixmap? */
-	return picture_is_cpu(sna, src);
-}
-
-static bool
 check_gradient(PicturePtr picture, bool precise)
 {
 	switch (picture->pSourcePict->type) {
@@ -1885,7 +1858,6 @@ gen4_render_composite(struct sna *sna,
 		return false;
 
 	if (mask == NULL &&
-	    try_blt(sna, dst, src, width, height) &&
 	    sna_blt_composite(sna, op,
 			      src, dst,
 			      src_x, src_y,

