xf86-video-intel: 11 commits - src/sna/gen2_render.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_damage.h src/sna/sna.h src/sna/sna_render.c

Chris Wilson ickle at kemper.freedesktop.org
Mon Dec 12 18:11:43 PST 2011


 src/sna/gen2_render.c |   53 ++++++-----
 src/sna/kgem.c        |  183 ++++++++++++++++++++++++++++-----------
 src/sna/kgem.h        |    2 
 src/sna/sna.h         |    3 
 src/sna/sna_accel.c   |  232 +++++++++++++++++++++++++++++++++++---------------
 src/sna/sna_damage.h  |    5 -
 src/sna/sna_render.c  |   60 ++++++++++++
 7 files changed, 391 insertions(+), 147 deletions(-)

New commits:
commit d02dc0fd84194b6acc607d17e4396dd1fdc681f8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Dec 13 01:18:59 2011 +0000

    sna: Set the refcnt on the replacement bo
    
    The paranoia wasn't in vain.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 3d2705c..b76a180 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2571,6 +2571,7 @@ kgem_replace_bo(struct kgem *kgem,
 	}
 	dst->pitch = pitch;
 	dst->unique_id = kgem_get_unique_id(kgem);
+	dst->refcnt = 1;
 
 	kgem_set_mode(kgem, KGEM_BLT);
 	if (!kgem_check_batch(kgem, 8) ||
commit 7472db8c8c9994798ea36de1ce2f51725d8b387a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Dec 12 21:55:04 2011 +0000

    sna: Double-check that the submitted buffers were not purged
    
    More paranoia is good for the soul.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 1a09849..3d2705c 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -224,6 +224,8 @@ kgem_busy(struct kgem *kgem, int handle)
 Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
 		   const void *data, int length)
 {
+	assert(bo->refcnt);
+	assert(!bo->purged);
 	assert(!kgem_busy(kgem, bo->handle));
 
 	if (gem_write(kgem->fd, bo->handle, 0, length, data))
@@ -817,6 +819,8 @@ static void kgem_commit(struct kgem *kgem)
 	struct kgem_bo *bo, *next;
 
 	list_for_each_entry_safe(bo, next, &rq->buffers, request) {
+		assert(!bo->purged);
+
 		bo->presumed_offset = bo->exec->offset;
 		bo->binding.offset = 0;
 		bo->exec = NULL;
@@ -1803,7 +1807,8 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 {
 	int index;
 
-	assert ((read_write_domain & 0x7fff) == 0 || bo != NULL);
+	assert(bo->refcnt);
+	assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
 
 	index = kgem->nreloc++;
 	assert(index < ARRAY_SIZE(kgem->reloc));
@@ -1883,6 +1888,9 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 {
 	void *ptr;
 
+	assert(bo->refcnt);
+	assert(!bo->purged);
+
 	if (IS_CPU_MAP(bo->map)) {
 		DBG(("%s: discarding CPU vma cache for %d\n",
 		       __FUNCTION__, bo->handle));
@@ -1937,6 +1945,8 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
 	struct drm_i915_gem_mmap mmap_arg;
 
 	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, bo->handle, bo->size));
+	assert(bo->refcnt);
+	assert(!bo->purged);
 
 	if (IS_CPU_MAP(bo->map)) {
 		void *ptr = CPU_MAP(bo->map);
commit 0bbd6a08fe485c80bd5c9a1b7027618a03a26f84
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Dec 12 21:52:56 2011 +0000

    sna/gen2: Tidy checking against too large pixmaps for the 3D pipeline
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index e354c56..fb6b75c 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -54,6 +54,8 @@
 #define PREFER_BLT_FILL 1
 #define PREFER_BLT_COPY 1
 
+#define MAX_3D_SIZE 2048
+
 #define BATCH(v) batch_emit(sna, v)
 #define BATCH_F(v) batch_emit_float(sna, v)
 #define VERTEX(v) batch_emit_float(sna, v)
@@ -120,6 +122,12 @@ static const struct formatinfo {
 	{PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888},
 };
 
+/* True when width or height exceeds MAX_3D_SIZE, the 3D pipeline size limit. */
+static inline bool too_large(int width, int height)
+{
+	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
+}
+
 static inline uint32_t
 gen2_buf_tiling(uint32_t tiling)
 {
@@ -1178,7 +1186,7 @@ gen2_composite_picture(struct sna *sna,
 						  x, y, w, h, dst_x, dst_y);
 
 	channel->pict_format = picture->format;
-	if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048)
+	if (too_large(pixmap->drawable.width, pixmap->drawable.height))
 		return sna_render_picture_extract(sna, picture, channel,
 						  x, y, w, h, dst_x, dst_y);
 
@@ -1240,7 +1248,7 @@ try_blt(struct sna *sna,
 		return TRUE;
 	}
 
-	if (width > 2048 || height > 2048) {
+	if (too_large(width, height)) {
 		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
 		     __FUNCTION__, width, height));
 		return TRUE;
@@ -1422,8 +1430,7 @@ gen2_render_composite(struct sna *sna,
 	sna_render_reduce_damage(tmp, dst_x, dst_y, width, height);
 
 	tmp->op = op;
-	if (tmp->dst.width > 2048 ||
-	    tmp->dst.height > 2048 ||
+	if (too_large(tmp->dst.width, tmp->dst.height) ||
 	    tmp->dst.bo->pitch > 8192) {
 		if (!sna_render_composite_redirect(sna, tmp,
 						   dst_x, dst_y, width, height))
@@ -1848,8 +1855,7 @@ gen2_render_composite_spans(struct sna *sna,
 	sna_render_reduce_damage(&tmp->base, dst_x, dst_y, width, height);
 
 	tmp->base.op = op;
-	if (tmp->base.dst.width > 2048 ||
-	    tmp->base.dst.height > 2048 ||
+	if (too_large(tmp->base.dst.width, tmp->base.dst.height) ||
 	    tmp->base.dst.bo->pitch > 8192) {
 		if (!sna_render_composite_redirect(sna, &tmp->base,
 						   dst_x, dst_y, width, height))
@@ -2054,8 +2060,7 @@ gen2_render_fill_boxes(struct sna *sna,
 	     __FUNCTION__, op, (int)format,
 	     color->red, color->green, color->blue, color->alpha));
 
-	if (dst->drawable.width > 2048 ||
-	    dst->drawable.height > 2048 ||
+	if (too_large(dst->drawable.width, dst->drawable.height) ||
 	    dst_bo->pitch < 8 || dst_bo->pitch > 8192 ||
 	    !gen2_check_dst_format(format))
 		return gen2_render_fill_boxes_try_blt(sna, op, format, color,
@@ -2247,8 +2252,7 @@ gen2_render_fill(struct sna *sna, uint8_t alu,
 		return TRUE;
 
 	/* Must use the BLT if we can't RENDER... */
-	if (dst->drawable.width > 2048 ||
-	    dst->drawable.height > 2048 ||
+	if (too_large(dst->drawable.width, dst->drawable.height) ||
 	    dst_bo->pitch < 8 || dst_bo->pitch > 8192)
 		return sna_blt_fill(sna, alu,
 				    dst_bo, dst->drawable.bitsPerPixel,
@@ -2324,7 +2328,7 @@ gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 		return TRUE;
 
 	/* Must use the BLT if we can't RENDER... */
-	if (dst->drawable.width > 2048 || dst->drawable.height > 2048 ||
+	if (too_large(dst->drawable.width, dst->drawable.height) ||
 	    bo->pitch < 8 || bo->pitch > 8192)
 		return gen2_render_fill_one_try_blt(sna, dst, bo, color,
 						    x1, y1, x2, y2, alu);
@@ -2481,12 +2485,10 @@ gen2_render_copy_boxes(struct sna *sna, uint8_t alu,
 		return TRUE;
 
 	if (src_bo == dst_bo || /* XXX handle overlap using 3D ? */
+	    too_large(src->drawable.width, src->drawable.height) ||
 	    src_bo->pitch > 8192 ||
-	    src->drawable.width > 2048 ||
-	    src->drawable.height > 2048 ||
-	    dst_bo->pitch < 8 || dst_bo->pitch > 8192 ||
-	    dst->drawable.width > 2048 ||
-	    dst->drawable.height > 2048) {
+	    too_large(dst->drawable.width, dst->drawable.height) ||
+	    dst_bo->pitch < 8 || dst_bo->pitch > 8192) {
 		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
 			return FALSE;
 
@@ -2617,10 +2619,9 @@ gen2_render_copy(struct sna *sna, uint8_t alu,
 		return TRUE;
 
 	/* Must use the BLT if we can't RENDER... */
-	if (src->drawable.width > 2048 || src->drawable.height > 2048 ||
-	    dst->drawable.width > 2048 || dst->drawable.height > 2048 ||
-	    src_bo->pitch > 8192 ||
-	    dst_bo->pitch < 8 || dst_bo->pitch > 8192) {
+	if (too_large(src->drawable.width, src->drawable.height) ||
+	    too_large(dst->drawable.width, dst->drawable.height) ||
+	    src_bo->pitch > 8192 || dst_bo->pitch < 8 || dst_bo->pitch > 8192) {
 		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
 			return FALSE;
 
@@ -2709,6 +2710,6 @@ Bool gen2_render_init(struct sna *sna)
 	render->reset = gen2_render_reset;
 	render->flush = gen2_render_flush;
 
-	render->max_3d_size = 2048;
+	render->max_3d_size = MAX_3D_SIZE;
 	return TRUE;
 }
commit b392474f3a052fefc410383725b64f0fdbe7788e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Dec 12 21:28:09 2011 +0000

    sna: Force a suitable minimum stride for 3D temporaries
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index f1ed78b..001dcec 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -1193,7 +1193,7 @@ sna_render_composite_redirect(struct sna *sna,
 			    width, height, bpp,
 			    kgem_choose_tiling(&sna->kgem, I915_TILING_X,
 					       width, height, bpp),
-			    0);
+			    CREATE_SCANOUT);
 	if (!bo)
 		return FALSE;
 
commit 3c22baaba97a89d24e507e3d832d53d764902040
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Dec 12 21:16:56 2011 +0000

    sna/gen2: Check for unhandled pitches in the render pipeline
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index cd2dcf4..e354c56 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -525,7 +525,8 @@ gen2_get_batch(struct sna *sna)
 
 static void gen2_emit_target(struct sna *sna, const struct sna_composite_op *op)
 {
-	assert (sna->render_state.gen2.vertex_offset == 0);
+	assert(op->dst.bo->pitch >= 8 && op->dst.bo->pitch <= 8192);
+	assert(sna->render_state.gen2.vertex_offset == 0);
 
 	if (sna->render_state.gen2.target == op->dst.bo->unique_id) {
 		kgem_bo_mark_dirty(op->dst.bo);
@@ -1208,7 +1209,7 @@ gen2_composite_set_target(struct sna *sna,
 			return FALSE;
 
 		bo = kgem_replace_bo(&sna->kgem, priv->gpu_bo,
-				     op->dst.width, op->dst.height, 16,
+				     op->dst.width, op->dst.height, 8,
 				     op->dst.pixmap->drawable.bitsPerPixel);
 		if (bo == NULL)
 			return FALSE;
@@ -2055,7 +2056,7 @@ gen2_render_fill_boxes(struct sna *sna,
 
 	if (dst->drawable.width > 2048 ||
 	    dst->drawable.height > 2048 ||
-	    dst_bo->pitch > 8192 ||
+	    dst_bo->pitch < 8 || dst_bo->pitch > 8192 ||
 	    !gen2_check_dst_format(format))
 		return gen2_render_fill_boxes_try_blt(sna, op, format, color,
 						      dst, dst_bo,
@@ -2248,7 +2249,7 @@ gen2_render_fill(struct sna *sna, uint8_t alu,
 	/* Must use the BLT if we can't RENDER... */
 	if (dst->drawable.width > 2048 ||
 	    dst->drawable.height > 2048 ||
-	    dst_bo->pitch > 8192)
+	    dst_bo->pitch < 8 || dst_bo->pitch > 8192)
 		return sna_blt_fill(sna, alu,
 				    dst_bo, dst->drawable.bitsPerPixel,
 				    color,
@@ -2324,7 +2325,7 @@ gen2_render_fill_one(struct sna *sna, PixmapPtr dst, struct kgem_bo *bo,
 
 	/* Must use the BLT if we can't RENDER... */
 	if (dst->drawable.width > 2048 || dst->drawable.height > 2048 ||
-	    bo->pitch > 8192)
+	    bo->pitch < 8 || bo->pitch > 8192)
 		return gen2_render_fill_one_try_blt(sna, dst, bo, color,
 						    x1, y1, x2, y2, alu);
 
@@ -2483,7 +2484,7 @@ gen2_render_copy_boxes(struct sna *sna, uint8_t alu,
 	    src_bo->pitch > 8192 ||
 	    src->drawable.width > 2048 ||
 	    src->drawable.height > 2048 ||
-	    dst_bo->pitch > 8192 ||
+	    dst_bo->pitch < 8 || dst_bo->pitch > 8192 ||
 	    dst->drawable.width > 2048 ||
 	    dst->drawable.height > 2048) {
 		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
@@ -2618,7 +2619,8 @@ gen2_render_copy(struct sna *sna, uint8_t alu,
 	/* Must use the BLT if we can't RENDER... */
 	if (src->drawable.width > 2048 || src->drawable.height > 2048 ||
 	    dst->drawable.width > 2048 || dst->drawable.height > 2048 ||
-	    src_bo->pitch > 8192 || dst_bo->pitch > 8192) {
+	    src_bo->pitch > 8192 ||
+	    dst_bo->pitch < 8 || dst_bo->pitch > 8192) {
 		if (!sna_blt_compare_depth(&src->drawable, &dst->drawable))
 			return FALSE;
 
commit f6a30df8dc75852d1f6daeabe4511b275594e52e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Dec 12 20:40:29 2011 +0000

    sna: Enable memcpy uploads to SHM pixmaps
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index dc0fad5..c7b4ebf 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1441,12 +1441,16 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	int16_t dx, dy;
 	int n;
 
-	if (!priv)
-		return false;
-
 	if (gc->alu != GXcopy)
 		return false;
 
+	if (!priv) {
+		if (drawable->depth < 8)
+			return false;
+
+		goto blt;
+	}
+
 	/* XXX performing the upload inplace is currently about 20x slower
 	 * for putimage10 on gen6 -- mostly due to slow page faulting in kernel.
 	 * So we try again with vma caching and only for pixmaps who will be
@@ -1496,6 +1500,7 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	if (priv->flush)
 		list_move(&priv->list, &sna->dirty_pixmaps);
 
+blt:
 	get_drawable_deltas(drawable, pixmap, &dx, &dy);
 	x += dx + drawable->x;
 	y += dy + drawable->y;
commit 3c163d105e964a1084d665500ef917254d8f2179
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Dec 12 16:26:13 2011 +0000

    sna: Use the CPU bo as a render source if compatible and no GPU bo
    
    This is principally to catch the cases of compositing after a fresh
    PutImage.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 6a17bfe..1a09849 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1447,13 +1447,13 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
 		goto done;
 	}
 
-	if (tiling == I915_TILING_X && width * bpp < 8*512/2) {
+	if (tiling == I915_TILING_X && width * bpp <= 8*512/2) {
 		DBG(("%s: too thin [width %d, %d bpp] for TILING_X\n",
 		     __FUNCTION__, width, bpp));
 		tiling = I915_TILING_NONE;
 		goto done;
 	}
-	if (tiling == I915_TILING_Y && width * bpp < 8*32/2) {
+	if (tiling == I915_TILING_Y && width * bpp <= 8*32/2) {
 		DBG(("%s: too thin [%d] for TILING_Y\n",
 		     __FUNCTION__, width));
 		tiling = I915_TILING_NONE;
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 13c0526..f1ed78b 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -245,6 +245,59 @@ void no_render_init(struct sna *sna)
 		sna->kgem.ring = KGEM_BLT;
 }
 
+/* Pick the pixmap's CPU bo as the render source for box (returning a new
+ * reference to it), or NULL when the CPU bo is unavailable or unsuitable. */
+static struct kgem_bo *
+use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box)
+{
+	struct sna_pixmap *priv = sna_pixmap(pixmap);
+
+	if (priv == NULL || priv->cpu_bo == NULL) {
+		DBG(("%s: no cpu bo\n", __FUNCTION__));
+		return NULL;
+	}
+
+	if (priv->gpu_bo &&
+	    sna_damage_contains_box(priv->cpu_damage, box) == PIXMAN_REGION_OUT) {
+		DBG(("%s: has GPU bo and no damage to upload\n", __FUNCTION__));
+		return NULL;
+	}
+
+	if (sna_damage_contains_box(priv->gpu_damage, box) != PIXMAN_REGION_OUT) {
+		DBG(("%s: box is damaged on the GPU\n", __FUNCTION__));
+		return NULL;
+	}
+
+	if (pixmap->usage_hint)
+		goto done;
+
+	if (priv->gpu_bo) {
+		/* Test the bo's tiling mode; the bo pointer itself is never 0 here. */
+		if (priv->gpu_bo->tiling != I915_TILING_NONE) {
+			DBG(("%s: GPU bo exists and is tiled [%d], upload\n",
+			     __FUNCTION__, priv->gpu_bo->tiling));
+			return NULL;
+		}
+	} else {
+		int w = box->x2 - box->x1;
+		int h = box->y2 - box->y1;
+		if (priv->source_count*w*h >= pixmap->drawable.width * pixmap->drawable.height &&
+		    I915_TILING_NONE != kgem_choose_tiling(&sna->kgem, I915_TILING_X,
+							   pixmap->drawable.width,
+							   pixmap->drawable.height,
+							   pixmap->drawable.bitsPerPixel)) {
+			DBG(("%s: pitch (%d) requires tiling\n",
+			     __FUNCTION__, priv->cpu_bo->pitch));
+			return NULL;
+		}
+	}
+
+done:
+	DBG(("%s for box=(%d, %d), (%d, %d)\n",
+	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+	return kgem_bo_reference(priv->cpu_bo);
+}
+
 static Bool
 move_to_gpu(PixmapPtr pixmap, const BoxRec *box)
 {
@@ -425,7 +478,8 @@ sna_render_pixmap_bo(struct sna *sna,
 	     channel->offset[0], channel->offset[1],
 	     pixmap->drawable.width, pixmap->drawable.height));
 
-	if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) {
+	bo = use_cpu_bo(sna, pixmap, &box);
+	if (bo == NULL && texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) {
 		/* If we are using transient data, it is better to copy
 		 * to an amalgamated upload buffer so that we don't
 		 * stall on releasing the cpu bo immediately upon
@@ -795,7 +849,7 @@ sna_render_picture_extract(struct sna *sna,
 					      &box,
 					      pixmap->devKind,
 					      pixmap->drawable.bitsPerPixel);
-		if (!bo) {
+		if (bo == NULL) {
 			DBG(("%s: failed to upload source image, using clear\n",
 			     __FUNCTION__));
 			return 0;
commit c481bec356b2e40e66a000dbaaf261bf7aae930d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Dec 12 14:10:57 2011 +0000

    sna: Experiment with creating the CPU pixmap using an LLC BO
    
    A poor cousin to vmap is to instead allocate snooped bo and use a CPU
    mapping for zero-copy uploads into GPU resident memory. For maximum
    performance, we still need tiled GPU buffers so CPU bo are only useful
    in situations where we are frequently migrating data.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index fac9c0e..6a17bfe 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -86,7 +86,11 @@ static inline void list_replace(struct list *old,
 #endif
 
 #define PAGE_SIZE 4096
-#define MAX_VMA_CACHE 128
+#define MAX_VMA_CACHE 256
+
+#define IS_CPU_MAP(ptr) ((uintptr_t)(ptr) & 1)
+#define CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) & ~1))
+#define MAKE_CPU_MAP(ptr) ((void*)((uintptr_t)(ptr) | 1))
 
 struct kgem_partial_bo {
 	struct kgem_bo base;
@@ -618,9 +622,10 @@ static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
 	}
 
 	if (bo->map) {
-		DBG(("%s: releasing vma for handle=%d, count=%d\n",
-		     __FUNCTION__, bo->handle, kgem->vma_count-1));
-		munmap(bo->map, bo->size);
+		DBG(("%s: releasing %s vma for handle=%d, count=%d\n",
+		       __FUNCTION__, IS_CPU_MAP(bo->map) ? "CPU" : "GTT",
+		       bo->handle, kgem->vma_count-1));
+		munmap(CPU_MAP(bo->map), bo->size);
 		list_del(&bo->vma);
 		kgem->vma_count--;
 	}
@@ -657,34 +662,39 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		}
 	}
 
-	if (!bo->reusable)
+	if (!bo->reusable) {
+		DBG(("%s: handle=%d, not reusable\n",
+		     __FUNCTION__, bo->handle));
 		goto destroy;
-
-	if (!bo->rq && !bo->needs_flush) {
-		assert(!bo->purged);
-
-		DBG(("%s: handle=%d, purged\n", __FUNCTION__, bo->handle));
-
-		if (!gem_madvise(kgem->fd, bo->handle, I915_MADV_DONTNEED)) {
-			kgem->need_purge |= bo->gpu;
-			goto destroy;
-		}
-
-		bo->purged = true;
 	}
 
 	kgem->need_expire = true;
 	if (bo->rq) {
 		DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
 		list_move(&bo->list, active(kgem, bo->size));
-	} else if (bo->purged) {
-		DBG(("%s: handle=%d -> inactive\n", __FUNCTION__, bo->handle));
-		list_move(&bo->list, inactive(kgem, bo->size));
-	} else {
+	} else if (bo->needs_flush) {
 		DBG(("%s: handle=%d -> flushing\n", __FUNCTION__, bo->handle));
 		assert(list_is_empty(&bo->request));
 		list_add(&bo->request, &kgem->flushing);
 		list_move(&bo->list, active(kgem, bo->size));
+	} else {
+		if (!IS_CPU_MAP(bo->map)) {
+			assert(!bo->purged);
+
+			DBG(("%s: handle=%d, purged\n",
+			     __FUNCTION__, bo->handle));
+
+			if (!gem_madvise(kgem->fd, bo->handle,
+					 I915_MADV_DONTNEED)) {
+				kgem->need_purge |= bo->gpu;
+				goto destroy;
+			}
+
+			bo->purged = true;
+		}
+
+		DBG(("%s: handle=%d -> inactive\n", __FUNCTION__, bo->handle));
+		list_move(&bo->list, inactive(kgem, bo->size));
 	}
 
 	return;
@@ -1188,7 +1198,6 @@ bool kgem_expire_cache(struct kgem *kgem)
 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
 		idle &= list_is_empty(&kgem->inactive[i]);
 		list_for_each_entry(bo, &kgem->inactive[i], list) {
-			assert(bo->purged);
 			if (bo->delta) {
 				expire = now - MAX_INACTIVE_TIME;
 				break;
@@ -1213,8 +1222,9 @@ bool kgem_expire_cache(struct kgem *kgem)
 			bo = list_last_entry(&kgem->inactive[i],
 					     struct kgem_bo, list);
 
-			if (gem_madvise(kgem->fd, bo->handle,
-					I915_MADV_DONTNEED) &&
+			if ((!bo->purged ||
+			     gem_madvise(kgem->fd, bo->handle,
+					 I915_MADV_DONTNEED)) &&
 			    bo->delta > expire) {
 				idle = false;
 				break;
@@ -1844,32 +1854,47 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 	return delta;
 }
 
+static void kgem_trim_vma_cache(struct kgem *kgem)
+{
+	/* Each process may hold only a limited number of vma (roughly
+	 * 64k), a budget shared with malloc arenas and every other
+	 * file mapping. To avoid hogging the cache, and above all to
+	 * avoid exhausting that budget and having mappings fail, cap
+	 * the number of vma we keep open at a conservative value by
+	 * unmapping entries from the head of the vma cache.
+	 */
+	for (; kgem->vma_count > MAX_VMA_CACHE; kgem->vma_count--) {
+		struct kgem_bo *victim = list_first_entry(&kgem->vma_cache,
+							  struct kgem_bo,
+							  vma);
+
+		DBG(("%s: discarding %s vma cache for %d\n",
+		     __FUNCTION__,
+		     IS_CPU_MAP(victim->map) ? "CPU" : "GTT",
+		     victim->handle));
+
+		munmap(CPU_MAP(victim->map), victim->size);
+		list_del(&victim->vma);
+		victim->map = NULL;
+	}
+}
+
 void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 {
 	void *ptr;
 
+	if (IS_CPU_MAP(bo->map)) {
+		DBG(("%s: discarding CPU vma cache for %d\n",
+		       __FUNCTION__, bo->handle));
+		munmap(CPU_MAP(bo->map), bo->size);
+		bo->map = NULL;
+		list_del(&bo->vma);
+		kgem->vma_count--;
+	}
+
 	ptr = bo->map;
 	if (ptr == NULL) {
-		/* vma are limited on a per-process basis to around 64k.
-		 * This includes all malloc arenas as well as other file
-		 * mappings. In order to be fair and not hog the cache,
-		 * and more importantly not to exhaust that limit and to
-		 * start failing mappings, we keep our own number of open
-		 * vma to within a conservative value.
-		 */
-		while (kgem->vma_count > MAX_VMA_CACHE) {
-			struct kgem_bo *old;
-
-			old = list_first_entry(&kgem->vma_cache,
-					       struct kgem_bo,
-					       vma);
-			DBG(("%s: discarding vma cache for %d\n",
-			     __FUNCTION__, old->handle));
-			munmap(old->map, old->size);
-			old->map = NULL;
-			list_del(&old->vma);
-			kgem->vma_count--;
-		}
+		kgem_trim_vma_cache(kgem);
 
 		ptr = gem_mmap(kgem->fd, bo->handle, bo->size,
 			       PROT_READ | PROT_WRITE);
@@ -1907,6 +1932,53 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 	return ptr;
 }
 
+/* Return a CPU mmap of bo (a cached CPU vma is reused and detached from the
+ * vma cache), or NULL if the DRM_IOCTL_I915_GEM_MMAP ioctl fails. */
+void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
+{
+	struct drm_i915_gem_mmap mmap_arg;
+
+	DBG(("%s(handle=%d, size=%d)\n", __FUNCTION__, bo->handle, bo->size));
+
+	if (IS_CPU_MAP(bo->map)) {
+		void *ptr = CPU_MAP(bo->map);
+		bo->map = NULL;
+		kgem->vma_count--;
+		list_del(&bo->vma);
+		return ptr;
+	}
+
+	if (bo->map != NULL) { /* only a GTT mapping can remain here */
+		DBG(("%s: discarding GTT vma cache for %d\n",
+		       __FUNCTION__, bo->handle));
+		munmap(CPU_MAP(bo->map), bo->size);
+		kgem->vma_count--;
+		list_del(&bo->vma);
+		bo->map = NULL;
+	}
+	kgem_trim_vma_cache(kgem);
+
+	VG_CLEAR(mmap_arg);
+	mmap_arg.size = bo->size;
+	mmap_arg.offset = 0;
+	mmap_arg.handle = bo->handle;
+	if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg) != 0) {
+		assert(0);
+		return NULL;
+	}
+	VG(VALGRIND_MAKE_MEM_DEFINED(mmap_arg.addr_ptr, bo->size));
+	return (void *)(uintptr_t)mmap_arg.addr_ptr;
+}
+
+/* Stash a CPU mapping of bo (tagged via MAKE_CPU_MAP) in the vma cache. */
+void kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr)
+{
+	assert(bo->map == NULL);
+	kgem->vma_count++;
+	list_move(&bo->vma, &kgem->vma_cache);
+	bo->map = MAKE_CPU_MAP(ptr);
+}
+
 void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo)
 {
 	if (bo->map == NULL)
@@ -1915,7 +1987,7 @@ void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo)
 	DBG(("%s: (debug) releasing vma for handle=%d, count=%d\n",
 	     __FUNCTION__, bo->handle, kgem->vma_count-1));
 
-	munmap(bo->map, bo->size);
+	munmap(CPU_MAP(bo->map), bo->size);
 	bo->map = NULL;
 
 	list_del(&bo->vma);
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 0d85f64..2fd5a55 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -319,6 +319,8 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
 
 void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot);
 void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo);
+void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo);
+void kgem_bo_unmap__cpu(struct kgem *kgem, struct kgem_bo *bo, void *ptr);
 uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo);
 
 Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index c39b45e..dc0fad5 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -61,6 +61,7 @@
 #define USE_SPANS 0
 #define USE_ZERO_SPANS 1
 #define USE_BO_FOR_SCRATCH_PIXMAP 1
+#define USE_LLC_CPU_BO 1
 
 static int sna_font_key;
 
@@ -177,6 +178,54 @@ static void sna_pixmap_destroy_gpu_bo(struct sna *sna, struct sna_pixmap *priv)
 	priv->source_count = SOURCE_BIAS;
 }
 
+/* Allocate CPU storage for pixmap: with USE_LLC_CPU_BO and kgem.gen >= 60 try
+ * an untiled bo mapped via kgem_bo_map__cpu(), otherwise fall back to malloc. */
+static void sna_pixmap_alloc_cpu(struct sna *sna, PixmapPtr pixmap,
+				 struct sna_pixmap *priv)
+{
+	if (USE_LLC_CPU_BO && sna->kgem.gen >= 60) {
+		DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__,
+		     pixmap->drawable.width, pixmap->drawable.height));
+		priv->cpu_bo = kgem_create_2d(&sna->kgem,
+					      pixmap->drawable.width,
+					      pixmap->drawable.height,
+					      pixmap->drawable.bitsPerPixel,
+					      I915_TILING_NONE,
+					      CREATE_INACTIVE);
+		if (priv->cpu_bo) {
+			/* Log the handle only once the bo is known to exist. */
+			DBG(("%s: allocated CPU handle=%d\n", __FUNCTION__,
+			     priv->cpu_bo->handle));
+			priv->ptr = kgem_bo_map__cpu(&sna->kgem, priv->cpu_bo);
+			if (priv->ptr == NULL) {
+				kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
+				priv->cpu_bo = NULL;
+			}
+		}
+	}
+
+	if (priv->ptr == NULL)
+		priv->ptr = malloc(pixmap->devKind * pixmap->drawable.height);
+
+	assert(priv->ptr);
+	pixmap->devPrivate.ptr = priv->ptr;
+}
+
+/* Release the pixmap's CPU storage: a bo-backed mapping or a malloc buffer. */
+static void sna_pixmap_free_cpu(struct sna *sna, struct sna_pixmap *priv)
+{
+	if (priv->cpu_bo) {
+		/* cpu_bo is NULL for malloc'ed storage, so only log it here. */
+		DBG(("%s: discarding CPU buffer, handle=%d, size=%d\n",
+		     __FUNCTION__, priv->cpu_bo->handle, priv->cpu_bo->size));
+		kgem_bo_unmap__cpu(&sna->kgem, priv->cpu_bo, priv->ptr);
+		kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
+		priv->cpu_bo = NULL;
+	} else
+		free(priv->ptr);
+	priv->pixmap->devPrivate.ptr = priv->ptr = NULL;
+}
+
 static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
 {
 	struct sna *sna = to_sna_from_pixmap(pixmap);
@@ -191,6 +240,9 @@ static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
 	if (priv->gpu_bo)
 		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
 
+	if (priv->ptr)
+		sna_pixmap_free_cpu(sna, priv);
+
 	if (priv->cpu_bo) {
 		if (kgem_bo_is_busy(priv->cpu_bo)) {
 			list_add_tail(&priv->list, &sna->deferred_free);
@@ -208,7 +260,6 @@ static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
 		return false;
 	}
 
-	free(priv->ptr);
 	free(priv);
 	return true;
 }
@@ -531,12 +582,10 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, bool write)
 	     __FUNCTION__, priv->gpu_bo, priv->gpu_damage, priv->gpu_only));
 
 	if (pixmap->devPrivate.ptr == NULL) {
-		DBG(("%s: allocating CPU buffer\n", __FUNCTION__));
 		assert(priv->ptr == NULL);
 		assert(pixmap->devKind);
 		assert(priv->cpu_damage == NULL);
-		priv->ptr = malloc(pixmap->devKind * pixmap->drawable.height);
-		pixmap->devPrivate.ptr = priv->ptr;
+		sna_pixmap_alloc_cpu(sna, pixmap, priv);
 	}
 
 	if (priv->gpu_bo == NULL) {
@@ -644,12 +693,10 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 	}
 
 	if (pixmap->devPrivate.ptr == NULL) {
-		DBG(("%s: allocating CPU buffer\n", __FUNCTION__));
 		assert(priv->ptr == NULL);
 		assert(pixmap->devKind);
 		assert(priv->cpu_damage == NULL);
-		priv->ptr = malloc(pixmap->devKind * pixmap->drawable.height);
-		pixmap->devPrivate.ptr = priv->ptr;
+		sna_pixmap_alloc_cpu(sna, pixmap, priv);
 	}
 
 	if (priv->gpu_bo == NULL)
@@ -1397,13 +1444,6 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	if (!priv)
 		return false;
 
-	if (pixmap->devPrivate.ptr == NULL) {
-		if (priv->gpu_bo == NULL)
-			return false;
-		return sna_put_image_upload_blt(drawable, gc, region,
-						x, y, w, h, bits, stride);
-	}
-
 	if (gc->alu != GXcopy)
 		return false;
 
@@ -1432,6 +1472,9 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	if (priv->cpu_bo)
 		kgem_bo_sync(&sna->kgem, priv->cpu_bo, true);
 
+	if (pixmap->devPrivate.ptr == NULL)
+		sna_pixmap_alloc_cpu(sna, pixmap, priv);
+
 	if (region_subsumes_drawable(region, &pixmap->drawable)) {
 		DBG(("%s: replacing entire pixmap\n", __FUNCTION__));
 		sna_damage_all(&priv->cpu_damage,
@@ -2216,11 +2259,8 @@ fallback:
 							  &sna->dirty_pixmaps);
 				}
 
-				if (dst_pixmap->devPrivate.ptr == NULL) {
-					DBG(("%s: allocating CPU buffer\n", __FUNCTION__));
-					dst_priv->ptr = malloc(dst_pixmap->devKind * dst_pixmap->drawable.height);
-					dst_pixmap->devPrivate.ptr = dst_priv->ptr;
-				}
+				if (dst_pixmap->devPrivate.ptr == NULL)
+					sna_pixmap_alloc_cpu(sna, dst_pixmap, dst_priv);
 			} else
 				sna_drawable_move_region_to_cpu(&dst_pixmap->drawable,
 								&region, true);
@@ -8610,12 +8650,11 @@ static void sna_accel_inactive(struct sna *sna)
 
 	list_init(&preserve);
 	list_for_each_entry_safe(priv, next, &sna->active_pixmaps, inactive) {
-		if (priv->ptr && sna_damage_is_all(&priv->gpu_damage,
-						   priv->pixmap->drawable.width,
-						   priv->pixmap->drawable.height)) {
-			DBG(("%s: discarding CPU buffer\n", __FUNCTION__));
-			free(priv->ptr);
-			priv->pixmap->devPrivate.ptr = priv->ptr = NULL;
+		if (priv->ptr &&
+		    sna_damage_is_all(&priv->gpu_damage,
+				      priv->pixmap->drawable.width,
+				      priv->pixmap->drawable.height)) {
+			sna_pixmap_free_cpu(sna, priv);
 			list_move(&priv->inactive, &preserve);
 		}
 	}
commit 6c9aa6f9cf8e59ca6aa1866b83690a1de8cfb757
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Dec 12 11:47:25 2011 +0000

    sna: Defer allocation of memory for larger pixmap until first use
    
    In the happy scenario where the pixmap only resides upon the GPU we can
    forgo the CPU allocation entirely. The goal is to reduce the number of
    needless mmaps performed by the system memory allocator and reduce
    overall memory consumption.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 7ff0904..fac9c0e 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1909,9 +1909,11 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 
 void kgem_bo_unmap(struct kgem *kgem, struct kgem_bo *bo)
 {
+	if (bo->map == NULL)
+		return;
+
 	DBG(("%s: (debug) releasing vma for handle=%d, count=%d\n",
 	     __FUNCTION__, bo->handle, kgem->vma_count-1));
-	assert(bo->map);
 
 	munmap(bo->map, bo->size);
 	bo->map = NULL;
diff --git a/src/sna/sna.h b/src/sna/sna.h
index ad8a083..f7467f1 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -133,6 +133,7 @@ struct sna_pixmap {
 	PixmapPtr pixmap;
 	struct kgem_bo *gpu_bo, *cpu_bo;
 	struct sna_damage *gpu_damage, *cpu_damage;
+	void *ptr;
 
 	struct list list;
 	struct list inactive;
@@ -143,6 +144,7 @@ struct sna_pixmap {
 	uint8_t gpu_only :1;
 	uint8_t flush :1;
 	uint8_t gpu :1;
+	uint8_t freed :1;
 };
 
 struct sna_glyph {
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 7fb48f0..c39b45e 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -200,13 +200,15 @@ static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
 		kgem_bo_destroy(&sna->kgem, priv->cpu_bo);
 	}
 
-	if (!sna->freed_pixmap) {
+	if (!sna->freed_pixmap && priv->freed) {
 		sna->freed_pixmap = pixmap;
+		assert(priv->ptr == NULL);
 		priv->gpu_bo = NULL;
 		priv->cpu_bo = NULL;
 		return false;
 	}
 
+	free(priv->ptr);
 	free(priv);
 	return true;
 }
@@ -411,6 +413,7 @@ sna_pixmap_create_scratch(ScreenPtr screen,
 	}
 
 	priv->gpu_only = 1;
+	priv->freed = 1;
 	sna_damage_all(&priv->gpu_damage, width, height);
 
 	miModifyPixmapHeader(pixmap,
@@ -426,6 +429,8 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
 				   unsigned int usage)
 {
 	PixmapPtr pixmap;
+	struct sna_pixmap *priv;
+	int pad, size;
 
 	DBG(("%s(%d, %d, %d, usage=%x)\n", __FUNCTION__,
 	     width, height, depth, usage));
@@ -458,13 +463,34 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
 
 	/* XXX could use last deferred free? */
 
-	pixmap = create_pixmap(to_sna_from_screen(screen), screen,
-			       width, height, depth, usage);
-	if (pixmap == NullPixmap)
-		return NullPixmap;
+	pad = PixmapBytePad(width, depth);
+	size = pad * height;
+	if (size < 4096) {
+		pixmap = create_pixmap(to_sna_from_screen(screen), screen,
+				       width, height, depth, usage);
+		if (pixmap == NullPixmap)
+			return NullPixmap;
+
+		priv = sna_pixmap_attach(pixmap);
+	} else {
+		pixmap = create_pixmap(to_sna_from_screen(screen), screen,
+				       0, 0, depth, usage);
+		if (pixmap == NullPixmap)
+			return NullPixmap;
+
+		pixmap->drawable.width = width;
+		pixmap->drawable.height = height;
+		pixmap->devKind = pad;
+		pixmap->devPrivate.ptr = NULL;
+
+		priv = sna_pixmap_attach(pixmap);
+		if (priv == NULL) {
+			free(pixmap);
+			return create_pixmap(to_sna_from_screen(screen), screen,
+					     width, height, depth, usage);
+		}
+	}
 
-/* XXX if (pixmap->drawable.devKind * height > 128) */
-	sna_pixmap_attach(pixmap);
 	return pixmap;
 }
 
@@ -481,31 +507,6 @@ static Bool sna_destroy_pixmap(PixmapPtr pixmap)
 	return fbDestroyPixmap(pixmap);
 }
 
-static void sna_pixmap_map_to_cpu(struct sna *sna,
-				 PixmapPtr pixmap,
-				 struct sna_pixmap *priv)
-{
-	ScreenPtr screen = pixmap->drawable.pScreen;
-	void *ptr;
-
-	DBG(("%s: AWOOGA, AWOOGA!\n", __FUNCTION__));
-
-	kgem_bo_submit(&sna->kgem, priv->gpu_bo);
-
-	ptr = kgem_bo_map(&sna->kgem,
-			  priv->gpu_bo,
-			  PROT_READ | PROT_WRITE);
-	assert(ptr != NULL);
-
-	screen->ModifyPixmapHeader(pixmap,
-				   pixmap->drawable.width,
-				   pixmap->drawable.height,
-				   pixmap->drawable.depth,
-				   pixmap->drawable.bitsPerPixel,
-				   priv->gpu_bo->pitch,
-				   ptr);
-}
-
 static inline void list_move(struct list *list, struct list *head)
 {
 	__list_del(list->prev, list->next);
@@ -529,13 +530,17 @@ sna_pixmap_move_to_cpu(PixmapPtr pixmap, bool write)
 	DBG(("%s: gpu_bo=%p, gpu_damage=%p, gpu_only=%d\n",
 	     __FUNCTION__, priv->gpu_bo, priv->gpu_damage, priv->gpu_only));
 
-	if (priv->gpu_bo == NULL) {
-		DBG(("%s: no GPU bo\n", __FUNCTION__));
-		goto done;
+	if (pixmap->devPrivate.ptr == NULL) {
+		DBG(("%s: allocating CPU buffer\n", __FUNCTION__));
+		assert(priv->ptr == NULL);
+		assert(pixmap->devKind);
+		assert(priv->cpu_damage == NULL);
+		priv->ptr = malloc(pixmap->devKind * pixmap->drawable.height);
+		pixmap->devPrivate.ptr = priv->ptr;
 	}
 
-	if (priv->gpu_only) {
-		sna_pixmap_map_to_cpu(sna, pixmap, priv);
+	if (priv->gpu_bo == NULL) {
+		DBG(("%s: no GPU bo\n", __FUNCTION__));
 		goto done;
 	}
 
@@ -626,11 +631,6 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		return;
 	}
 
-	if (priv->gpu_only) {
-		DBG(("%s: gpu only\n", __FUNCTION__));
-		return sna_pixmap_map_to_cpu(sna, pixmap, priv);
-	}
-
 	get_drawable_deltas(drawable, pixmap, &dx, &dy);
 	DBG(("%s: delta=(%d, %d)\n", __FUNCTION__, dx, dy));
 	if (dx | dy)
@@ -643,12 +643,14 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		return sna_pixmap_move_to_cpu(pixmap, write);
 	}
 
-#if 0
-	pixman_region_intersect_rect(region, region,
-				     0, 0,
-				     pixmap->drawable.width,
-				     pixmap->drawable.height);
-#endif
+	if (pixmap->devPrivate.ptr == NULL) {
+		DBG(("%s: allocating CPU buffer\n", __FUNCTION__));
+		assert(priv->ptr == NULL);
+		assert(pixmap->devKind);
+		assert(priv->cpu_damage == NULL);
+		priv->ptr = malloc(pixmap->devKind * pixmap->drawable.height);
+		pixmap->devPrivate.ptr = priv->ptr;
+	}
 
 	if (priv->gpu_bo == NULL)
 		goto done;
@@ -847,8 +849,6 @@ _sna_drawable_use_gpu_bo(DrawablePtr drawable,
 	if (priv->cpu_damage == NULL)
 		goto done;
 
-	assert(!priv->gpu_only);
-
 	if (sna_damage_contains_box(priv->cpu_damage,
 				    &extents) == PIXMAN_REGION_OUT)
 		goto done;
@@ -990,8 +990,10 @@ sna_pixmap_create_upload(ScreenPtr screen,
 	priv->source_count = SOURCE_BIAS;
 	priv->cpu_bo = NULL;
 	priv->cpu_damage = priv->gpu_damage = NULL;
+	priv->ptr = NULL;
 	priv->gpu_only = 0;
 	priv->pinned = 0;
+	priv->freed = 1;
 	list_init(&priv->list);
 	list_init(&priv->inactive);
 
@@ -1087,6 +1089,8 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap)
 	if (priv->cpu_damage == NULL)
 		goto done;
 
+	assert(pixmap->devPrivate.ptr != NULL);
+
 	n = sna_damage_get_boxes(priv->cpu_damage, &box);
 	if (n) {
 		struct kgem_bo *src_bo;
@@ -1124,10 +1128,10 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap)
 	__sna_damage_destroy(priv->cpu_damage);
 	priv->cpu_damage = NULL;
 
-	sna_damage_reduce(&priv->gpu_damage);
 done:
 	sna_damage_reduce_all(&priv->gpu_damage,
-			      pixmap->drawable.width, pixmap->drawable.height);
+			      pixmap->drawable.width,
+			      pixmap->drawable.height);
 	list_del(&priv->list);
 	if (!priv->gpu_only && !priv->pinned)
 		list_move(&priv->inactive, &sna->active_pixmaps);
@@ -1336,6 +1340,8 @@ sna_put_image_upload_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	     __FUNCTION__, nbox,
 	     box->x1, box->y1, box->x2, box->y2));
 
+	assert(priv->gpu_bo);
+
 	if (gc->alu == GXcopy &&
 	    !priv->pinned && nbox == 1 &&
 	    box->x1 <= 0 && box->y1 <= 0 &&
@@ -1391,9 +1397,12 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	if (!priv)
 		return false;
 
-	if (priv->gpu_only)
+	if (pixmap->devPrivate.ptr == NULL) {
+		if (priv->gpu_bo == NULL)
+			return false;
 		return sna_put_image_upload_blt(drawable, gc, region,
 						x, y, w, h, bits, stride);
+	}
 
 	if (gc->alu != GXcopy)
 		return false;
@@ -2206,6 +2215,12 @@ fallback:
 						list_move(&dst_priv->list,
 							  &sna->dirty_pixmaps);
 				}
+
+				if (dst_pixmap->devPrivate.ptr == NULL) {
+					DBG(("%s: allocating CPU buffer\n", __FUNCTION__));
+					dst_priv->ptr = malloc(dst_pixmap->devKind * dst_pixmap->drawable.height);
+					dst_pixmap->devPrivate.ptr = dst_priv->ptr;
+				}
 			} else
 				sna_drawable_move_region_to_cpu(&dst_pixmap->drawable,
 								&region, true);
@@ -8562,12 +8577,51 @@ static void sna_accel_expire(struct sna *sna)
 
 static void sna_accel_inactive(struct sna *sna)
 {
+	struct sna_pixmap *priv, *next;
+	struct list preserve;
+
 	DBG(("%s (time=%ld)\n", __FUNCTION__, (long)GetTimeInMillis()));
 
+#if DEBUG_ACCEL
+	{
+		unsigned count, bytes;
+
+		count = bytes = 0;
+		list_for_each_entry(priv, &sna->inactive_clock[1], inactive)
+			count++, bytes += priv->gpu_bo->size;
+
+		DBG(("%s: trimming %d inactive GPU buffers, %d bytes\n",
+		    __FUNCTION__, count, bytes));
+
+		count = bytes = 0;
+		list_for_each_entry(priv, &sna->active_pixmaps, inactive) {
+			if (priv->ptr &&
+			    sna_damage_is_all(&priv->gpu_damage,
+					      priv->pixmap->drawable.width,
+					      priv->pixmap->drawable.height)) {
+				count++, bytes += priv->pixmap->devKind * priv->pixmap->drawable.height;
+			}
+		}
+
+		DBG(("%s: trimming %d inactive CPU buffers, %d bytes\n",
+		    __FUNCTION__, count, bytes));
+	}
+#endif
+
+	list_init(&preserve);
+	list_for_each_entry_safe(priv, next, &sna->active_pixmaps, inactive) {
+		if (priv->ptr && sna_damage_is_all(&priv->gpu_damage,
+						   priv->pixmap->drawable.width,
+						   priv->pixmap->drawable.height)) {
+			DBG(("%s: discarding CPU buffer\n", __FUNCTION__));
+			free(priv->ptr);
+			priv->pixmap->devPrivate.ptr = priv->ptr = NULL;
+			list_move(&priv->inactive, &preserve);
+		}
+	}
+
 	/* clear out the oldest inactive pixmaps */
 	while (!list_is_empty(&sna->inactive_clock[1])) {
-		struct sna_pixmap *priv;
-
 		priv = list_first_entry(&sna->inactive_clock[1],
 					struct sna_pixmap,
 					inactive);
@@ -8596,10 +8650,14 @@ static void sna_accel_inactive(struct sna *sna)
 	sna->active_pixmaps.prev->next = &sna->inactive_clock[0];
 	sna->inactive_clock[0].prev = sna->active_pixmaps.prev;
 
-	list_init(&sna->active_pixmaps);
+	sna->active_pixmaps.next = preserve.next;
+	preserve.next->prev = &sna->active_pixmaps;
+	preserve.prev->next = &sna->active_pixmaps;
+	sna->active_pixmaps.prev = preserve.prev;
 
 	if (list_is_empty(&sna->inactive_clock[1]) &&
-	    list_is_empty(&sna->inactive_clock[0]))
+	    list_is_empty(&sna->inactive_clock[0]) &&
+	    list_is_empty(&sna->active_pixmaps))
 		_sna_accel_disarm_timer(sna, INACTIVE_TIMER);
 }
 
diff --git a/src/sna/sna_damage.h b/src/sna/sna_damage.h
index 5c88cf6..0e681b7 100644
--- a/src/sna/sna_damage.h
+++ b/src/sna/sna_damage.h
@@ -145,13 +145,14 @@ static inline void sna_damage_reduce_all(struct sna_damage **damage,
 	if (*damage == NULL)
 		return;
 
+	if ((*damage)->n && (*damage = _sna_damage_reduce(*damage)) == NULL)
+		return;
+
 	if ((*damage)->mode == DAMAGE_ADD &&
 	    (*damage)->extents.x1 <= 0 &&
 	    (*damage)->extents.y1 <= 0 &&
 	    (*damage)->extents.x2 >= width &&
 	    (*damage)->extents.y2 >= height) {
-		if ((*damage)->n)
-			*damage = _sna_damage_reduce(*damage);
 		if ((*damage)->region.data == NULL)
 			*damage = _sna_damage_all(*damage, width, height);
 	}
commit 4b48d28f6e85f345730cb7242f2152ac115b267a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Dec 12 10:52:34 2011 +0000

    sna: Fix a typo, end statements with semi-colons
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index f6ac635..7fb48f0 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -438,7 +438,7 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
 #else
 		return create_pixmap(to_sna_from_screen(screen), screen,
 				     width, height, depth,
-				     usage),
+				     usage);
 #endif
 
 	if (usage == SNA_CREATE_SCRATCH)
commit 4d20798c7871ffe8581e2cf509b6aa2e40b3ae5f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Mon Dec 12 10:10:16 2011 +0000

    sna: We need to remap the gpu_only mmap prior to every use
    
    Since the VMA may be reaped at any time whilst the mapping is idle.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna.h b/src/sna/sna.h
index 3c5d56b..ad8a083 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -139,7 +139,6 @@ struct sna_pixmap {
 
 #define SOURCE_BIAS 4
 	uint16_t source_count;
-	uint8_t mapped :1;
 	uint8_t pinned :1;
 	uint8_t gpu_only :1;
 	uint8_t flush :1;
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index a71d5e1..f6ac635 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -204,7 +204,6 @@ static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv)
 		sna->freed_pixmap = pixmap;
 		priv->gpu_bo = NULL;
 		priv->cpu_bo = NULL;
-		priv->mapped = 0;
 		return false;
 	}
 
@@ -486,27 +485,25 @@ static void sna_pixmap_map_to_cpu(struct sna *sna,
 				 PixmapPtr pixmap,
 				 struct sna_pixmap *priv)
 {
+	ScreenPtr screen = pixmap->drawable.pScreen;
+	void *ptr;
+
 	DBG(("%s: AWOOGA, AWOOGA!\n", __FUNCTION__));
 
-	if (priv->mapped == 0) {
-		ScreenPtr screen = pixmap->drawable.pScreen;
-		void *ptr;
+	kgem_bo_submit(&sna->kgem, priv->gpu_bo);
 
-		ptr = kgem_bo_map(&sna->kgem,
-				  priv->gpu_bo,
-				  PROT_READ | PROT_WRITE);
-		assert(ptr != NULL);
+	ptr = kgem_bo_map(&sna->kgem,
+			  priv->gpu_bo,
+			  PROT_READ | PROT_WRITE);
+	assert(ptr != NULL);
 
-		screen->ModifyPixmapHeader(pixmap,
-					   pixmap->drawable.width,
-					   pixmap->drawable.height,
-					   pixmap->drawable.depth,
-					   pixmap->drawable.bitsPerPixel,
-					   priv->gpu_bo->pitch,
-					   ptr);
-		priv->mapped = 1;
-	}
-	kgem_bo_submit(&sna->kgem, priv->gpu_bo);
+	screen->ModifyPixmapHeader(pixmap,
+				   pixmap->drawable.width,
+				   pixmap->drawable.height,
+				   pixmap->drawable.depth,
+				   pixmap->drawable.bitsPerPixel,
+				   priv->gpu_bo->pitch,
+				   ptr);
 }
 
 static inline void list_move(struct list *list, struct list *head)
@@ -995,7 +992,6 @@ sna_pixmap_create_upload(ScreenPtr screen,
 	priv->cpu_damage = priv->gpu_damage = NULL;
 	priv->gpu_only = 0;
 	priv->pinned = 0;
-	priv->mapped = 0;
 	list_init(&priv->list);
 	list_init(&priv->inactive);
 


More information about the xorg-commit mailing list