xf86-video-intel: 9 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna.h src/sna/sna_io.c src/sna/sna_render.h src/sna/sna_tiling.c

Chris Wilson ickle at kemper.freedesktop.org
Tue Jan 10 10:08:39 PST 2012


 src/sna/gen2_render.c |   21 ++-
 src/sna/gen3_render.c |   46 +++++---
 src/sna/gen4_render.c |    8 -
 src/sna/gen5_render.c |    9 +
 src/sna/gen6_render.c |    9 +
 src/sna/gen7_render.c |    9 +
 src/sna/kgem.c        |  279 +++++++++++++++++++++++++++++---------------------
 src/sna/sna.h         |    5 
 src/sna/sna_accel.c   |  105 +++++-------------
 src/sna/sna_blt.c     |  101 +++++++++++++-----
 src/sna/sna_io.c      |   45 ++++++++
 src/sna/sna_render.h  |    6 +
 src/sna/sna_tiling.c  |  101 ++++++++++++++++++
 13 files changed, 505 insertions(+), 239 deletions(-)

New commits:
commit ca2a07adc45273dc1abeb4b3ba7f88461aaf9c00
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 10 17:13:38 2012 +0000

    sna: Release the stale GTT mapping after recreating the bo with new tiling
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 6200dd3..fc13dfd 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -371,10 +371,16 @@ struct kgem_bo *sna_pixmap_change_tiling(PixmapPtr pixmap, uint32_t tiling)
 				    &box, 1)) {
 		DBG(("%s: copy failed\n", __FUNCTION__));
 		kgem_bo_destroy(&sna->kgem, bo);
-		return false;
+		return NULL;
 	}
 
 	kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
+
+	if (priv->mapped) {
+		pixmap->devPrivate.ptr = NULL;
+		priv->mapped = 0;
+	}
+
 	return priv->gpu_bo = bo;
 }
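
The fix matters because pixmap->devPrivate.ptr may still point into a
GTT mapping of the bo that has just been destroyed. A minimal
standalone sketch of the idea, using hypothetical simplified
structures rather than the driver's real types:

#include <stddef.h>

/* Hypothetical stand-ins for the driver structures, for illustration. */
struct bo { int tiling; };

struct pixmap {
	void      *ptr;     /* devPrivate.ptr; may alias a GTT map of gpu_bo */
	int        mapped;  /* non-zero while ptr points into that mapping   */
	struct bo *gpu_bo;
};

/* Swap in the freshly tiled bo and drop any cached mapping, since the
 * old mapping refers to the bo that has just been destroyed. */
static void replace_gpu_bo(struct pixmap *pixmap, struct bo *new_bo)
{
	pixmap->gpu_bo = new_bo;
	if (pixmap->mapped) {
		pixmap->ptr = NULL;
		pixmap->mapped = 0;
	}
}

int main(void)
{
	struct bo old_bo = { 0 }, new_bo = { 1 };
	struct pixmap pix = { &old_bo, 1, &old_bo };

	replace_gpu_bo(&pix, &new_bo);
	return pix.ptr == NULL ? 0 : 1;
}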
 
commit 8dd913fd3a093b54fc024fea2b31a3db2518db8d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 10 10:38:43 2012 +0000

    sna: Add reminder about possible future tiling optimisations
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index e7ac215..6200dd3 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1079,8 +1079,8 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
 		     region->extents.y2 - region->extents.y1));
 
 		if ((flags & MOVE_WRITE) == 0 &&
-			   region->extents.x2 - region->extents.x1 == 1 &&
-			   region->extents.y2 - region->extents.y1 == 1) {
+		    region->extents.x2 - region->extents.x1 == 1 &&
+		    region->extents.y2 - region->extents.y1 == 1) {
 			/*  Often associated with synchronisation, KISS */
 			sna_read_boxes(sna,
 				       priv->gpu_bo, 0, 0,
@@ -6753,12 +6753,17 @@ sna_poly_fill_rect_tiled_blt(DrawablePtr drawable,
 
 	tile_width = tile->drawable.width;
 	tile_height = tile->drawable.height;
-	if (tile_width == 1 && tile_height == 1)
+	if ((tile_width | tile_height) == 1)
 		return sna_poly_fill_rect_blt(drawable, bo, damage,
 					      gc, get_pixel(tile),
 					      n, rect,
 					      extents, clipped);
 
+	/* XXX [248]x[238] tiling can be reduced to a pattern fill.
+	 * Also we can do the lg2 reduction for BLT and use repeat modes for
+	 * RENDER.
+	 */
+
 	if (!sna_pixmap_move_to_gpu(tile, MOVE_READ))
 		return FALSE;
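
A brief aside on the (tile_width | tile_height) == 1 form introduced
above: for positive dimensions the bitwise OR equals 1 exactly when
both values equal 1, so it is a compact equivalent of the two
comparisons it replaces. A standalone check, assuming nothing beyond
standard C:

#include <assert.h>

int main(void)
{
	int w, h;

	/* For w, h >= 1, (w | h) == 1 iff w == 1 and h == 1. */
	for (w = 1; w <= 16; w++)
		for (h = 1; h <= 16; h++)
			assert(((w | h) == 1) == (w == 1 && h == 1));

	return 0;
}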
 
commit 21948578d0d87f38447ef1ae44fada406949bca8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 10 03:19:16 2012 +0000

    sna: Disable the inline xRGB to ARGB upload conversion
    
    As we have to upload the dirty data anyway, setting the
    alpha channel to 0xff should be free. Not so, at least for
    firefox-asteroids on Atom.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 2c503d2..eda952f 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1440,6 +1440,9 @@ prepare_blt_put(struct sna *sna,
 						 GXcopy);
 		}
 	} else {
+		if (alpha_fixup)
+			return FALSE; /* XXX */
+
 		if (!sna_pixmap_move_to_cpu(src, MOVE_READ))
 			return FALSE;
 
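For context, the inline conversion being disabled here would fold the
xRGB-to-ARGB fixup, i.e. forcing the undefined alpha byte to 0xff,
into the CPU upload itself. A standalone sketch of that per-pixel
step (illustrative names, not the driver's helpers):

#include <stdint.h>
#include <stdio.h>

/* Copy x8r8g8b8 pixels while forcing the alpha byte to 0xff, yielding
 * valid a8r8g8b8 data. */
static void convert_xrgb_to_argb(uint32_t *dst, const uint32_t *src, int n)
{
	int i;

	for (i = 0; i < n; i++)
		dst[i] = src[i] | 0xff000000u;
}

int main(void)
{
	uint32_t src[2] = { 0x00123456, 0x7f89abcd };
	uint32_t dst[2];

	convert_xrgb_to_argb(dst, src, 2);
	printf("%08x %08x\n", (unsigned)dst[0], (unsigned)dst[1]);
	/* expected: ff123456 ff89abcd */
	return 0;
}
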
commit 87f73b043426c47efa7670fb65bdcc7dfcf71fc3
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 10 01:42:37 2012 +0000

    sna/gen[23]: Tile render fill to oversized bo
    
    If we are forced to perform a render operation to a bo too large to fit
    in the pipeline, copy to an intermediate and split the operation into
    tiles rather than falling back.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index 8f6a164..30dd694 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -112,7 +112,7 @@ static const struct formatinfo {
 static inline bool
 too_large(int width, int height)
 {
-	return (width | height) > MAX_3D_SIZE;
+	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
 }
 
 static inline uint32_t
@@ -2365,10 +2365,21 @@ gen2_render_fill_boxes(struct sna *sna,
 
 	if (too_large(dst->drawable.width, dst->drawable.height) ||
 	    dst_bo->pitch < 8 || dst_bo->pitch > 8192 ||
-	    !gen2_check_dst_format(format))
-		return gen2_render_fill_boxes_try_blt(sna, op, format, color,
-						      dst, dst_bo,
-						      box, n);
+	    !gen2_check_dst_format(format)) {
+		DBG(("%s: try blt, too large or incompatible destination\n",
+		     __FUNCTION__));
+		if (gen2_render_fill_boxes_try_blt(sna, op, format, color,
+						   dst, dst_bo,
+						   box, n))
+			return TRUE;
+
+		if (!gen2_check_dst_format(format))
+			return FALSE;
+
+		assert(dst_bo->pitch >= 8);
+		return sna_tiling_fill_boxes(sna, op, format, color,
+					     dst, dst_bo, box, n);
+	}
 
 	if (prefer_blt_fill(sna) &&
 	    gen2_render_fill_boxes_try_blt(sna, op, format, color,
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index fcbe9c7..f025515 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -126,7 +126,7 @@ static const struct formatinfo {
 
 static inline bool too_large(int width, int height)
 {
-	return (width | height) > MAX_3D_SIZE;
+	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
 }
 
 static inline uint32_t gen3_buf_tiling(uint32_t tiling)
@@ -3885,8 +3885,11 @@ gen3_render_fill_boxes_try_blt(struct sna *sna,
 	uint8_t alu = GXcopy;
 	uint32_t pixel;
 
-	if (dst_bo->tiling == I915_TILING_Y)
+	if (dst_bo->tiling == I915_TILING_Y) {
+		DBG(("%s: y-tiling, can't blit\n", __FUNCTION__));
+		assert(!too_large(dst->drawable.width, dst->drawable.height));
 		return FALSE;
+	}
 
 	if (color->alpha >= 0xff00) {
 		if (op == PictOpOver)
@@ -3905,15 +3908,18 @@ gen3_render_fill_boxes_try_blt(struct sna *sna,
 		if (color->alpha <= 0x00ff)
 			alu = GXclear;
 		else if (!sna_get_pixel_from_rgba(&pixel,
-						    color->red,
-						    color->green,
-						    color->blue,
-						    color->alpha,
-						    format))
+						  color->red,
+						  color->green,
+						  color->blue,
+						  color->alpha,
+						  format)) {
+			DBG(("%s: unknown format %x\n", __FUNCTION__, format));
 			return FALSE;
-	} else
+		}
+	} else {
+		DBG(("%s: unhandle op %d\n", __FUNCTION__, alu));
 		return FALSE;
-
+	}
 
 	return sna_blt_fill_boxes(sna, alu,
 				  dst_bo, dst->drawable.bitsPerPixel,
@@ -3958,10 +3964,20 @@ gen3_render_fill_boxes(struct sna *sna,
 
 	if (too_large(dst->drawable.width, dst->drawable.height) ||
 	    dst_bo->pitch > 8192 ||
-	    !gen3_check_dst_format(format))
-		return gen3_render_fill_boxes_try_blt(sna, op, format, color,
-						      dst, dst_bo,
-						      box, n);
+	    !gen3_check_dst_format(format)) {
+		DBG(("%s: try blt, too large or incompatible destination\n",
+		     __FUNCTION__));
+		if (gen3_render_fill_boxes_try_blt(sna, op, format, color,
+						   dst, dst_bo,
+						   box, n))
+			return TRUE;
+
+		if (!gen3_check_dst_format(format))
+			return FALSE;
+
+		return sna_tiling_fill_boxes(sna, op, format, color,
+					     dst, dst_bo, box, n);
+	}
 
 	if (prefer_fill_blt(sna) &&
 	    gen3_render_fill_boxes_try_blt(sna, op, format, color,
@@ -3977,8 +3993,10 @@ gen3_render_fill_boxes(struct sna *sna,
 					     color->green,
 					     color->blue,
 					     color->alpha,
-					     PICT_a8r8g8b8))
+					     PICT_a8r8g8b8)) {
+			assert(0);
 			return FALSE;
+		}
 	}
 	DBG(("%s: using shader for op=%d, format=%x, pixel=%x\n",
 	     __FUNCTION__, op, (int)format, pixel));
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index cf6d947..1df5bde 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2580,10 +2580,12 @@ gen4_render_fill_boxes(struct sna *sna,
 				       pixel, box, n))
 			return TRUE;
 
-		if (dst->drawable.width > 8192 ||
-		    dst->drawable.height > 8192 ||
-		    !gen4_check_dst_format(format))
+		if (!gen4_check_dst_format(format))
 			return FALSE;
+
+		if (dst->drawable.width > 8192 || dst->drawable.height > 8192)
+			return sna_tiling_fill_boxes(sna, op, format, color,
+						     dst, dst_bo, box, n);
 	}
 
 #if NO_FILL_BOXES
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index f0bb187..862f03e 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -282,7 +282,7 @@ gen5_emit_pipelined_pointers(struct sna *sna,
 
 static inline bool too_large(int width, int height)
 {
-	return (width | height) > MAX_3D_SIZE;
+	return width > MAX_3D_SIZE || height > MAX_3D_SIZE;
 }
 
 static int
@@ -2920,9 +2920,12 @@ gen5_render_fill_boxes(struct sna *sna,
 				       pixel, box, n))
 			return TRUE;
 
-		if (too_large(dst->drawable.width, dst->drawable.height) ||
-		    !gen5_check_dst_format(format))
+		if (!gen5_check_dst_format(format))
 			return FALSE;
+
+		if (too_large(dst->drawable.width, dst->drawable.height))
+			return sna_tiling_fill_boxes(sna, op, format, color,
+						     dst, dst_bo, box, n);
 	}
 
 	if (op == PictOpClear)
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index e95cdd6..521b6f2 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -1954,7 +1954,7 @@ gen6_composite_solid_init(struct sna *sna,
 
 static inline bool too_large(int width, int height)
 {
-	return (width | height) > GEN6_MAX_SIZE;
+	return width > GEN6_MAX_SIZE || height > GEN6_MAX_SIZE;
 }
 
 static int
@@ -3191,9 +3191,12 @@ gen6_render_fill_boxes(struct sna *sna,
 				       pixel, box, n))
 			return TRUE;
 
-		if (too_large(dst->drawable.width, dst->drawable.height) ||
-		    !gen6_check_dst_format(format))
+		if (!gen6_check_dst_format(format))
 			return FALSE;
+
+		if (too_large(dst->drawable.width, dst->drawable.height))
+			return sna_tiling_fill_boxes(sna, op, format, color,
+						     dst, dst_bo, box, n);
 	}
 
 #if NO_FILL_BOXES
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 9d17c87..302a1dc 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2048,7 +2048,7 @@ gen7_composite_solid_init(struct sna *sna,
 
 static inline bool too_large(int width, int height)
 {
-	return (width | height) > GEN7_MAX_SIZE;
+	return width > GEN7_MAX_SIZE || height > GEN7_MAX_SIZE;
 }
 
 static int
@@ -3247,9 +3247,12 @@ gen7_render_fill_boxes(struct sna *sna,
 				       pixel, box, n))
 			return TRUE;
 
-		if (too_large(dst->drawable.width, dst->drawable.height) ||
-		    !gen7_check_dst_format(format))
+		if (!gen7_check_dst_format(format))
 			return FALSE;
+
+		if (too_large(dst->drawable.width, dst->drawable.height))
+			return sna_tiling_fill_boxes(sna, op, format, color,
+						     dst, dst_bo, box, n);
 	}
 
 #if NO_FILL_BOXES
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index f780428..19dfdfb 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -495,6 +495,12 @@ Bool sna_tiling_composite(uint32_t op,
 			  int16_t dst_x, int16_t dst_y,
 			  int16_t width, int16_t height,
 			  struct sna_composite_op *tmp);
+Bool sna_tiling_fill_boxes(struct sna *sna,
+			   CARD8 op,
+			   PictFormat format,
+			   const xRenderColor *color,
+			   PixmapPtr dst, struct kgem_bo *dst_bo,
+			   const BoxRec *box, int n);
 
 Bool sna_blt_composite(struct sna *sna,
 		       uint32_t op,
diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c
index 6e68130..52572bc 100644
--- a/src/sna/sna_tiling.c
+++ b/src/sna/sna_tiling.c
@@ -307,3 +307,104 @@ sna_tiling_composite(uint32_t op,
 	tmp->u.priv = tile;
 	return TRUE;
 }
+
+Bool
+sna_tiling_fill_boxes(struct sna *sna,
+		      CARD8 op,
+		      PictFormat format,
+		      const xRenderColor *color,
+		      PixmapPtr dst, struct kgem_bo *dst_bo,
+		      const BoxRec *box, int n)
+{
+	RegionRec region, tile, this;
+	struct kgem_bo *bo;
+	Bool ret = FALSE;
+
+	pixman_region_init_rects(&region, box, n);
+
+	DBG(("%s (op=%d, format=%x, color=(%04x,%04x,%04x, %04x), tile.size=%d, box=%dx[(%d, %d), (%d, %d)])\n",
+	     __FUNCTION__, op, (int)format,
+	     color->red, color->green, color->blue, color->alpha,
+	     sna->render.max_3d_size, n,
+	     region.extents.x1, region.extents.y1,
+	     region.extents.x2, region.extents.y2));
+
+	for (tile.extents.y1 = tile.extents.y2 = region.extents.y1;
+	     tile.extents.y2 < region.extents.y2;
+	     tile.extents.y1 = tile.extents.y2) {
+		tile.extents.y2 = tile.extents.y1 + sna->render.max_3d_size;
+		if (tile.extents.y2 > region.extents.y2)
+			tile.extents.y2 = region.extents.y2;
+
+		for (tile.extents.x1 = tile.extents.x2 = region.extents.x1;
+		     tile.extents.x2 < region.extents.x2;
+		     tile.extents.x1 = tile.extents.x2) {
+			PixmapRec tmp;
+
+			tile.extents.x2 = tile.extents.x1 + sna->render.max_3d_size;
+			if (tile.extents.x2 > region.extents.x2)
+				tile.extents.x2 = region.extents.x2;
+
+			tile.data = NULL;
+
+			RegionNull(&this);
+			RegionIntersect(&this, &region, &tile);
+			if (!RegionNotEmpty(&this))
+				continue;
+
+			tmp.drawable.width  = this.extents.x2 - this.extents.x1;
+			tmp.drawable.height = this.extents.y2 - this.extents.y1;
+			tmp.drawable.depth  = dst->drawable.depth;
+			tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel;
+			tmp.devPrivate.ptr = NULL;
+
+			bo = kgem_create_2d(&sna->kgem,
+					    tmp.drawable.width,
+					    tmp.drawable.height,
+					    dst->drawable.bitsPerPixel,
+					    kgem_choose_tiling(&sna->kgem,
+							       I915_TILING_X,
+							       tmp.drawable.width,
+							       tmp.drawable.height,
+							       dst->drawable.bitsPerPixel),
+					    CREATE_SCANOUT);
+			if (bo) {
+				int16_t dx = this.extents.x1;
+				int16_t dy = this.extents.y1;
+
+				assert(bo->pitch <= 8192);
+				assert(bo->tiling != I915_TILING_Y);
+
+				if (!sna->render.copy_boxes(sna, GXcopy,
+							     dst, dst_bo, 0, 0,
+							     &tmp, bo, -dx, -dy,
+							     REGION_RECTS(&this), REGION_NUM_RECTS(&this)))
+					goto err;
+
+				RegionTranslate(&this, -dx, -dy);
+				if (!sna->render.fill_boxes(sna, op, format, color,
+							     &tmp, bo,
+							     REGION_RECTS(&this), REGION_NUM_RECTS(&this)))
+					goto err;
+
+				if (!sna->render.copy_boxes(sna, GXcopy,
+							     &tmp, bo, 0, 0,
+							     dst, dst_bo, dx, dy,
+							     REGION_RECTS(&this), REGION_NUM_RECTS(&this)))
+					goto err;
+
+				kgem_bo_destroy(&sna->kgem, bo);
+			}
+			RegionUninit(&this);
+		}
+	}
+
+	ret = TRUE;
+	goto done;
+err:
+	kgem_bo_destroy(&sna->kgem, bo);
+	RegionUninit(&this);
+done:
+	pixman_region_fini(&region);
+	return ret;
+}
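
The core of sna_tiling_fill_boxes above is a walk over the destination
extents in max_3d_size steps, clamping the final tile in each
direction, with a copy / fill / copy-back per tile. A standalone
sketch of just that loop structure (illustrative names, none of the
GEM or Region machinery). Note too that the too_large() helpers are
changed from the (width | height) > MAX form to two explicit
comparisons, so only operations with a dimension that really exceeds
the limit take this tiled path.

#include <stdio.h>

struct box { int x1, y1, x2, y2; };

/* Visit the extents tile by tile, clamping the final tile in each
 * direction, in the same pattern as the copy/fill/copy-back loop. */
static void for_each_tile(struct box extents, int max)
{
	struct box tile;

	for (tile.y1 = extents.y1; tile.y1 < extents.y2; tile.y1 = tile.y2) {
		tile.y2 = tile.y1 + max;
		if (tile.y2 > extents.y2)
			tile.y2 = extents.y2;

		for (tile.x1 = extents.x1; tile.x1 < extents.x2; tile.x1 = tile.x2) {
			tile.x2 = tile.x1 + max;
			if (tile.x2 > extents.x2)
				tile.x2 = extents.x2;

			printf("tile (%d, %d)-(%d, %d)\n",
			       tile.x1, tile.y1, tile.x2, tile.y2);
		}
	}
}

int main(void)
{
	struct box extents = { 0, 0, 5000, 3000 };

	for_each_tile(extents, 2048); /* e.g. a 2048x2048 pipeline limit */
	return 0;
}
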
commit 2ccb31c5a4120efb940168c5e52154add01b0586
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 10 01:41:09 2012 +0000

    sna: Shrink upload buffers
    
    If we do not fill the whole upload buffer, we may be able to reuse a
    smaller buffer that is currently bound in the GTT. Ideally, this will
    keep our RSS trim.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 3b1df34..4ecb507 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -44,6 +44,9 @@
 #include <memcheck.h>
 #endif
 
+static struct kgem_bo *
+search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags);
+
 static inline void _list_del(struct list *list)
 {
 	assert(list->prev->next == list);
@@ -94,6 +97,7 @@ static inline void list_replace(struct list *old,
 #endif
 
 #define PAGE_SIZE 4096
+#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)
 #define MAX_GTT_VMA_CACHE 512
 #define MAX_CPU_VMA_CACHE INT16_MAX
 #define MAP_PRESERVE_TIME 10
@@ -105,7 +109,7 @@ static inline void list_replace(struct list *old,
 struct kgem_partial_bo {
 	struct kgem_bo base;
 	void *mem;
-	uint32_t used, alloc;
+	uint32_t used;
 	uint32_t need_io : 1;
 	uint32_t write : 1;
 };
@@ -121,10 +125,10 @@ static bool validate_partials(struct kgem *kgem)
 	list_for_each_entry_safe(bo, next, &kgem->partial, base.list) {
 		if (bo->base.list.next == &kgem->partial)
 			return true;
-		if (bo->alloc - bo->used < next->alloc - next->used) {
+		if (bo->base.size - bo->used < next->base.size - next->used) {
 			ErrorF("this rem: %d, next rem: %d\n",
-			       bo->alloc - bo->used,
-			       next->alloc - next->used);
+			       bo->base.size - bo->used,
+			       next->base.size - next->used);
 			goto err;
 		}
 	}
@@ -133,7 +137,7 @@ static bool validate_partials(struct kgem *kgem)
 err:
 	list_for_each_entry(bo, &kgem->partial, base.list)
 		ErrorF("bo: used=%d / %d, rem=%d\n",
-		       bo->used, bo->alloc, bo->alloc - bo->used);
+		       bo->used, bo->base.size, bo->base.size - bo->used);
 	return false;
 }
 #else
@@ -290,11 +294,10 @@ Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo,
 	if (gem_write(kgem->fd, bo->handle, 0, length, data))
 		return FALSE;
 
-	bo->needs_flush = !bo->flush;
-	if (bo->domain == DOMAIN_GPU) {
-		kgem->sync = false;
+	DBG(("%s: flush=%d, domain=%d\n", __FUNCTION__, bo->flush, bo->domain));
+	bo->needs_flush = bo->flush;
+	if (bo->domain == DOMAIN_GPU)
 		kgem_retire(kgem);
-	}
 	bo->domain = DOMAIN_NONE;
 	return TRUE;
 }
@@ -672,7 +675,7 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
 
 	size = *pitch * height;
 	if (relaxed_fencing || tiling == I915_TILING_NONE || kgem->gen >= 40)
-		return ALIGN(size, PAGE_SIZE);
+		return PAGE_ALIGN(size);
 
 	/*  We need to allocate a pot fence region for a tiled buffer. */
 	if (kgem->gen < 30)
@@ -824,6 +827,8 @@ inline static void kgem_bo_move_to_inactive(struct kgem *kgem,
 	assert(!kgem_busy(kgem, bo->handle));
 	assert(!bo->proxy);
 	assert(!bo->io);
+	assert(bo->rq == NULL);
+	assert(bo->domain != DOMAIN_GPU);
 
 	list_move(&bo->list, &kgem->inactive[bo->bucket]);
 	if (bo->map) {
@@ -897,6 +902,7 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	bo->scanout = bo->flush = false;
 
 	assert(list_is_empty(&bo->vma));
+	assert(list_is_empty(&bo->list));
 	if (bo->rq) {
 		DBG(("%s: handle=%d -> active\n", __FUNCTION__, bo->handle));
 		list_move(&bo->list, &kgem->active[bo->bucket]);
@@ -956,8 +962,8 @@ bool kgem_retire(struct kgem *kgem)
 			bo->needs_flush = false;
 			bo->domain = DOMAIN_NONE;
 			bo->rq = NULL;
-			kgem_bo_move_to_inactive(kgem, bo);
 			list_del(&bo->request);
+			kgem_bo_move_to_inactive(kgem, bo);
 		} else
 			kgem_bo_free(kgem, bo);
 
@@ -980,6 +986,7 @@ bool kgem_retire(struct kgem *kgem)
 			bo = list_first_entry(&rq->buffers,
 					      struct kgem_bo,
 					      request);
+
 			list_del(&bo->request);
 			bo->rq = NULL;
 
@@ -1044,14 +1051,14 @@ static void kgem_commit(struct kgem *kgem)
 	struct kgem_bo *bo, *next;
 
 	list_for_each_entry_safe(bo, next, &rq->buffers, request) {
+		DBG(("%s: release handle=%d (proxy? %d)\n",
+		     __FUNCTION__, bo->handle, bo->proxy != NULL));
+
 		assert(!bo->purged);
 
 		bo->presumed_offset = bo->exec->offset;
 		bo->exec = NULL;
 
-		DBG(("%s: release handle=%d (proxy? %d)\n",
-		     __FUNCTION__, bo->handle, bo->proxy != NULL));
-
 		if (!bo->refcnt && !bo->reusable) {
 			kgem_bo_free(kgem, bo);
 			continue;
@@ -1100,36 +1107,116 @@ static void kgem_close_inactive(struct kgem *kgem)
 		kgem_close_list(kgem, &kgem->inactive[i]);
 }
 
+static void bubble_sort_partial(struct kgem *kgem, struct kgem_partial_bo *bo)
+{
+	int remain = bo->base.size - bo->used;
+
+	while (bo->base.list.prev != &kgem->partial) {
+		struct kgem_partial_bo *p;
+
+		p = list_entry(bo->base.list.prev,
+			       struct kgem_partial_bo,
+			       base.list);
+		if (remain <= p->base.size - p->used)
+			break;
+
+		assert(p->base.list.next == &bo->base.list);
+		bo->base.list.prev = p->base.list.prev;
+		p->base.list.prev->next = &bo->base.list;
+		p->base.list.prev = &bo->base.list;
+
+		p->base.list.next = bo->base.list.next;
+		bo->base.list.next->prev = &p->base.list;
+		bo->base.list.next = &p->base.list;
+
+		assert(p->base.list.next->prev == &p->base.list);
+		assert(bo->base.list.prev->next == &bo->base.list);
+	}
+}
+
 static void kgem_finish_partials(struct kgem *kgem)
 {
 	struct kgem_partial_bo *bo, *next;
 
 	list_for_each_entry_safe(bo, next, &kgem->partial, base.list) {
-		if (!bo->base.exec) {
-			if (bo->base.refcnt == 1) {
-				DBG(("%s: discarding unused partial array: %d/%d\n",
-				     __FUNCTION__, bo->used, bo->alloc));
-
-				bo->base.refcnt = 0; /* for valgrind */
-				kgem_bo_free(kgem, &bo->base);
-			}
+		assert(bo->base.domain != DOMAIN_GPU);
+		if (!bo->write) {
+			assert(bo->base.exec);
+			goto decouple;
+		}
 
+		if (!bo->base.exec)
 			continue;
+
+		if (!bo->used) {
+			/* Unless we replace the handle in the execbuffer,
+			 * then this bo will become active. So decouple it
+			 * from the partial list and track it in the normal
+			 * manner.
+			 */
+			goto decouple;
+		}
+
+		assert(bo->base.rq == kgem->next_request);
+		if (bo->need_io && bo->used < bo->base.size / 2) {
+			int size = PAGE_ALIGN(bo->used);
+			struct kgem_bo *shrink;
+
+			shrink = search_linear_cache(kgem, size,
+						     CREATE_INACTIVE);
+			if (shrink) {
+				int n;
+
+				DBG(("%s: shrnking %d to %d, handle %d to %d\n",
+				     __FUNCTION__,
+				     bo->base.size, shrink->size,
+				     bo->base.handle, shrink->handle));
+
+				gem_write(kgem->fd, shrink->handle,
+					  0, bo->used, bo->mem);
+
+				for (n = 0; n < kgem->nreloc; n++) {
+					if (kgem->reloc[n].target_handle == bo->base.handle) {
+						kgem->reloc[n].target_handle = shrink->handle;
+						kgem->reloc[n].presumed_offset = shrink->presumed_offset;
+						kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] =
+							kgem->reloc[n].delta + shrink->presumed_offset;
+					}
+				}
+
+				bo->base.exec->handle = shrink->handle;
+				bo->base.exec->offset = shrink->presumed_offset;
+				shrink->exec = bo->base.exec;
+				shrink->rq = bo->base.rq;
+				list_replace(&bo->base.request,
+					     &shrink->request);
+				list_init(&bo->base.request);
+
+				bo->base.exec = NULL;
+				bo->base.rq = NULL;
+				bo->used = 0;
+
+				bubble_sort_partial(kgem, bo);
+				continue;
+			}
 		}
 
-		list_del(&bo->base.list);
 		if (bo->need_io) {
 			DBG(("%s: handle=%d, uploading %d/%d\n",
-			     __FUNCTION__, bo->base.handle, bo->used, bo->alloc));
+			     __FUNCTION__, bo->base.handle, bo->used, bo->base.size));
 			assert(!kgem_busy(kgem, bo->base.handle));
 			gem_write(kgem->fd, bo->base.handle,
 				  0, bo->used, bo->mem);
 			bo->need_io = 0;
 		}
 
-		VG(VALGRIND_MAKE_MEM_NOACCESS(bo->mem, bo->alloc));
+		VG(VALGRIND_MAKE_MEM_NOACCESS(bo->mem, bo->base.size));
+decouple:
+		list_del(&bo->base.list);
 		kgem_bo_unref(kgem, &bo->base);
 	}
+
+	assert(validate_partials(kgem));
 }
 
 static void kgem_cleanup(struct kgem *kgem)
@@ -1156,6 +1243,7 @@ static void kgem_cleanup(struct kgem *kgem)
 			bo = list_first_entry(&rq->buffers,
 					      struct kgem_bo,
 					      request);
+
 			list_del(&bo->request);
 			bo->rq = NULL;
 			bo->domain = DOMAIN_NONE;
@@ -1215,13 +1303,13 @@ void kgem_reset(struct kgem *kgem)
 				list_first_entry(&rq->buffers,
 						 struct kgem_bo,
 						 request);
+			list_del(&bo->request);
 
 			bo->binding.offset = 0;
 			bo->exec = NULL;
 			bo->dirty = false;
 			bo->rq = NULL;
-
-			list_del(&bo->request);
+			bo->domain = DOMAIN_NONE;
 
 			if (!bo->refcnt) {
 				DBG(("%s: discarding handle=%d\n",
@@ -1299,9 +1387,6 @@ void _kgem_submit(struct kgem *kgem)
 	assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc));
 	assert(kgem->nexec < ARRAY_SIZE(kgem->exec));
 	assert(kgem->nfence <= kgem->fence_max);
-#if DEBUG_BATCH
-	__kgem_batch_debug(kgem, batch_end);
-#endif
 
 	rq = kgem->next_request;
 	if (kgem->surface != kgem->max_batch_size)
@@ -1313,6 +1398,8 @@ void _kgem_submit(struct kgem *kgem)
 		uint32_t handle = rq->bo->handle;
 		int i;
 
+		assert(!rq->bo->needs_flush);
+
 		i = kgem->nexec++;
 		kgem->exec[i].handle = handle;
 		kgem->exec[i].relocation_count = kgem->nreloc;
@@ -1329,7 +1416,10 @@ void _kgem_submit(struct kgem *kgem)
 		kgem_fixup_self_relocs(kgem, rq->bo);
 		kgem_finish_partials(kgem);
 
-		assert(!rq->bo->needs_flush);
+#if DEBUG_BATCH
+		__kgem_batch_debug(kgem, batch_end);
+#endif
+
 		if (kgem_batch_write(kgem, handle, size) == 0) {
 			struct drm_i915_gem_execbuffer2 execbuf;
 			int ret, retry = 3;
@@ -1470,8 +1560,8 @@ static void kgem_expire_partial(struct kgem *kgem)
 		if (bo->base.refcnt > 1 || bo->base.exec)
 			continue;
 
-		DBG(("%s: discarding unused partial array: %d/%d\n",
-		     __FUNCTION__, bo->used, bo->alloc));
+		DBG(("%s: discarding unused partial buffer: %d/%d, write? %d\n",
+		     __FUNCTION__, bo->used, bo->base.size, bo->write));
 		list_del(&bo->base.list);
 		kgem_bo_unref(kgem, &bo->base);
 	}
@@ -1653,8 +1743,8 @@ search_linear_cache(struct kgem *kgem, unsigned int size, unsigned flags)
 			DBG(("  %s: found handle=%d (size=%d) in linear vma cache\n",
 			     __FUNCTION__, bo->handle, bo->size));
 			assert(use_active || bo->domain != DOMAIN_GPU);
-			assert(!bo->needs_flush || use_active);
-			//assert(use_active || !kgem_busy(kgem, bo->handle));
+			assert(!bo->needs_flush);
+			//assert(!kgem_busy(kgem, bo->handle));
 			return bo;
 		}
 	}
@@ -1793,7 +1883,7 @@ struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size)
 
 	DBG(("%s(%d)\n", __FUNCTION__, size));
 
-	size = ALIGN(size, PAGE_SIZE);
+	size = PAGE_ALIGN(size);
 	bo = search_linear_cache(kgem, size, CREATE_INACTIVE);
 	if (bo)
 		return kgem_bo_reference(bo);
@@ -1826,7 +1916,7 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
 				     __FUNCTION__, width*bpp/8));
 				tiling = I915_TILING_NONE;
 				goto done;
-			} else if (width > 2048 || height > 2048) {
+			} else if ((width|height) > 2048) {
 				DBG(("%s: large buffer (%dx%d), forcing TILING_X\n",
 				     __FUNCTION__, width, height));
 				tiling = -I915_TILING_X;
@@ -1837,7 +1927,7 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
 			DBG(("%s: large pitch [%d], forcing TILING_X\n",
 			     __FUNCTION__, width*bpp/8));
 			tiling = -I915_TILING_X;
-		} else if (tiling && (width > 8192 || height > 8192)) {
+		} else if (tiling && (width|height) > 8192) {
 			DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n",
 			     __FUNCTION__, width, height));
 			tiling = -I915_TILING_X;
@@ -2216,31 +2306,8 @@ static void _kgem_bo_delete_partial(struct kgem *kgem, struct kgem_bo *bo)
 		list_del(&io->base.list);
 		--io->base.refcnt;
 	} else if (bo->delta + bo->size == io->used) {
-		int remain;
-
 		io->used = bo->delta;
-		remain = io->alloc - io->used;
-		while (io->base.list.prev != &kgem->partial) {
-			struct kgem_partial_bo *p;
-
-			p = list_entry(io->base.list.prev,
-				       struct kgem_partial_bo,
-				       base.list);
-			if (remain <= p->alloc - p->used)
-				break;
-
-			assert(p->base.list.next == &io->base.list);
-			io->base.list.prev = p->base.list.prev;
-			p->base.list.prev->next = &io->base.list;
-			p->base.list.prev = &io->base.list;
-
-			p->base.list.next = io->base.list.next;
-			io->base.list.next->prev = &p->base.list;
-			io->base.list.next = &p->base.list;
-
-			assert(p->base.list.next->prev == &p->base.list);
-			assert(io->base.list.prev->next == &io->base.list);
-		}
+		bubble_sort_partial(kgem, io);
 	}
 
 	assert(validate_partials(kgem));
@@ -2509,10 +2576,8 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot)
 		drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
 
 		bo->needs_flush = !bo->flush;
-		if (bo->domain == DOMAIN_GPU) {
-			kgem->sync = false;
+		if (bo->domain == DOMAIN_GPU)
 			kgem_retire(kgem);
-		}
 		bo->domain = DOMAIN_GTT;
 	}
 
@@ -2668,11 +2733,9 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
 
 		drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain);
 		assert(!kgem_busy(kgem, bo->handle));
-		bo->needs_flush = !bo->flush;
-		if (bo->domain == DOMAIN_GPU) {
-			kgem->sync = false;
+		bo->needs_flush = bo->flush;
+		if (bo->domain == DOMAIN_GPU)
 			kgem_retire(kgem);
-		}
 		bo->domain = DOMAIN_CPU;
 	}
 }
@@ -2779,15 +2842,15 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 	list_for_each_entry(bo, &kgem->partial, base.list) {
 		if (flags == KGEM_BUFFER_LAST && bo->write) {
 			/* We can reuse any write buffer which we can fit */
-			if (size <= bo->alloc) {
+			if (size <= bo->base.size) {
 				if (bo->base.refcnt == 1 && bo->base.exec) {
 					DBG(("%s: reusing write buffer for read of %d bytes? used=%d, total=%d\n",
-					     __FUNCTION__, size, bo->used, bo->alloc));
+					     __FUNCTION__, size, bo->used, bo->base.size));
 					offset = 0;
 					goto done;
-				} else if (bo->used + size <= bo->alloc) {
+				} else if (bo->used + size <= bo->base.size) {
 					DBG(("%s: reusing unfinished write buffer for read of %d bytes? used=%d, total=%d\n",
-					     __FUNCTION__, size, bo->used, bo->alloc));
+					     __FUNCTION__, size, bo->used, bo->base.size));
 					offset = bo->used;
 					goto done;
 				}
@@ -2800,23 +2863,16 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 			continue;
 		}
 
-		if (bo->base.refcnt == 1 && bo->base.exec == NULL) {
-			DBG(("%s: discarding unfinished buffer? used=%d, total=%d\n",
-			     __FUNCTION__, bo->used, bo->alloc));
-			/* no users, so reset */
-			bo->used = 0;
-		}
-
-		if (bo->used + size <= bo->alloc) {
+		if (bo->used + size <= bo->base.size) {
 			DBG(("%s: reusing partial buffer? used=%d + size=%d, total=%d\n",
-			     __FUNCTION__, bo->used, size, bo->alloc));
+			     __FUNCTION__, bo->used, size, bo->base.size));
 			offset = bo->used;
 			bo->used += size;
 			goto done;
 		}
 
 		DBG(("%s: too small (%d < %d)\n",
-		     __FUNCTION__, bo->alloc - bo->used, size));
+		     __FUNCTION__, bo->base.size - bo->used, size));
 		break;
 	}
 
@@ -2884,7 +2940,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		if (handle) {
 			__kgem_bo_init(&bo->base, handle, alloc);
 			bo->base.vmap = true;
-			bo->need_io = 0;
+			bo->need_io = false;
 			bo->mem = bo + 1;
 		} else {
 			free(bo);
@@ -2933,8 +2989,10 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		bo->base.io = true;
 	}
 	bo->base.reusable = false;
+	assert(bo->base.size == alloc);
+	assert(!bo->need_io || !bo->base.needs_flush);
+	assert(!bo->need_io || bo->base.domain != DOMAIN_GPU);
 
-	bo->alloc = alloc;
 	bo->used = size;
 	bo->write = write;
 	offset = 0;
@@ -2945,7 +3003,7 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 
 done:
 	/* adjust the position within the list to maintain decreasing order */
-	alloc = bo->alloc - bo->used;
+	alloc = bo->base.size - bo->used;
 	{
 		struct kgem_partial_bo *p, *first;
 
@@ -2953,9 +3011,9 @@ done:
 					     struct kgem_partial_bo,
 					     base.list);
 		while (&p->base.list != &kgem->partial &&
-		       alloc < p->alloc - p->used) {
+		       alloc < p->base.size - p->used) {
 			DBG(("%s: this=%d, right=%d\n",
-			     __FUNCTION__, alloc, p->alloc -p->used));
+			     __FUNCTION__, alloc, p->base.size -p->used));
 			p = list_first_entry(&p->base.list,
 					     struct kgem_partial_bo,
 					     base.list);
@@ -2995,14 +3053,14 @@ struct kgem_bo *kgem_create_buffer_2d(struct kgem *kgem,
 		bo->size -= stride;
 
 		/* And bubble-sort the partial back into place */
-		remain = io->alloc - io->used;
+		remain = io->base.size - io->used;
 		while (io->base.list.prev != &kgem->partial) {
 			struct kgem_partial_bo *p;
 
 			p = list_entry(io->base.list.prev,
 				       struct kgem_partial_bo,
 				       base.list);
-			if (remain <= p->alloc - p->used)
+			if (remain <= p->base.size - p->used)
 				break;
 
 			assert(p->base.list.next == &io->base.list);
@@ -3128,11 +3186,9 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 			 bo->base.handle, (char *)(bo+1)+offset,
 			 offset, length);
 		assert(!kgem_busy(kgem, bo->base.handle));
-		bo->base.needs_flush = !bo->base.flush;
-		if (bo->base.domain == DOMAIN_GPU) {
-			kgem->sync = false;
+		bo->base.needs_flush = bo->base.flush;
+		if (bo->base.domain == DOMAIN_GPU)
 			kgem_retire(kgem);
-		}
 		bo->base.domain = DOMAIN_NONE;
 	} else
 		kgem_bo_sync__cpu(kgem, &bo->base);
@@ -3198,7 +3254,7 @@ kgem_replace_bo(struct kgem *kgem,
 	assert(src->tiling == I915_TILING_NONE);
 
 	size = height * pitch;
-	size = ALIGN(size, PAGE_SIZE);
+	size = PAGE_ALIGN(size);
 
 	dst = search_linear_cache(kgem, size, 0);
 	if (dst == NULL)
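
The shrinking heuristic above only triggers for pwrite-backed buffers
(need_io) that are less than half full, and the replacement is sized
to the page-aligned used length before the relocations are rewritten
to point at it. A standalone sketch of just the sizing decision
(illustrative, not the kgem API):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096
#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
#define PAGE_ALIGN(x) ALIGN(x, PAGE_SIZE)

/* Return the size of a smaller replacement buffer, or 0 if the
 * partially used upload buffer should be kept as it is. */
static uint32_t shrink_target(uint32_t size, uint32_t used, int need_io)
{
	if (!need_io || used >= size / 2)
		return 0;

	return PAGE_ALIGN(used);
}

int main(void)
{
	printf("%u\n", shrink_target(256 * 1024, 10000, 1));  /* 12288 */
	printf("%u\n", shrink_target(256 * 1024, 200000, 1)); /* 0 */
	return 0;
}
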
commit 572cc76be5c78102166f2b3640640e21acdfa129
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 10 09:13:34 2012 +0000

    sna: Destroy the counter damage after a reduction-to-all
    
    If, for instance, we reduce the GPU damage to all, we know that there
    can be no CPU damage, even though the CPU damage may still hold a region
    with a list of subtractions. Take advantage of this knowledge and cheaply
    discard that damage without having to evaluate it.

    This should stop the paranoid assertion that there is no CPU damage from
    firing when discarding the CPU bo for an active pixmap.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index bcffecb..e7ac215 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -298,9 +298,14 @@ static inline uint32_t default_tiling(PixmapPtr pixmap)
 		return I915_TILING_X;
 	}
 
-	return sna_damage_is_all(&priv->cpu_damage,
+	if (sna_damage_is_all(&priv->cpu_damage,
 				 pixmap->drawable.width,
-				 pixmap->drawable.height) ? I915_TILING_Y : sna->default_tiling;
+				 pixmap->drawable.height)) {
+		sna_damage_destroy(&priv->gpu_damage);
+		return I915_TILING_Y;
+	}
+
+	return sna->default_tiling;
 }
 
 constant static uint32_t sna_pixmap_choose_tiling(PixmapPtr pixmap)
@@ -1919,10 +1924,8 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 
 	if (!priv->pinned && priv->gpu_bo &&
 	    region_subsumes_gpu_damage(region, priv) &&
-	    kgem_bo_map_will_stall(&sna->kgem, priv->gpu_bo)) {
-		sna_damage_destroy(&priv->gpu_damage);
+	    kgem_bo_map_will_stall(&sna->kgem, priv->gpu_bo))
 		sna_pixmap_free_gpu(sna, priv);
-	}
 
 	/* XXX performing the upload inplace is currently about 20x slower
 	 * for putimage10 on gen6 -- mostly due to slow page faulting in kernel.
@@ -9663,6 +9666,7 @@ static void sna_accel_inactive(struct sna *sna)
 				      priv->pixmap->drawable.height)) {
 			DBG(("%s: discarding inactive CPU shadow\n",
 			     __FUNCTION__));
+			sna_damage_destroy(&priv->cpu_damage);
 			sna_pixmap_free_cpu(sna, priv);
 			list_add(&priv->inactive, &preserve);
 		} else {
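
The rule being exploited can be stated as a small invariant: once one
side's damage has been reduced to cover the whole pixmap, the opposing
(counter) damage must be empty, so it can be destroyed outright rather
than evaluated. A toy sketch with boolean stand-ins for the damage
trackers (not the real sna_damage API):

#include <assert.h>
#include <stdbool.h>

/* Toy stand-in: damage either covers everything or tracks some
 * partial region (possibly with pending subtractions). */
struct damage { bool all, partial; };

static void damage_destroy(struct damage *d)
{
	d->all = d->partial = false;
}

/* Reducing one side to "all" lets the counter damage be discarded
 * cheaply rather than reduced box by box. */
static void reduce_to_all(struct damage *mine, struct damage *counter)
{
	mine->all = true;
	mine->partial = false;
	damage_destroy(counter);
}

int main(void)
{
	struct damage gpu = { false, true }, cpu = { false, true };

	reduce_to_all(&gpu, &cpu);
	assert(gpu.all && !cpu.all && !cpu.partial);
	return 0;
}
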
commit 4a255e13164440b797e5ac11dcbf72f0827a3094
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 10 16:34:23 2012 +0000

    sna: Replace the free-inactive-gpu-bo with the generic code
    
    The function was semantically equivalent to moving the pixmap to the CPU
    for writing, so replace it with a call to the generic function.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 6b69a6e..bcffecb 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -163,7 +163,7 @@ sna_copy_init_blt(struct sna_copy_op *copy,
 	return sna->render.copy(sna, alu, src, src_bo, dst, dst_bo, copy);
 }
 
-static void sna_pixmap_destroy_gpu_bo(struct sna *sna, struct sna_pixmap *priv)
+static void sna_pixmap_free_gpu(struct sna *sna, struct sna_pixmap *priv)
 {
 	sna_damage_destroy(&priv->gpu_damage);
 
@@ -743,7 +743,7 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
 				kgem_retire(&sna->kgem);
 
 			if (kgem_bo_is_busy(priv->gpu_bo)) {
-				sna_pixmap_destroy_gpu_bo(sna, priv);
+				sna_pixmap_free_gpu(sna, priv);
 				if (!sna_pixmap_move_to_gpu(pixmap, MOVE_WRITE))
 					goto skip_inplace_map;
 			}
@@ -827,7 +827,7 @@ skip_inplace_map:
 		sna_damage_all(&priv->cpu_damage,
 			       pixmap->drawable.width,
 			       pixmap->drawable.height);
-		sna_pixmap_destroy_gpu_bo(sna, priv);
+		sna_pixmap_free_gpu(sna, priv);
 
 		if (priv->flush)
 			list_move(&priv->list, &sna->dirty_pixmaps);
@@ -1158,7 +1158,7 @@ done:
 				      pixmap->drawable.width,
 				      pixmap->drawable.height)) {
 			DBG(("%s: replaced entire pixmap\n", __FUNCTION__));
-			sna_pixmap_destroy_gpu_bo(sna, priv);
+			sna_pixmap_free_gpu(sna, priv);
 		}
 		if (priv->flush)
 			list_move(&priv->list, &sna->dirty_pixmaps);
@@ -1921,7 +1921,7 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 	    region_subsumes_gpu_damage(region, priv) &&
 	    kgem_bo_map_will_stall(&sna->kgem, priv->gpu_bo)) {
 		sna_damage_destroy(&priv->gpu_damage);
-		sna_pixmap_destroy_gpu_bo(sna, priv);
+		sna_pixmap_free_gpu(sna, priv);
 	}
 
 	/* XXX performing the upload inplace is currently about 20x slower
@@ -2015,7 +2015,7 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 		sna_damage_all(&priv->cpu_damage,
 			       pixmap->drawable.width,
 			       pixmap->drawable.height);
-		sna_pixmap_destroy_gpu_bo(sna, priv);
+		sna_pixmap_free_gpu(sna, priv);
 	} else {
 		sna_damage_subtract(&priv->gpu_damage, region);
 		sna_damage_add(&priv->cpu_damage, region);
@@ -2024,7 +2024,7 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
 				      pixmap->drawable.width,
 				      pixmap->drawable.height)) {
 			DBG(("%s: replaced entire pixmap\n", __FUNCTION__));
-			sna_pixmap_destroy_gpu_bo(sna, priv);
+			sna_pixmap_free_gpu(sna, priv);
 		}
 	}
 	if (priv->flush)
@@ -9609,61 +9609,6 @@ static void sna_accel_expire(struct sna *sna)
 		_sna_accel_disarm_timer(sna, EXPIRE_TIMER);
 }
 
-static bool
-sna_pixmap_free_gpu(struct sna *sna, struct sna_pixmap *priv)
-{
-	PixmapPtr pixmap = priv->pixmap;
-
-	assert (!priv->flush);
-
-	if (priv->mapped) {
-		pixmap->devPrivate.ptr = NULL;
-		priv->mapped = 0;
-	}
-
-	if (pixmap->devPrivate.ptr == NULL &&
-	    !sna_pixmap_alloc_cpu(sna, pixmap, priv, priv->gpu_damage != NULL))
-		return false;
-
-	if (priv->gpu_damage) {
-		BoxPtr box;
-		int n;
-
-		DBG(("%s: flushing GPU damage\n", __FUNCTION__));
-
-		n = sna_damage_get_boxes(priv->gpu_damage, &box);
-		if (n) {
-			struct kgem_bo *dst_bo;
-			Bool ok = FALSE;
-
-			dst_bo = NULL;
-			if (sna->kgem.gen >= 30)
-				dst_bo = priv->cpu_bo;
-			if (dst_bo)
-				ok = sna->render.copy_boxes(sna, GXcopy,
-							    pixmap, priv->gpu_bo, 0, 0,
-							    pixmap, dst_bo, 0, 0,
-							    box, n);
-			if (!ok)
-				sna_read_boxes(sna,
-					       priv->gpu_bo, 0, 0,
-					       pixmap, 0, 0,
-					       box, n);
-		}
-
-		__sna_damage_destroy(priv->gpu_damage);
-		priv->gpu_damage = NULL;
-	}
-
-	sna_damage_all(&priv->cpu_damage,
-		       pixmap->drawable.width,
-		       pixmap->drawable.height);
-	sna_pixmap_destroy_gpu_bo(sna, priv);
-
-	priv->source_count = SOURCE_BIAS;
-	return true;
-}
-
 static void sna_accel_inactive(struct sna *sna)
 {
 	struct sna_pixmap *priv;
@@ -9723,7 +9668,8 @@ static void sna_accel_inactive(struct sna *sna)
 		} else {
 			DBG(("%s: discarding inactive GPU bo handle=%d\n",
 			     __FUNCTION__, priv->gpu_bo->handle));
-			if (!sna_pixmap_free_gpu(sna, priv))
+			if (!sna_pixmap_move_to_cpu(priv->pixmap,
+						    MOVE_READ | MOVE_WRITE))
 				list_add(&priv->inactive, &preserve);
 		}
 	}
commit c1d403266a611a68081690d19f6debb8e343095f
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 10 00:41:05 2012 +0000

    sna: Allow for xRGB uploads to replace their target bo
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna.h b/src/sna/sna.h
index 4290659..07ae683 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -665,6 +665,11 @@ struct kgem_bo *sna_replace(struct sna *sna,
 			    PixmapPtr pixmap,
 			    struct kgem_bo *bo,
 			    const void *src, int stride);
+struct kgem_bo *sna_replace__xor(struct sna *sna,
+				 PixmapPtr pixmap,
+				 struct kgem_bo *bo,
+				 const void *src, int stride,
+				 uint32_t and, uint32_t or);
 
 Bool
 sna_compute_composite_extents(BoxPtr extents,
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 64fcd06..2c503d2 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1289,18 +1289,34 @@ blt_put_composite_with_alpha(struct sna *sna,
 	int16_t dst_y = r->dst.y + op->dst.y;
 	int16_t src_x = r->src.x + op->u.blt.sx;
 	int16_t src_y = r->src.y + op->u.blt.sy;
-	BoxRec box;
 
-	box.x1 = dst_x;
-	box.y1 = dst_y;
-	box.x2 = dst_x + r->width;
-	box.y2 = dst_y + r->height;
+	if (!dst_priv->pinned &&
+	    dst_x <= 0 && dst_y <= 0 &&
+	    dst_x + r->width >= op->dst.width &&
+	    dst_y + r->height >= op->dst.height) {
+		int bpp = dst->drawable.bitsPerPixel / 8;
+
+		data += (src_x - dst_x) * bpp;
+		data += (src_y - dst_y) * pitch;
+
+		dst_priv->gpu_bo =
+			sna_replace__xor(sna, op->dst.pixmap, dst_priv->gpu_bo,
+					 data, pitch,
+					 0xffffffff, op->u.blt.pixel);
+	} else {
+		BoxRec box;
+
+		box.x1 = dst_x;
+		box.y1 = dst_y;
+		box.x2 = dst_x + r->width;
+		box.y2 = dst_y + r->height;
 
-	sna_write_boxes__xor(sna, dst,
-			     dst_priv->gpu_bo, 0, 0,
-			     data, pitch, src_x, src_y,
-			     &box, 1,
-			     0xffffffff, op->u.blt.pixel);
+		sna_write_boxes__xor(sna, dst,
+				     dst_priv->gpu_bo, 0, 0,
+				     data, pitch, src_x, src_y,
+				     &box, 1,
+				     0xffffffff, op->u.blt.pixel);
+	}
 }
 
 fastcall static void
@@ -1309,18 +1325,35 @@ blt_put_composite_box_with_alpha(struct sna *sna,
 				 const BoxRec *box)
 {
 	PixmapPtr src = op->u.blt.src_pixmap;
+	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
 
 	DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
 	     op->u.blt.sx, op->u.blt.sy,
 	     op->dst.x, op->dst.y));
 
-	sna_write_boxes__xor(sna, op->dst.pixmap,
-			     op->dst.bo, op->dst.x, op->dst.y,
-			     src->devPrivate.ptr,
-			     src->devKind,
-			     op->u.blt.sx, op->u.blt.sy,
-			     box, 1,
-			     0xffffffff, op->u.blt.pixel);
+	if (!dst_priv->pinned &&
+	    box->x2 - box->x1 == op->dst.width &&
+	    box->y2 - box->y1 == op->dst.height) {
+		int pitch = src->devKind;
+		int bpp = src->drawable.bitsPerPixel / 8;
+		char *data = src->devPrivate.ptr;
+
+		data += (box->y1 + op->u.blt.sy) * pitch;
+		data += (box->x1 + op->u.blt.sx) * bpp;
+
+		dst_priv->gpu_bo =
+			sna_replace__xor(sna, op->dst.pixmap, op->dst.bo,
+					 data, pitch,
+					 0xffffffff, op->u.blt.pixel);
+	} else {
+		sna_write_boxes__xor(sna, op->dst.pixmap,
+				     op->dst.bo, op->dst.x, op->dst.y,
+				     src->devPrivate.ptr,
+				     src->devKind,
+				     op->u.blt.sx, op->u.blt.sy,
+				     box, 1,
+				     0xffffffff, op->u.blt.pixel);
+	}
 }
 
 static void
@@ -1329,19 +1362,36 @@ blt_put_composite_boxes_with_alpha(struct sna *sna,
 				   const BoxRec *box, int n)
 {
 	PixmapPtr src = op->u.blt.src_pixmap;
+	struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap);
 
 	DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
 	     op->u.blt.sx, op->u.blt.sy,
 	     op->dst.x, op->dst.y,
 	     box->x1, box->y1, box->x2, box->y2, n));
 
-	sna_write_boxes__xor(sna, op->dst.pixmap,
-			     op->dst.bo, op->dst.x, op->dst.y,
-			     src->devPrivate.ptr,
-			     src->devKind,
-			     op->u.blt.sx, op->u.blt.sy,
-			     box, n,
-			     0xffffffff, op->u.blt.pixel);
+	if (n == 1 && !dst_priv->pinned &&
+	    box->x2 - box->x1 == op->dst.width &&
+	    box->y2 - box->y1 == op->dst.height) {
+		int pitch = src->devKind;
+		int bpp = src->drawable.bitsPerPixel / 8;
+		char *data = src->devPrivate.ptr;
+
+		data += (box->y1 + op->u.blt.sy) * pitch;
+		data += (box->x1 + op->u.blt.sx) * bpp;
+
+		dst_priv->gpu_bo =
+			sna_replace__xor(sna, op->dst.pixmap, op->dst.bo,
+					 data, pitch,
+					 0xffffffff, op->u.blt.pixel);
+	} else {
+		sna_write_boxes__xor(sna, op->dst.pixmap,
+				     op->dst.bo, op->dst.x, op->dst.y,
+				     src->devPrivate.ptr,
+				     src->devKind,
+				     op->u.blt.sx, op->u.blt.sy,
+				     box, n,
+				     0xffffffff, op->u.blt.pixel);
+	}
 }
 
 static Bool
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index dbf1774..50fae25 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -824,3 +824,48 @@ struct kgem_bo *sna_replace(struct sna *sna,
 
 	return bo;
 }
+
+struct kgem_bo *sna_replace__xor(struct sna *sna,
+				 PixmapPtr pixmap,
+				 struct kgem_bo *bo,
+				 const void *src, int stride,
+				 uint32_t and, uint32_t or)
+{
+	struct kgem *kgem = &sna->kgem;
+	void *dst;
+
+	DBG(("%s(handle=%d, %dx%d, bpp=%d, tiling=%d)\n",
+	     __FUNCTION__, bo->handle,
+	     pixmap->drawable.width,
+	     pixmap->drawable.height,
+	     pixmap->drawable.bitsPerPixel,
+	     bo->tiling));
+
+	if (kgem_bo_is_busy(bo)) {
+		struct kgem_bo *new_bo;
+
+		new_bo = kgem_create_2d(kgem,
+					pixmap->drawable.width,
+					pixmap->drawable.height,
+					pixmap->drawable.bitsPerPixel,
+					bo->tiling,
+					CREATE_GTT_MAP | CREATE_INACTIVE);
+		if (new_bo) {
+			kgem_bo_destroy(kgem, bo);
+			bo = new_bo;
+		}
+	}
+
+	dst = kgem_bo_map(kgem, bo, PROT_READ | PROT_WRITE);
+	if (dst) {
+		memcpy_xor(src, dst, pixmap->drawable.bitsPerPixel,
+			   stride, bo->pitch,
+			   0, 0,
+			   0, 0,
+			   pixmap->drawable.width,
+			   pixmap->drawable.height,
+			   and, or);
+	}
+
+	return bo;
+}
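
The replace path is only taken when the source data covers the whole
drawable and the destination bo is not pinned; otherwise the existing
per-box write is kept. Below is a standalone sketch of that predicate,
mirroring the condition added above (illustrative names only);
sna_replace__xor then rewrites the whole bo, applying the same
per-pixel and/or fixup as the boxed sna_write_boxes__xor path.

#include <stdbool.h>
#include <stdio.h>

/* An upload may replace the destination bo wholesale only when the
 * destination is not pinned and the incoming rectangle covers the
 * entire drawable. */
static bool can_replace(bool pinned,
			int dst_x, int dst_y, int w, int h,
			int dst_width, int dst_height)
{
	return !pinned &&
	       dst_x <= 0 && dst_y <= 0 &&
	       dst_x + w >= dst_width &&
	       dst_y + h >= dst_height;
}

int main(void)
{
	printf("%d\n", can_replace(false, 0, 0, 640, 480, 640, 480));   /* 1 */
	printf("%d\n", can_replace(false, 10, 10, 620, 460, 640, 480)); /* 0 */
	printf("%d\n", can_replace(true, 0, 0, 640, 480, 640, 480));    /* 0 */
	return 0;
}
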
commit 406776cd955c3c384d4a537300e21eebe4413666
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Jan 10 00:25:14 2012 +0000

    sna: Rearrange buffer allocation to prefer llc bo over vmaps
    
    If we can create snoopable bos, we prefer to use those, as creating a
    vmap forces a new bo creation and so increases GTT pressure.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 6801c59..3b1df34 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2581,6 +2581,7 @@ uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo)
 }
 
 #if defined(USE_VMAP) && defined(I915_PARAM_HAS_VMAP)
+#define HAVE_VMAP 1
 static uint32_t gem_vmap(int fd, void *ptr, int size, int read_only)
 {
 	struct drm_i915_gem_vmap vmap;
@@ -2627,6 +2628,7 @@ struct kgem_bo *kgem_create_map(struct kgem *kgem,
 	return bo;
 }
 #else
+#define HAVE_VMAP 0
 static uint32_t gem_vmap(int fd, void *ptr, int size, int read_only)
 {
 	return 0;
@@ -2821,23 +2823,6 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 	alloc = (flags & KGEM_BUFFER_LAST) ? 4096 : 32 * 1024;
 	alloc = ALIGN(size, alloc);
 
-	handle = 0;
-	if (kgem->has_vmap) {
-		bo = malloc(sizeof(*bo) + alloc);
-		if (bo == NULL)
-			return NULL;
-
-		handle = gem_vmap(kgem->fd, bo+1, alloc, write);
-		if (handle) {
-			__kgem_bo_init(&bo->base, handle, alloc);
-			bo->base.vmap = true;
-			bo->need_io = 0;
-			bo->mem = bo + 1;
-			goto init;
-		} else
-			free(bo);
-	}
-
 	if (!DEBUG_NO_LLC && kgem->gen >= 60) {
 		struct kgem_bo *old;
 
@@ -2890,6 +2875,21 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		bo->base.io = true;
 
 		alloc = bo->base.size;
+	} else if (HAVE_VMAP && kgem->has_vmap) {
+		bo = malloc(sizeof(*bo) + alloc);
+		if (bo == NULL)
+			return NULL;
+
+		handle = gem_vmap(kgem->fd, bo+1, alloc, write);
+		if (handle) {
+			__kgem_bo_init(&bo->base, handle, alloc);
+			bo->base.vmap = true;
+			bo->need_io = 0;
+			bo->mem = bo + 1;
+		} else {
+			free(bo);
+			return NULL;
+		}
 	} else {
 		struct kgem_bo *old;
 
@@ -2932,7 +2932,6 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		}
 		bo->base.io = true;
 	}
-init:
 	bo->base.reusable = false;
 
 	bo->alloc = alloc;

