xf86-video-intel: 10 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_composite.c src/sna/sna.h src/sna/sna_render.c src/sna/sna_render.h

Chris Wilson ickle at kemper.freedesktop.org
Thu Jan 26 04:58:26 PST 2012


 src/sna/gen2_render.c   |    2 
 src/sna/gen3_render.c   |   62 ++++---
 src/sna/gen4_render.c   |    2 
 src/sna/gen5_render.c   |    2 
 src/sna/gen6_render.c   |    4 
 src/sna/gen7_render.c   |    4 
 src/sna/kgem.c          |  296 ++++++++++++++++++++----------------
 src/sna/kgem.h          |   16 -
 src/sna/sna.h           |    3 
 src/sna/sna_accel.c     |   58 +++----
 src/sna/sna_composite.c |    3 
 src/sna/sna_render.c    |  389 ++++++++++++++++++++----------------------------
 src/sna/sna_render.h    |    2 
 13 files changed, 418 insertions(+), 425 deletions(-)

New commits:
commit 35c0ef586bf508c577642d772f18eae0b64cfd44
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 26 11:20:03 2012 +0000

    sna/gen3: Use cpu bo if already in use
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
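
The new logic distils to a single decision: render directly into the
pixmap's CPU bo when no GPU bo exists yet and the CPU bo has already left
the CPU domain (i.e. the GPU is already using it), rather than forcing a
migration onto a fresh GPU bo. A minimal sketch of that test, using
hypothetical stand-in types rather than the driver's real headers:

    enum bo_domain { DOMAIN_NONE, DOMAIN_CPU, DOMAIN_GTT };

    struct bo { enum bo_domain domain; };

    struct pixmap_priv {
        struct bo *gpu_bo;
        struct bo *cpu_bo;
    };

    /* Prefer the CPU bo as the render target when there is no GPU bo
     * and the CPU bo is no longer in the CPU domain, i.e. it is already
     * in use by the GPU.
     */
    static struct bo *choose_dst_bo(struct pixmap_priv *priv)
    {
        if (priv && priv->gpu_bo == NULL &&
            priv->cpu_bo && priv->cpu_bo->domain != DOMAIN_CPU)
            return priv->cpu_bo;

        return NULL; /* fall back to forcing the pixmap onto the GPU */
    }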

diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 95a79b2..af83966 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2337,32 +2337,42 @@ gen3_composite_set_target(struct sna *sna,
 	op->dst.height = op->dst.pixmap->drawable.height;
 	priv = sna_pixmap(op->dst.pixmap);
 
-	priv = sna_pixmap_force_to_gpu(op->dst.pixmap, MOVE_READ | MOVE_WRITE);
-	if (priv == NULL)
-		return FALSE;
+	op->dst.bo = NULL;
+	priv = sna_pixmap(op->dst.pixmap);
+	if (priv &&
+	    priv->gpu_bo == NULL &&
+	    priv->cpu_bo && priv->cpu_bo->domain != DOMAIN_CPU) {
+		op->dst.bo = priv->cpu_bo;
+		op->damage = &priv->cpu_damage;
+	}
+	if (op->dst.bo == NULL) {
+		priv = sna_pixmap_force_to_gpu(op->dst.pixmap, MOVE_READ | MOVE_WRITE);
+		if (priv == NULL)
+			return FALSE;
 
-	/* For single-stream mode there should be no minimum alignment
-	 * required, except that the width must be at least 2 elements.
-	 */
-	if (priv->gpu_bo->pitch < 2*op->dst.pixmap->drawable.bitsPerPixel) {
-		struct kgem_bo *bo;
+		/* For single-stream mode there should be no minimum alignment
+		 * required, except that the width must be at least 2 elements.
+		 */
+		if (priv->gpu_bo->pitch < 2*op->dst.pixmap->drawable.bitsPerPixel) {
+			struct kgem_bo *bo;
 
-		if (priv->pinned)
-			return FALSE;
+			if (priv->pinned)
+				return FALSE;
 
-		bo = kgem_replace_bo(&sna->kgem, priv->gpu_bo,
-				     op->dst.width, op->dst.height,
-				     2*op->dst.pixmap->drawable.bitsPerPixel,
-				     op->dst.pixmap->drawable.bitsPerPixel);
-		if (bo == NULL)
-			return FALSE;
+			bo = kgem_replace_bo(&sna->kgem, priv->gpu_bo,
+					     op->dst.width, op->dst.height,
+					     2*op->dst.pixmap->drawable.bitsPerPixel,
+					     op->dst.pixmap->drawable.bitsPerPixel);
+			if (bo == NULL)
+				return FALSE;
 
-		kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
-		priv->gpu_bo = bo;
-	}
+			kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
+			priv->gpu_bo = bo;
+		}
 
-	op->dst.bo = priv->gpu_bo;
-	op->damage = &priv->gpu_damage;
+		op->dst.bo = priv->gpu_bo;
+		op->damage = &priv->gpu_damage;
+	}
 	if (sna_damage_is_all(op->damage, op->dst.width, op->dst.height))
 		op->damage = NULL;
 
@@ -2475,7 +2485,9 @@ gen3_composite_fallback(struct sna *sna,
 
 	if (src_pixmap && !is_solid(src) && !source_fallback(src)) {
 		priv = sna_pixmap(src_pixmap);
-		if (priv && priv->gpu_damage && !priv->cpu_damage) {
+		if (priv &&
+		    ((priv->gpu_damage && !priv->cpu_damage) ||
+		     (priv->cpu_bo && priv->cpu_bo->domain != DOMAIN_CPU))) {
 			DBG(("%s: src is already on the GPU, try to use GPU\n",
 			     __FUNCTION__));
 			return FALSE;
@@ -2483,7 +2495,9 @@ gen3_composite_fallback(struct sna *sna,
 	}
 	if (mask_pixmap && !is_solid(mask) && !source_fallback(mask)) {
 		priv = sna_pixmap(mask_pixmap);
-		if (priv && priv->gpu_damage && !priv->cpu_damage) {
+		if (priv &&
+		    ((priv->gpu_damage && !priv->cpu_damage) ||
+		     (priv->cpu_bo && priv->cpu_bo->domain != DOMAIN_CPU))) {
 			DBG(("%s: mask is already on the GPU, try to use GPU\n",
 			     __FUNCTION__));
 			return FALSE;
commit b76a6da3fa0148ef32600dd9505e22b90de037df
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 26 10:47:01 2012 +0000

    sna: Search the buckets above the desired size in the bo cache
    
    It is preferable to reuse a slightly larger bo than it is to create a
    fresh one and map it into the aperture, so search the buckets above
    us as well.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
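
The retry logic amounts to probing at most three cache buckets, starting
from the best-fit bucket for the requested size. A standalone sketch of
the search order, with a flat array of cached sizes standing in for
kgem's bucketed lists and an assumed 4 KiB base bucket:

    #include <stddef.h>

    #define NUM_CACHE_BUCKETS 16

    /* Hypothetical stand-in: each bucket caches one bo size (0 = empty). */
    static size_t bucket_contents[NUM_CACHE_BUCKETS];

    static int cache_bucket(size_t size)
    {
        int bucket = 0;
        while (((size_t)4096 << bucket) < size) /* assumption: 4 KiB base */
            bucket++;
        return bucket;
    }

    /* Search the best-fit bucket and up to two buckets above it:
     * reusing a slightly larger bo is cheaper than creating a fresh one
     * and mapping it into the aperture.
     */
    static int find_cached(size_t size)
    {
        int bucket = cache_bucket(size);
        int retry = NUM_CACHE_BUCKETS - bucket;

        if (retry > 3)
            retry = 3;

        for (; retry--; bucket++)
            if (bucket_contents[bucket] >= size)
                return bucket;

        return -1; /* miss: allocate a new bo */
    }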

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index a2fcefc..6cd86e6 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2189,7 +2189,7 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 	struct kgem_bo *bo, *next;
 	uint32_t pitch, untiled_pitch, tiled_height, size;
 	uint32_t handle;
-	int i;
+	int i, bucket, retry;
 
 	if (tiling < 0)
 		tiling = -tiling, flags |= CREATE_EXACT;
@@ -2208,6 +2208,7 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 				 width, height, bpp, tiling, &pitch);
 	assert(size && size < kgem->max_cpu_size);
 	assert(tiling == I915_TILING_NONE || size < kgem->max_gpu_size);
+	bucket = cache_bucket(size);
 
 	if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
 		int for_cpu = !!(flags & CREATE_CPU_MAP);
@@ -2216,10 +2217,10 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 		/* We presume that we will need to upload to this bo,
 		 * and so would prefer to have an active VMA.
 		 */
-		cache = &kgem->vma[for_cpu].inactive[cache_bucket(size)];
+		cache = &kgem->vma[for_cpu].inactive[bucket];
 		do {
 			list_for_each_entry(bo, cache, vma) {
-				assert(bo->bucket == cache_bucket(size));
+				assert(bo->bucket == bucket);
 				assert(bo->refcnt == 0);
 				assert(bo->map);
 				assert(IS_CPU_MAP(bo->map) == for_cpu);
@@ -2263,13 +2264,17 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 		goto skip_active_search;
 
 	/* Best active match */
-	cache = active(kgem, size, tiling);
+	retry = NUM_CACHE_BUCKETS - bucket;
+	if (retry > 3)
+		retry = 3;
+search_again:
+	cache = &kgem->active[bucket][tiling];
 	if (tiling) {
 		tiled_height = kgem_aligned_height(kgem, height, tiling);
 		list_for_each_entry(bo, cache, list) {
-			assert(bo->bucket == cache_bucket(size));
 			assert(!bo->purged);
 			assert(bo->refcnt == 0);
+			assert(bo->bucket == bucket);
 			assert(bo->reusable);
 			assert(bo->tiling == tiling);
 
@@ -2280,7 +2285,6 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 				continue;
 			}
 
-
 			if (bo->pitch * tiled_height > bo->size)
 				continue;
 
@@ -2294,7 +2298,7 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 		}
 	} else {
 		list_for_each_entry(bo, cache, list) {
-			assert(bo->bucket == cache_bucket(size));
+			assert(bo->bucket == bucket);
 			assert(!bo->purged);
 			assert(bo->refcnt == 0);
 			assert(bo->reusable);
@@ -2314,6 +2318,11 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 		}
 	}
 
+	if (--retry && flags & CREATE_EXACT) {
+		bucket++;
+		goto search_again;
+	}
+
 	if ((flags & CREATE_EXACT) == 0) { /* allow an active near-miss? */
 		untiled_pitch = kgem_untiled_pitch(kgem,
 						   width, bpp,
@@ -2356,10 +2365,15 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 	}
 
 skip_active_search:
+	bucket = cache_bucket(size);
+	retry = NUM_CACHE_BUCKETS - bucket;
+	if (retry > 3)
+		retry = 3;
+search_inactive:
 	/* Now just look for a close match and prefer any currently active */
-	cache = inactive(kgem, size);
+	cache = &kgem->inactive[bucket];
 	list_for_each_entry_safe(bo, next, cache, list) {
-		assert(bo->bucket == cache_bucket(size));
+		assert(bo->bucket == bucket);
 
 		if (size > bo->size) {
 			DBG(("inactive too small: %d < %d\n",
@@ -2409,10 +2423,15 @@ skip_active_search:
 	if (flags & CREATE_INACTIVE && !list_is_empty(&kgem->requests)) {
 		if (kgem_retire(kgem)) {
 			flags &= ~CREATE_INACTIVE;
-			goto skip_active_search;
+			goto search_inactive;
 		}
 	}
 
+	if (--retry) {
+		bucket++;
+		goto search_inactive;
+	}
+
 	handle = gem_create(kgem->fd, size);
 	if (handle == 0)
 		return NULL;
commit e2b8b1c145932e2254a705905c60f18c200cf2e8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 26 12:42:12 2012 +0000

    sna: Apply any previous transformation when downsampling
    
    In order to handle rotations and fractional offsets produced by the act
    of downsampling, we need to compute the full affine transformation and
    apply it to the vertices rather than attempt to fudge it with an integer
    offset.
    
    References: https://bugs.freedesktop.org/show_bug.cgi?id=45086
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
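
The crux of the fix is that the temporary's scale and offset are folded
into the channel transform instead of being approximated by an integer
origin. A compilable sketch of just that composition using the pixman API
(the function name is illustrative; w/h are the sampled extents,
width/height the downsampled size, and x1/y1 the sample box origin):

    #include <pixman.h>
    #include <string.h>

    static void
    fold_downsample_transform(pixman_transform_t *result,
                              const pixman_transform_t *channel_transform,
                              int w, int h, int width, int height,
                              int x1, int y1)
    {
        pixman_transform_t t;

        /* Forward transform: downsampled coords -> source coords,
         * in 16.16 fixed point. */
        memset(&t, 0, sizeof(t));
        t.matrix[0][0] = (w << 16) / width;
        t.matrix[0][2] = x1 << 16;
        t.matrix[1][1] = (h << 16) / height;
        t.matrix[1][2] = y1 << 16;
        t.matrix[2][2] = 1 << 16;

        /* Sampling needs the inverse, composed with any transform the
         * channel already carried. */
        pixman_transform_invert(result, &t);
        if (channel_transform)
            pixman_transform_multiply(result, result, channel_transform);
    }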

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 6eae248..a2fcefc 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -3323,67 +3323,6 @@ struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
 	return bo;
 }
 
-struct kgem_bo *kgem_upload_source_image_halved(struct kgem *kgem,
-						pixman_format_code_t format,
-						const void *data,
-						int x, int y,
-						int width, int height,
-						int stride, int bpp)
-{
-	struct kgem_bo *bo;
-	pixman_image_t *src_image, *dst_image;
-	pixman_transform_t t;
-	int w, h;
-	void *dst;
-
-	DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n",
-	     __FUNCTION__, x, y, width, height, stride, bpp));
-
-	w = (width + 1) / 2;
-	h = (height + 1) / 2;
-
-	bo = kgem_create_buffer_2d(kgem, w, h, bpp,
-				   KGEM_BUFFER_WRITE_INPLACE,
-				   &dst);
-	if (bo == NULL)
-		return NULL;
-
-	dst_image = pixman_image_create_bits(format, w, h, dst, bo->pitch);
-	if (dst_image == NULL)
-		goto cleanup_bo;
-
-	src_image = pixman_image_create_bits(format, width, height,
-					     (uint32_t*)data, stride);
-	if (src_image == NULL)
-		goto cleanup_dst;
-
-	memset(&t, 0, sizeof(t));
-	t.matrix[0][0] = 2 << 16;
-	t.matrix[1][1] = 2 << 16;
-	t.matrix[2][2] = 1 << 16;
-	pixman_image_set_transform(src_image, &t);
-	pixman_image_set_filter(src_image, PIXMAN_FILTER_BILINEAR, NULL, 0);
-	pixman_image_set_repeat(src_image, PIXMAN_REPEAT_PAD);
-
-	pixman_image_composite(PIXMAN_OP_SRC,
-			       src_image, NULL, dst_image,
-			       x, y,
-			       0, 0,
-			       0, 0,
-			       w, h);
-
-	pixman_image_unref(src_image);
-	pixman_image_unref(dst_image);
-
-	return bo;
-
-cleanup_dst:
-	pixman_image_unref(dst_image);
-cleanup_bo:
-	kgem_bo_destroy(kgem, bo);
-	return NULL;
-}
-
 void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 {
 	struct kgem_partial_bo *bo;
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index fd3aa9d..db4f061 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -191,12 +191,6 @@ struct kgem_bo *kgem_upload_source_image(struct kgem *kgem,
 					 const void *data,
 					 BoxPtr box,
 					 int stride, int bpp);
-struct kgem_bo *kgem_upload_source_image_halved(struct kgem *kgem,
-						pixman_format_code_t format,
-						const void *data,
-						int x, int y,
-						int width, int height,
-						int stride, int bpp);
 
 int kgem_choose_tiling(struct kgem *kgem,
 		       int tiling, int width, int height, int bpp);
diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index 28b93a2..9d7857c 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -615,15 +615,17 @@ static int sna_render_picture_downsample(struct sna *sna,
 					 int16_t w, int16_t h,
 					 int16_t dst_x, int16_t dst_y)
 {
-	struct kgem_bo *bo = NULL;
 	PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);
-	int16_t ox, oy, ow, oh;
-	BoxRec box;
-
-	assert(w && h);
-
-	DBG(("%s (%d, %d)x(%d, %d) [dst=(%d, %d)]\n",
-	     __FUNCTION__, x, y, w, h, dst_x, dst_y));
+	ScreenPtr screen = pixmap->drawable.pScreen;
+	PicturePtr tmp_src, tmp_dst;
+	PictFormatPtr format;
+	struct sna_pixmap *priv;
+	pixman_transform_t t;
+	PixmapPtr tmp;
+	int width, height;
+	int sx, sy, ox, oy, ow, oh;
+	int error, ret = 0;
+	BoxRec box, b;
 
 	ow = w;
 	oh = h;
@@ -645,12 +647,6 @@ static int sna_render_picture_downsample(struct sna *sna,
 		oy = v.vector[1] / v.vector[2];
 	}
 
-	/* Align the origin to an even pixel so that the sampling of
-	 * partial images is stable.
-	 */
-	box.x1 &= ~1;
-	box.y1 &= ~1;
-
 	if (channel->repeat == RepeatNone || channel->repeat == RepeatPad) {
 		if (box.x1 < 0)
 			box.x1 = 0;
@@ -684,184 +680,124 @@ static int sna_render_picture_downsample(struct sna *sna,
 
 	w = box.x2 - box.x1;
 	h = box.y2 - box.y1;
-	DBG(("%s: sample area (%d, %d), (%d, %d): %dx%d\n",
-	     __FUNCTION__, box.x1, box.y1, box.x2, box.y2, w, h));
-	assert(w && h);
-	if (w > 2*sna->render.max_3d_size || h > 2*sna->render.max_3d_size) {
-		DBG(("%s: sample size too large for pixman downscaling\n",
-		     __FUNCTION__));
-		goto fixup;
-	}
 
-	if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) {
-		DBG(("%s: uploading partial texture\n", __FUNCTION__));
-		bo = kgem_upload_source_image_halved(&sna->kgem,
-						     picture->format,
-						     pixmap->devPrivate.ptr,
-						     box.x1, box.y1, w, h,
-						     pixmap->devKind,
-						     pixmap->drawable.bitsPerPixel);
-		if (!bo) {
-			DBG(("%s: failed to upload source image, using clear\n",
-			     __FUNCTION__));
-			return 0;
-		}
-	} else {
-		ScreenPtr screen = pixmap->drawable.pScreen;
-		PicturePtr tmp_src, tmp_dst;
-		PictFormatPtr format;
-		struct sna_pixmap *priv;
-		pixman_transform_t t;
-		PixmapPtr tmp;
-		int error, i, j, ww, hh, ni, nj;
-
-		if (!sna_pixmap_move_to_gpu(pixmap, MOVE_READ))
-			goto fixup;
-
-		tmp = screen->CreatePixmap(screen,
-					   (w+1)/2, (h+1)/2,
-					   pixmap->drawable.depth,
-					   SNA_CREATE_SCRATCH);
-		if (!tmp)
-			goto fixup;
-
-		priv = sna_pixmap(tmp);
-		if (!priv) {
-			screen->DestroyPixmap(tmp);
-			goto fixup;
-		}
+	DBG(("%s: sample (%d, %d), (%d, %d)\n",
+	     __FUNCTION__, box.x1, box.y1, box.x2, box.y2));
 
-		format = PictureMatchFormat(screen,
-					    pixmap->drawable.depth,
-					    picture->format);
+	sx = (w + sna->render.max_3d_size - 1) / sna->render.max_3d_size;
+	sy = (h + sna->render.max_3d_size - 1) / sna->render.max_3d_size;
 
-		tmp_dst = CreatePicture(0, &tmp->drawable, format, 0, NULL,
-					serverClient, &error);
-		if (!tmp_dst) {
-			screen->DestroyPixmap(tmp);
-			goto fixup;
-		}
+	DBG(("%s: scaling (%d, %d) down by %dx%d\n",
+	     __FUNCTION__, w, h, sx, sy));
 
-		tmp_src = CreatePicture(0, &pixmap->drawable, format, 0, NULL,
-					serverClient, &error);
-		if (!tmp_src) {
-			FreePicture(tmp_dst, 0);
-			screen->DestroyPixmap(tmp);
-			goto fixup;
-		}
+	width  = w / sx;
+	height = h / sy;
 
-		tmp_src->repeat = true;
-		tmp_src->repeatType = RepeatPad;
-		tmp_src->filter = PictFilterBilinear;
-		memset(&t, 0, sizeof(t));
-		t.matrix[0][0] = 2 << 16;
-		t.matrix[1][1] = 2 << 16;
-		t.matrix[2][2] = 1 << 16;
-		tmp_src->transform = &t;
-
-		ValidatePicture(tmp_dst);
-		ValidatePicture(tmp_src);
-
-		if (w > sna->render.max_3d_size) {
-			ww = (w+3)/4;
-			nj = 2;
-		} else {
-			ww = (w+1)/2;
-			nj = 1;
-		}
+	DBG(("%s: creating temporary GPU bo %dx%d\n",
+	     __FUNCTION__, width, height));
 
-		if (h > sna->render.max_3d_size) {
-			hh = (h+3)/4;
-			ni = 2;
-		} else {
-			hh = (h+1)/2;
-			ni = 1;
-		}
+	tmp = screen->CreatePixmap(screen,
+				   width, height,
+				   pixmap->drawable.depth,
+				   SNA_CREATE_SCRATCH);
+	if (!tmp)
+		return 0;
 
-		DBG(("%s %d:%d downsampling using %dx%d GPU tiles\n",
-		     __FUNCTION__, nj, ni, ww, hh));
-
-		for (i = 0; i < ni; i++) {
-			BoxRec b;
-
-			b.y1 = hh*i;
-			if (i == ni - 1)
-				b.y2 = (h+1)/2;
-			else
-				b.y2 = b.y1 + hh;
-
-			for (j = 0; j < nj; j++) {
-				struct sna_composite_op op;
-
-				b.x1 = ww*j;
-				if (j == nj - 1)
-					b.x2 = (w+1)/2;
-				else
-					b.x2 = b.x1 + ww;
-
-				DBG(("%s: tile %d:%d, box=(%d,%d), (%d, %d)\n",
-				     __FUNCTION__, i, j, b.x1, b.y1, b.x2, b.y2));
-
-				memset(&op, 0, sizeof(op));
-				if (!sna->render.composite(sna,
-							   PictOpSrc,
-							   tmp_src, NULL, tmp_dst,
-							   box.x1/2 + b.x1, box.y1/2 + b.y1,
-							   0, 0,
-							   b.x1, b.y1,
-							   b.x2 - b.x1, b.y2 - b.y1,
-							   &op)) {
-					tmp_src->transform = NULL;
-					FreePicture(tmp_src, 0);
-					FreePicture(tmp_dst, 0);
-					screen->DestroyPixmap(tmp);
-					goto fixup;
-				}
-
-				op.boxes(sna, &op, &b, 1);
-				op.done(sna, &op);
-			}
+	priv = sna_pixmap(tmp);
+	if (!priv)
+		goto cleanup_tmp;
+
+	format = PictureMatchFormat(screen,
+				    pixmap->drawable.depth,
+				    picture->format);
+
+	tmp_dst = CreatePicture(0, &tmp->drawable, format, 0, NULL,
+				serverClient, &error);
+	if (!tmp_dst)
+		goto cleanup_tmp;
+
+	tmp_src = CreatePicture(0, &pixmap->drawable, format, 0, NULL,
+				serverClient, &error);
+	if (!tmp_src)
+		goto cleanup_dst;
+
+	tmp_src->repeat = 1;
+	tmp_src->repeatType = RepeatPad;
+	/* Prefer to use nearest as it helps reduce artefacts from
+	 * interpolating and filtering twice.
+	 */
+	tmp_src->filter = PictFilterNearest;
+	memset(&t, 0, sizeof(t));
+	t.matrix[0][0] = (w << 16) / width;
+	t.matrix[0][2] = box.x1 << 16;
+	t.matrix[1][1] = (h << 16) / height;
+	t.matrix[1][2] = box.y1 << 16;
+	t.matrix[2][2] = 1 << 16;
+	tmp_src->transform = &t;
+
+	ValidatePicture(tmp_dst);
+	ValidatePicture(tmp_src);
+
+	w = sna->render.max_3d_size / sx - 2 * sx;
+	h = sna->render.max_3d_size / sy - 2 * sy;
+	DBG(("%s %d:%d downsampling using %dx%d GPU tiles\n",
+	     __FUNCTION__, (width + w-1)/w, (height + h-1)/h, w, h));
+
+	for (b.y1 = 0; b.y1 < height; b.y1 = b.y2) {
+		b.y2 = b.y1 + h;
+		if (b.y2 > height)
+			b.y2 = height;
+
+		for (b.x1 = 0; b.x1 < width; b.x1 = b.x2) {
+			struct sna_composite_op op;
+
+			b.x2 = b.x1 + w;
+			if (b.x2 > width)
+				b.x2 = width;
+
+			DBG(("%s: tile (%d, %d), (%d, %d)\n",
+			     __FUNCTION__, b.x1, b.y1, b.x2, b.y2));
+
+			memset(&op, 0, sizeof(op));
+			if (!sna->render.composite(sna,
+						   PictOpSrc,
+						   tmp_src, NULL, tmp_dst,
+						   b.x1, b.y1,
+						   0, 0,
+						   b.x1, b.y1,
+						   b.x2 - b.x1, b.y2 - b.y1,
+						   &op))
+				goto cleanup_src;
+
+			op.boxes(sna, &op, &b, 1);
+			op.done(sna, &op);
 		}
-
-		bo = kgem_bo_reference(priv->gpu_bo);
-
-		tmp_src->transform = NULL;
-		FreePicture(tmp_src, 0);
-		FreePicture(tmp_dst, 0);
-		screen->DestroyPixmap(tmp);
 	}
 
-	if (ox == x && oy == y) {
-		x = y = 0;
-	} else if (channel->transform) {
-		pixman_vector_t v;
-		pixman_transform_t m;
-
-		v.vector[0] = (ox - box.x1) << 16;
-		v.vector[1] = (oy - box.y1) << 16;
-		v.vector[2] = 1 << 16;
-		pixman_transform_invert(&m, channel->transform);
-		pixman_transform_point(&m, &v);
-		x = v.vector[0] / v.vector[2];
-		y = v.vector[1] / v.vector[2];
-	} else {
-		x = ox - box.x1;
-		y = oy - box.y1;
-	}
+	pixman_transform_invert(&channel->embedded_transform, &t);
+	if (channel->transform)
+		pixman_transform_multiply(&channel->embedded_transform,
+					  &channel->embedded_transform,
+					  channel->transform);
+	channel->transform = &channel->embedded_transform;
 
 	channel->offset[0] = x - dst_x;
 	channel->offset[1] = y - dst_y;
-	channel->scale[0] = 1.f/w;
-	channel->scale[1] = 1.f/h;
-	channel->width  = (w + 1) / 2;
-	channel->height = (h + 1) / 2;
-	channel->bo = bo;
-	return 1;
-
-fixup:
-	return sna_render_picture_fixup(sna, picture, channel,
-					x, y, w, h,
-					dst_x, dst_y);
+	channel->scale[0] = 1.f/width;
+	channel->scale[1] = 1.f/height;
+	channel->width  = width;
+	channel->height = height;
+	channel->bo = kgem_bo_reference(priv->gpu_bo);
+
+	ret = 1;
+cleanup_src:
+	tmp_src->transform = NULL;
+	FreePicture(tmp_src, 0);
+cleanup_dst:
+	FreePicture(tmp_dst, 0);
+cleanup_tmp:
+	screen->DestroyPixmap(tmp);
+	return ret;
 }
 
 int
@@ -965,7 +901,10 @@ sna_render_picture_extract(struct sna *sna,
 	}
 
 	src_bo = use_cpu_bo(sna, pixmap, &box);
-	if (src_bo == NULL) {
+	if (src_bo) {
+		if (!sna_pixmap_move_to_cpu(pixmap, MOVE_READ))
+			return 0;
+	} else {
 		if (texture_is_cpu(pixmap, &box) &&
 		    !move_to_gpu(pixmap, &box)) {
 			bo = kgem_upload_source_image(&sna->kgem,
@@ -981,7 +920,6 @@ sna_render_picture_extract(struct sna *sna,
 				src_bo = priv->gpu_bo;
 		}
 	}
-
 	if (src_bo) {
 		bo = kgem_create_2d(&sna->kgem, w, h,
 				    pixmap->drawable.bitsPerPixel,
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index 4346196..c4711f4 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -57,6 +57,8 @@ struct sna_composite_op {
 		int16_t offset[2];
 		float scale[2];
 
+		pixman_transform_t embedded_transform;
+
 		union {
 			struct {
 				uint32_t pixel;
commit 352828ee59164a9e81093d88dfdd45bc21f0c739
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Thu Jan 26 12:41:34 2012 +0000

    sna: Tweak aperture thresholds for batch flushing
    
    In order to more easily accommodate operations on large source CPU bos.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
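
The reordered checks reduce to: an empty request always fits; once the
batch's aperture usage passes the low watermark everything else is
deferred to a flush; and the high watermark bounds the total. A minimal
sketch of the check, assuming a hypothetical accounting struct in place
of struct kgem:

    #include <stdbool.h>
    #include <stdint.h>

    struct aperture_state {
        uint32_t aperture;      /* bytes already referenced by the batch */
        uint32_t aperture_low;  /* flush threshold, now total / 3 */
        uint32_t aperture_high; /* hard ceiling, total * 3 / 4 */
    };

    static bool batch_fits(const struct aperture_state *ap,
                           uint32_t new_size)
    {
        if (new_size == 0)
            return true;  /* every bo is already in the batch */

        if (ap->aperture > ap->aperture_low)
            return false; /* batch already large: flush first */

        return new_size + ap->aperture <= ap->aperture_high;
    }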

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 1bcda22..6eae248 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -634,7 +634,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 
 	kgem->aperture_total = aperture.aper_size;
 	kgem->aperture_high = aperture.aper_size * 3/4;
-	kgem->aperture_low = aperture.aper_size * 1/4;
+	kgem->aperture_low = aperture.aper_size * 1/3;
 	DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
 	     kgem->aperture_low, kgem->aperture_low / (1024*1024),
 	     kgem->aperture_high, kgem->aperture_high / (1024*1024)));
@@ -2484,9 +2484,6 @@ bool kgem_check_bo(struct kgem *kgem, ...)
 	int num_exec = 0;
 	int size = 0;
 
-	if (kgem->aperture > kgem->aperture_low)
-		return false;
-
 	va_start(ap, kgem);
 	while ((bo = va_arg(ap, struct kgem_bo *))) {
 		if (bo->exec)
@@ -2497,6 +2494,12 @@ bool kgem_check_bo(struct kgem *kgem, ...)
 	}
 	va_end(ap);
 
+	if (!size)
+		return true;
+
+	if (kgem->aperture > kgem->aperture_low)
+		return false;
+
 	if (size + kgem->aperture > kgem->aperture_high)
 		return false;
 
@@ -2515,9 +2518,6 @@ bool kgem_check_bo_fenced(struct kgem *kgem, ...)
 	int size = 0;
 	int fenced_size = 0;
 
-	if (unlikely (kgem->aperture > kgem->aperture_low))
-		return false;
-
 	va_start(ap, kgem);
 	while ((bo = va_arg(ap, struct kgem_bo *))) {
 		if (bo->exec) {
@@ -2544,13 +2544,19 @@ bool kgem_check_bo_fenced(struct kgem *kgem, ...)
 	if (fenced_size + kgem->aperture_fenced > kgem->aperture_mappable)
 		return false;
 
-	if (size + kgem->aperture > kgem->aperture_high)
+	if (kgem->nfence + num_fence > kgem->fence_max)
 		return false;
 
-	if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem))
+	if (!size)
+		return true;
+
+	if (kgem->aperture > kgem->aperture_low)
 		return false;
 
-	if (kgem->nfence + num_fence >= kgem->fence_max)
+	if (size + kgem->aperture > kgem->aperture_high)
+		return false;
+
+	if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem))
 		return false;
 
 	return true;
commit cff6a1a2e4648eb211a1789ae9f711e2f16e9d4d
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Jan 25 23:21:36 2012 +0000

    sna: Use the cpu bo where possible as the source for texture extraction
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
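
After this change the source for the extraction is chosen in three steps:
reuse the pixmap's CPU bo if it qualifies, otherwise upload the CPU
pixels into a fresh bo, and only as a last resort migrate the whole
pixmap to the GPU. A sketch of that decision chain, with the two
fallbacks passed in as stubs since the real helpers take many more
parameters:

    #include <stddef.h>

    struct kgem_bo { int handle; };

    struct pixmap_priv {
        struct kgem_bo *cpu_bo;
        struct kgem_bo *gpu_bo;
        int cpu_bo_busy;    /* the GPU already references the CPU bo */
        int texture_is_cpu; /* the pixels currently live on the CPU */
    };

    static struct kgem_bo *
    pick_source(struct pixmap_priv *priv,
                struct kgem_bo *(*upload)(struct pixmap_priv *),
                struct kgem_bo *(*move_to_gpu)(struct pixmap_priv *))
    {
        if (priv->cpu_bo && priv->cpu_bo_busy)
            return priv->cpu_bo; /* cheapest: no copy at all */

        if (priv->texture_is_cpu)
            return upload(priv);

        return move_to_gpu(priv);
    }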

diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c
index d1cb60b..28b93a2 100644
--- a/src/sna/sna_render.c
+++ b/src/sna/sna_render.c
@@ -295,7 +295,7 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box)
 	if (DBG_NO_CPU_BO)
 		return NULL;
 
-	priv = sna_pixmap_attach(pixmap);
+	priv = sna_pixmap(pixmap);
 	if (priv == NULL || priv->cpu_bo == NULL) {
 		DBG(("%s: no cpu bo\n", __FUNCTION__));
 		return NULL;
@@ -332,7 +332,7 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box)
 		int w = box->x2 - box->x1;
 		int h = box->y2 - box->y1;
 
-		if (pixmap->usage_hint)
+		if (!priv->gpu)
 			goto done;
 
 		if (priv->source_count*w*h >= pixmap->drawable.width * pixmap->drawable.height &&
@@ -349,7 +349,7 @@ use_cpu_bo(struct sna *sna, PixmapPtr pixmap, const BoxRec *box)
 done:
 	DBG(("%s for box=(%d, %d), (%d, %d)\n",
 	     __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
-	return kgem_bo_reference(priv->cpu_bo);
+	return priv->cpu_bo;
 }
 
 static Bool
@@ -583,23 +583,25 @@ sna_render_pixmap_bo(struct sna *sna,
 	     pixmap->drawable.width, pixmap->drawable.height));
 
 	bo = use_cpu_bo(sna, pixmap, &box);
-	if (bo == NULL &&
-	    texture_is_cpu(pixmap, &box) &&
-	    !move_to_gpu(pixmap, &box)) {
-		DBG(("%s: uploading CPU box (%d, %d), (%d, %d)\n",
-		     __FUNCTION__, box.x1, box.y1, box.x2, box.y2));
-		bo = upload(sna, channel, pixmap, &box);
-	}
-
-	if (bo == NULL) {
-		priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ);
-		if (priv) {
-			bo = kgem_bo_reference(priv->gpu_bo);
-		} else {
-			DBG(("%s: failed to upload pixmap to gpu, uploading CPU box (%d, %d), (%d, %d) instead\n",
+	if (bo) {
+		bo = kgem_bo_reference(bo);
+	} else {
+		if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) {
+			DBG(("%s: uploading CPU box (%d, %d), (%d, %d)\n",
 			     __FUNCTION__, box.x1, box.y1, box.x2, box.y2));
 			bo = upload(sna, channel, pixmap, &box);
 		}
+
+		if (bo == NULL) {
+			priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ);
+			if (priv) {
+				bo = kgem_bo_reference(priv->gpu_bo);
+			} else {
+				DBG(("%s: failed to upload pixmap to gpu, uploading CPU box (%d, %d), (%d, %d) instead\n",
+				     __FUNCTION__, box.x1, box.y1, box.x2, box.y2));
+				bo = upload(sna, channel, pixmap, &box);
+			}
+		}
 	}
 
 	channel->bo = bo;
@@ -870,7 +872,7 @@ sna_render_picture_extract(struct sna *sna,
 			   int16_t w, int16_t h,
 			   int16_t dst_x, int16_t dst_y)
 {
-	struct kgem_bo *bo = NULL;
+	struct kgem_bo *bo = NULL, *src_bo;
 	PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable);
 	int16_t ox, oy, ow, oh;
 	BoxRec box;
@@ -962,49 +964,48 @@ sna_render_picture_extract(struct sna *sna,
 						     dst_x, dst_y);
 	}
 
-	if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) {
-		bo = kgem_upload_source_image(&sna->kgem,
-					      pixmap->devPrivate.ptr,
-					      &box,
-					      pixmap->devKind,
-					      pixmap->drawable.bitsPerPixel);
-		if (bo == NULL) {
-			DBG(("%s: failed to upload source image, using clear\n",
-			     __FUNCTION__));
-			return 0;
-		}
-	} else {
-		if (!sna_pixmap_move_to_gpu(pixmap, MOVE_READ)) {
-			DBG(("%s: falback -- pixmap is not on the GPU\n",
-			     __FUNCTION__));
-			return sna_render_picture_fixup(sna, picture, channel,
-							x, y, ow, oh, dst_x, dst_y);
+	src_bo = use_cpu_bo(sna, pixmap, &box);
+	if (src_bo == NULL) {
+		if (texture_is_cpu(pixmap, &box) &&
+		    !move_to_gpu(pixmap, &box)) {
+			bo = kgem_upload_source_image(&sna->kgem,
+						      pixmap->devPrivate.ptr,
+						      &box,
+						      pixmap->devKind,
+						      pixmap->drawable.bitsPerPixel);
+		} else {
+			struct sna_pixmap *priv;
+
+			priv = sna_pixmap_move_to_gpu(pixmap, MOVE_READ);
+			if (priv)
+				src_bo = priv->gpu_bo;
 		}
+	}
 
+	if (src_bo) {
 		bo = kgem_create_2d(&sna->kgem, w, h,
 				    pixmap->drawable.bitsPerPixel,
 				    kgem_choose_tiling(&sna->kgem,
 						       I915_TILING_X, w, h,
 						       pixmap->drawable.bitsPerPixel),
 				    0);
-		if (!bo) {
-			DBG(("%s: failed to create bo, using clear\n",
-			     __FUNCTION__));
-			return 0;
-		}
-
-		if (!sna_blt_copy_boxes(sna, GXcopy,
-					sna_pixmap_get_bo(pixmap), 0, 0,
+		if (bo && !sna_blt_copy_boxes(sna, GXcopy,
+					src_bo, 0, 0,
 					bo, -box.x1, -box.y1,
 					pixmap->drawable.bitsPerPixel,
 					&box, 1)) {
-			DBG(("%s: fallback -- unable to copy boxes\n",
-			     __FUNCTION__));
-			return sna_render_picture_fixup(sna, picture, channel,
-							x, y, ow, oh, dst_x, dst_y);
+			kgem_bo_destroy(&sna->kgem, bo);
+			bo = NULL;
 		}
 	}
 
+	if (bo == NULL) {
+		DBG(("%s: falback -- pixmap is not on the GPU\n",
+		     __FUNCTION__));
+		return sna_render_picture_fixup(sna, picture, channel,
+						x, y, ow, oh, dst_x, dst_y);
+	}
+
 	if (ox == x && oy == y) {
 		x = y = 0;
 	} else if (channel->transform) {
commit e583af9cca4ad2e5643317447c6b065d3ee7d11e
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Jan 25 23:04:50 2012 +0000

    sna: Experiment with creating large objects as CPU bo
    
    Even on non-LLC systems, if we can prevent the migration of such
    objects, we can still benefit immensely from being able to map them
    into the GTT as required.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
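
The split of max_object_size into two limits follows directly from the
aperture sizes: tiled (GPU) objects are bounded by the mappable aperture,
while linear objects that can live as CPU bos are bounded only by the
total aperture. A sketch of the derivation, with an illustrative cap
standing in for MAX_OBJECT_SIZE:

    #include <stdint.h>

    #define MAX_OBJECT_SIZE (128u * 1024 * 1024) /* assumed cap */

    static uint32_t min_u32(uint32_t a, uint32_t b)
    {
        return a < b ? a : b;
    }

    static void compute_limits(uint32_t aperture_total,
                               uint32_t aperture_mappable,
                               uint32_t aperture_low,
                               uint32_t *max_gpu_size,
                               uint32_t *max_cpu_size)
    {
        /* A tiled bo must fit comfortably in the mappable aperture and
         * stay below the batch-flush threshold. */
        *max_gpu_size = min_u32(aperture_mappable / 2, aperture_low);
        *max_gpu_size = min_u32(*max_gpu_size, MAX_OBJECT_SIZE);

        /* A linear bo only needs to fit in the total aperture. */
        *max_cpu_size = min_u32(aperture_total / 2, MAX_OBJECT_SIZE);
    }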

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 0955a5d..1bcda22 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -632,6 +632,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 	aperture.aper_size = 64*1024*1024;
 	(void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
 
+	kgem->aperture_total = aperture.aper_size;
 	kgem->aperture_high = aperture.aper_size * 3/4;
 	kgem->aperture_low = aperture.aper_size * 1/4;
 	DBG(("%s: aperture low=%d [%d], high=%d [%d]\n", __FUNCTION__,
@@ -657,12 +658,17 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 		 * disable dual-stream mode */
 		kgem->min_alignment = 64;
 
-	kgem->max_object_size = kgem->aperture_mappable / 2;
-	if (kgem->max_object_size > kgem->aperture_low)
-		kgem->max_object_size = kgem->aperture_low;
-	if (kgem->max_object_size > MAX_OBJECT_SIZE)
-		kgem->max_object_size = MAX_OBJECT_SIZE;
-	DBG(("%s: max object size %d\n", __FUNCTION__, kgem->max_object_size));
+	kgem->max_gpu_size = kgem->aperture_mappable / 2;
+	if (kgem->max_gpu_size > kgem->aperture_low)
+		kgem->max_gpu_size = kgem->aperture_low;
+	if (kgem->max_gpu_size > MAX_OBJECT_SIZE)
+		kgem->max_gpu_size = MAX_OBJECT_SIZE;
+
+	kgem->max_cpu_size = kgem->aperture_total / 2;
+	if (kgem->max_cpu_size > MAX_OBJECT_SIZE)
+		kgem->max_cpu_size = MAX_OBJECT_SIZE;
+	DBG(("%s: max object size (tiled=%d, linear=%d)\n",
+	     __FUNCTION__, kgem->max_gpu_size, kgem->max_cpu_size));
 
 	kgem->fence_max = gem_param(kgem, I915_PARAM_NUM_FENCES_AVAIL) - 2;
 	if ((int)kgem->fence_max < 0)
@@ -979,6 +985,9 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 		goto destroy;
 	}
 
+	if (!kgem->has_llc && IS_CPU_MAP(bo->map) && bo->domain != DOMAIN_CPU)
+		kgem_bo_release_map(kgem, bo);
+
 	assert(list_is_empty(&bo->vma));
 	assert(list_is_empty(&bo->list));
 	assert(bo->vmap == false && bo->sync == false);
@@ -1010,6 +1019,10 @@ static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo)
 	if (!IS_CPU_MAP(bo->map)) {
 		if (!kgem_bo_set_purgeable(kgem, bo))
 			goto destroy;
+
+		if (!kgem->has_llc && bo->domain == DOMAIN_CPU)
+			goto destroy;
+
 		DBG(("%s: handle=%d, purged\n",
 		     __FUNCTION__, bo->handle));
 	}
@@ -1121,8 +1134,11 @@ bool kgem_retire(struct kgem *kgem)
 		if (kgem_bo_set_purgeable(kgem, rq->bo)) {
 			kgem_bo_move_to_inactive(kgem, rq->bo);
 			retired = true;
-		} else
+		} else {
+			DBG(("%s: closing %d\n",
+			     __FUNCTION__, rq->bo->handle));
 			kgem_bo_free(kgem, rq->bo);
+		}
 
 		_list_del(&rq->list);
 		free(rq);
@@ -1679,9 +1695,13 @@ void kgem_purge_cache(struct kgem *kgem)
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) {
-		list_for_each_entry_safe(bo, next, &kgem->inactive[i], list)
-			if (!kgem_bo_is_retained(kgem, bo))
+		list_for_each_entry_safe(bo, next, &kgem->inactive[i], list) {
+			if (!kgem_bo_is_retained(kgem, bo)) {
+				DBG(("%s: purging %d\n",
+				     __FUNCTION__, bo->handle));
 				kgem_bo_free(kgem, bo);
+			}
+		}
 	}
 
 	kgem->need_purge = false;
@@ -1748,6 +1768,8 @@ bool kgem_expire_cache(struct kgem *kgem)
 				count++;
 				size += bo->size;
 				kgem_bo_free(kgem, bo);
+				DBG(("%s: expiring %d\n",
+				     __FUNCTION__, bo->handle));
 			}
 		}
 		if (!list_is_empty(&preserve)) {
@@ -2033,7 +2055,7 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
 	if (tiling &&
 	    kgem_surface_size(kgem, false, false,
 			      width, height, bpp, tiling,
-			      &pitch) > kgem->max_object_size) {
+			      &pitch) > kgem->max_gpu_size) {
 		DBG(("%s: too large (%dx%d) to be fenced, discarding tiling\n",
 		     __FUNCTION__, width, height));
 		tiling = I915_TILING_NONE;
@@ -2096,43 +2118,46 @@ done:
 	return tiling;
 }
 
-static bool _kgem_can_create_2d(struct kgem *kgem,
-				int width, int height, int bpp, int tiling)
+bool kgem_can_create_cpu(struct kgem *kgem,
+			 int width, int height, int depth)
 {
 	uint32_t pitch, size;
 
-	if (bpp < 8)
+	if (depth < 8 || kgem->wedged)
 		return false;
 
-	if (tiling >= 0 && kgem->wedged)
-		return false;
+	size = kgem_surface_size(kgem, false, false,
+				 width, height, BitsPerPixel(depth),
+				 I915_TILING_NONE, &pitch);
+	return size > 0 && size < kgem->max_cpu_size;
+}
 
-	if (tiling < 0)
-		tiling = -tiling;
+static bool _kgem_can_create_gpu(struct kgem *kgem,
+				 int width, int height, int bpp)
+{
+	uint32_t pitch, size;
+
+	if (bpp < 8 || kgem->wedged)
+		return false;
 
 	size = kgem_surface_size(kgem, false, false,
-				 width, height, bpp, tiling, &pitch);
-	if (size == 0 || size >= kgem->max_object_size)
-		size = kgem_surface_size(kgem, false, false,
-					 width, height, bpp,
-					 I915_TILING_NONE, &pitch);
-	return size > 0 && size < kgem->max_object_size;
+				 width, height, bpp, I915_TILING_NONE,
+				 &pitch);
+	return size > 0 && size < kgem->max_gpu_size;
 }
 
 #if DEBUG_KGEM
-bool kgem_can_create_2d(struct kgem *kgem,
-			int width, int height, int bpp, int tiling)
+bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp)
 {
-	bool ret = _kgem_can_create_2d(kgem, width, height, bpp, tiling);
-	DBG(("%s(%dx%d, bpp=%d, tiling=%d) = %d\n", __FUNCTION__,
-	     width, height, bpp, tiling, ret));
+	bool ret = _kgem_can_create_gpu(kgem, width, height, bpp);
+	DBG(("%s(%dx%d, bpp=%d) = %d\n", __FUNCTION__,
+	     width, height, bpp, ret));
 	return ret;
 }
 #else
-bool kgem_can_create_2d(struct kgem *kgem,
-			int width, int height, int bpp, int tiling)
+bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp)
 {
-	return _kgem_can_create_2d(kgem, width, height, bpp, tiling);
+	return _kgem_can_create_gpu(kgem, width, height, bpp);
 }
 #endif
 
@@ -2177,12 +2202,12 @@ struct kgem_bo *kgem_create_2d(struct kgem *kgem,
 	     !!(flags & CREATE_GTT_MAP),
 	     !!(flags & CREATE_SCANOUT)));
 
-	assert(_kgem_can_create_2d(kgem, width, height, bpp, flags & CREATE_EXACT ? -tiling : tiling));
 	size = kgem_surface_size(kgem,
 				 kgem->has_relaxed_fencing,
 				 flags & CREATE_SCANOUT,
 				 width, height, bpp, tiling, &pitch);
-	assert(size && size <= kgem->max_object_size);
+	assert(size && size < kgem->max_cpu_size);
+	assert(tiling == I915_TILING_NONE || size < kgem->max_gpu_size);
 
 	if (flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
 		int for_cpu = !!(flags & CREATE_CPU_MAP);
@@ -2342,6 +2367,9 @@ skip_active_search:
 			continue;
 		}
 
+		if ((flags & CREATE_CPU_MAP) == 0 && IS_CPU_MAP(bo->map))
+			continue;
+
 		if (bo->tiling != tiling ||
 		    (tiling != I915_TILING_NONE && bo->pitch != pitch)) {
 			if (tiling != gem_set_tiling(kgem->fd,
@@ -2643,8 +2671,11 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
 		list_del(&bo->vma);
 		kgem->vma[type].count--;
 
-		if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo))
+		if (!bo->purged && !kgem_bo_set_purgeable(kgem, bo)) {
+			DBG(("%s: freeing unpurgeable old mapping\n",
+			     __FUNCTION__));
 			kgem_bo_free(kgem, bo);
+		}
 	}
 }
 
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 652c2d7..fd3aa9d 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -151,10 +151,10 @@ struct kgem {
 
 	uint16_t fence_max;
 	uint16_t half_cpu_cache_pages;
-	uint32_t aperture_high, aperture_low, aperture;
-	uint32_t aperture_fenced, aperture_mappable;
+	uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable;
+	uint32_t aperture, aperture_fenced;
 	uint32_t min_alignment;
-	uint32_t max_object_size;
+	uint32_t max_gpu_size, max_cpu_size;
 	uint32_t partial_buffer_size;
 
 	void (*context_switch)(struct kgem *kgem, int new_mode);
@@ -200,8 +200,8 @@ struct kgem_bo *kgem_upload_source_image_halved(struct kgem *kgem,
 
 int kgem_choose_tiling(struct kgem *kgem,
 		       int tiling, int width, int height, int bpp);
-bool kgem_can_create_2d(struct kgem *kgem,
-			int width, int height, int bpp, int tiling);
+bool kgem_can_create_gpu(struct kgem *kgem, int width, int height, int bpp);
+bool kgem_can_create_cpu(struct kgem *kgem, int width, int height, int depth);
 
 struct kgem_bo *
 kgem_replace_bo(struct kgem *kgem,
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index b28134c..759e0fe 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -245,7 +245,7 @@ sna_pixmap_alloc_cpu(struct sna *sna,
 
 	assert(priv->stride);
 
-	if (sna->kgem.has_cpu_bo) {
+	if (sna->kgem.has_cpu_bo || !priv->gpu) {
 		DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__,
 		     pixmap->drawable.width, pixmap->drawable.height));
 
@@ -515,11 +515,10 @@ struct sna_pixmap *_sna_pixmap_attach(PixmapPtr pixmap)
 		break;
 
 	default:
-		if (!kgem_can_create_2d(&sna->kgem,
-					pixmap->drawable.width,
-					pixmap->drawable.height,
-					pixmap->drawable.bitsPerPixel,
-					I915_TILING_NONE))
+		if (!kgem_can_create_gpu(&sna->kgem,
+					 pixmap->drawable.width,
+					 pixmap->drawable.height,
+					 pixmap->drawable.bitsPerPixel))
 			return NULL;
 		break;
 	}
@@ -586,6 +585,11 @@ sna_pixmap_create_scratch(ScreenPtr screen,
 		return create_pixmap(sna, screen, width, height, depth,
 				     CREATE_PIXMAP_USAGE_SCRATCH);
 
+	bpp = BitsPerPixel(depth);
+	if (!kgem_can_create_gpu(&sna->kgem, width, height, bpp))
+		return create_pixmap(sna, screen, width, height, depth,
+				     CREATE_PIXMAP_USAGE_SCRATCH);
+
 	if (tiling == I915_TILING_Y && !sna->have_render)
 		tiling = I915_TILING_X;
 
@@ -594,11 +598,7 @@ sna_pixmap_create_scratch(ScreenPtr screen,
 	     height > sna->render.max_3d_size))
 		tiling = I915_TILING_X;
 
-	bpp = BitsPerPixel(depth);
 	tiling = kgem_choose_tiling(&sna->kgem, tiling, width, height, bpp);
-	if (!kgem_can_create_2d(&sna->kgem, width, height, bpp, tiling))
-		return create_pixmap(sna, screen, width, height, depth,
-				     CREATE_PIXMAP_USAGE_SCRATCH);
 
 	/* you promise never to access this via the cpu... */
 	if (sna->freed_pixmap) {
@@ -669,7 +669,10 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
 	DBG(("%s(%d, %d, %d, usage=%x)\n", __FUNCTION__,
 	     width, height, depth, usage));
 
-	if (depth < 8 || wedged(sna) || !sna->have_render)
+	if (!kgem_can_create_cpu(&sna->kgem, width, height, depth))
+		return create_pixmap(sna, screen, width, height, depth, usage);
+
+	if (!sna->have_render)
 		return create_pixmap(sna, screen,
 				     width, height, depth,
 				     usage);
@@ -696,13 +699,11 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
 						 width, height, depth,
 						 I915_TILING_Y);
 
-	if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE ||
-	    !kgem_can_create_2d(&sna->kgem, width, height,
-				BitsPerPixel(depth), I915_TILING_NONE))
+	if (usage == CREATE_PIXMAP_USAGE_GLYPH_PICTURE)
 		return create_pixmap(sna, screen, width, height, depth, usage);
 
 	pad = PixmapBytePad(width, depth);
-	if (pad*height <= 4096) {
+	if (pad * height <= 4096) {
 		pixmap = create_pixmap(sna, screen,
 				       width, height, depth, usage);
 		if (pixmap == NullPixmap)
@@ -729,7 +730,9 @@ static PixmapPtr sna_create_pixmap(ScreenPtr screen,
 		}
 
 		priv->stride = pad;
-		priv->gpu = true;
+		priv->gpu = kgem_can_create_gpu(&sna->kgem,
+						width, height,
+						pixmap->drawable.bitsPerPixel);
 	}
 
 	return pixmap;
@@ -1821,6 +1824,7 @@ _sna_drawable_use_cpu_bo(DrawablePtr drawable,
 {
 	PixmapPtr pixmap = get_drawable_pixmap(drawable);
 	struct sna_pixmap *priv = sna_pixmap(pixmap);
+	struct sna *sna = to_sna_from_pixmap(pixmap);
 	BoxRec extents;
 	int16_t dx, dy;
 
@@ -1829,6 +1833,9 @@ _sna_drawable_use_cpu_bo(DrawablePtr drawable,
 	if (priv == NULL || priv->cpu_bo == NULL)
 		return FALSE;
 
+	if (!sna->kgem.has_llc && priv->cpu_bo->domain == DOMAIN_CPU)
+		return FALSE;
+
 	if (DAMAGE_IS_ALL(priv->cpu_damage)) {
 		*damage = NULL;
 		return TRUE;
@@ -1876,9 +1883,7 @@ sna_pixmap_create_upload(ScreenPtr screen,
 	assert(width);
 	assert(height);
 	if (!sna->have_render ||
-	    !kgem_can_create_2d(&sna->kgem,
-				width, height, bpp,
-				I915_TILING_NONE))
+	    !kgem_can_create_gpu(&sna->kgem, width, height, bpp))
 		return create_pixmap(sna, screen, width, height, depth,
 				     CREATE_PIXMAP_USAGE_SCRATCH);
 
@@ -2024,7 +2029,7 @@ sna_pixmap_move_to_gpu(PixmapPtr pixmap, unsigned flags)
 	sna_damage_reduce(&priv->cpu_damage);
 	DBG(("%s: CPU damage? %d\n", __FUNCTION__, priv->cpu_damage != NULL));
 	if (priv->gpu_bo == NULL) {
-		if (!wedged(sna))
+		if (!wedged(sna) && priv->gpu)
 			priv->gpu_bo =
 				kgem_create_2d(&sna->kgem,
 					       pixmap->drawable.width,
@@ -3195,24 +3200,19 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
 	}
 
 	/* Try to maintain the data on the GPU */
-	if (dst_priv->gpu_bo == NULL &&
+	if (dst_priv->gpu_bo == NULL && dst_priv->gpu &&
 	    ((dst_priv->cpu_damage == NULL && copy_use_gpu_bo(sna, dst_priv, &region)) ||
 	     (src_priv && (src_priv->gpu_bo != NULL || (src_priv->cpu_bo && kgem_bo_is_busy(src_priv->cpu_bo)))))) {
 		uint32_t tiling = sna_pixmap_choose_tiling(dst_pixmap);
 
 		DBG(("%s: create dst GPU bo for upload\n", __FUNCTION__));
 
-		if (kgem_can_create_2d(&sna->kgem,
+		dst_priv->gpu_bo =
+			kgem_create_2d(&sna->kgem,
 				       dst_pixmap->drawable.width,
 				       dst_pixmap->drawable.height,
 				       dst_pixmap->drawable.bitsPerPixel,
-				       tiling))
-			dst_priv->gpu_bo =
-				kgem_create_2d(&sna->kgem,
-					       dst_pixmap->drawable.width,
-					       dst_pixmap->drawable.height,
-					       dst_pixmap->drawable.bitsPerPixel,
-					       tiling, 0);
+				       tiling, 0);
 	}
 
 	if (dst_priv->gpu_bo) {
commit 55569272f7d4232ef50f7b964dda82f85a190b99
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Jan 25 20:13:27 2012 +0000

    sna: Apply the same migration flags for the dst alphamap as for the dst pixmap
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c
index 6fb2d27..0faad84 100644
--- a/src/sna/sna_composite.c
+++ b/src/sna/sna_composite.c
@@ -519,8 +519,7 @@ fallback:
 	if (!sna_drawable_move_region_to_cpu(dst->pDrawable, &region, flags))
 		goto out;
 	if (dst->alphaMap &&
-	    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable,
-				      MOVE_WRITE | MOVE_READ))
+	    !sna_drawable_move_to_cpu(dst->alphaMap->pDrawable, flags))
 		goto out;
 	if (src->pDrawable) {
 		if (!sna_drawable_move_to_cpu(src->pDrawable,
commit 4a132ddbf06e5ffc364c25002a1e46ad8bf0e45a
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Jan 25 20:12:55 2012 +0000

    sna: Correct offset for moving drawable regions to the CPU
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
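
The subtle point is that a drawable's origin is not necessarily (0, 0)
inside its backing pixmap: a window sits at (drawable->x, drawable->y),
so a region built at the origin covers the wrong pixels. A small
self-contained illustration using pixman directly:

    #include <pixman.h>
    #include <stdio.h>

    int main(void)
    {
        pixman_region16_t region;

        /* A 100x50 drawable whose origin sits at (20, 30) inside its
         * backing pixmap: the region must be initialised at that
         * offset, as in the fix, not at (0, 0). */
        pixman_region_init_rect(&region, 20, 30, 100, 50);

        printf("extents: (%d, %d)-(%d, %d)\n",
               region.extents.x1, region.extents.y1,
               region.extents.x2, region.extents.y2);

        pixman_region_fini(&region);
        return 0;
    }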

diff --git a/src/sna/sna.h b/src/sna/sna.h
index d8f96e0..bc14967 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -465,7 +465,8 @@ sna_drawable_move_to_cpu(DrawablePtr drawable, unsigned flags)
 	RegionRec region;
 
 	pixman_region_init_rect(&region,
-				0, 0, drawable->width, drawable->height);
+				drawable->x, drawable->y,
+				drawable->width, drawable->height);
 	return sna_drawable_move_region_to_cpu(drawable, &region, flags);
 }
 
commit 65164d90b7b17ec7eea1e24d4b02ec037b55b1ff
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Jan 25 20:12:27 2012 +0000

    sna/gen2+: Do not force use of GPU if the target is simply cleared
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index d75a412..eb8d4ef 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -1558,7 +1558,7 @@ gen2_composite_fallback(struct sna *sna,
 
 	/* If anything is on the GPU, push everything out to the GPU */
 	priv = sna_pixmap(dst_pixmap);
-	if (priv && priv->gpu_damage) {
+	if (priv && priv->gpu_damage && !priv->clear) {
 		DBG(("%s: dst is already on the GPU, try to use GPU\n",
 		     __FUNCTION__));
 		return FALSE;
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index 618f694..95a79b2 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2467,7 +2467,7 @@ gen3_composite_fallback(struct sna *sna,
 
 	/* If anything is on the GPU, push everything out to the GPU */
 	priv = sna_pixmap(dst_pixmap);
-	if (priv && priv->gpu_damage) {
+	if (priv && priv->gpu_damage && !priv->clear) {
 		DBG(("%s: dst is already on the GPU, try to use GPU\n",
 		     __FUNCTION__));
 		return FALSE;
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index c1ceb33..c798ce5 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2056,7 +2056,7 @@ gen4_composite_fallback(struct sna *sna,
 
 	/* If anything is on the GPU, push everything out to the GPU */
 	priv = sna_pixmap(dst_pixmap);
-	if (priv && priv->gpu_damage) {
+	if (priv && priv->gpu_damage && !priv->clear) {
 		DBG(("%s: dst is already on the GPU, try to use GPU\n",
 		     __FUNCTION__));
 		return FALSE;
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index 6308d10..47c4e96 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2094,7 +2094,7 @@ gen5_composite_fallback(struct sna *sna,
 
 	/* If anything is on the GPU, push everything out to the GPU */
 	priv = sna_pixmap(dst_pixmap);
-	if (priv && priv->gpu_damage) {
+	if (priv && priv->gpu_damage && !priv->clear) {
 		DBG(("%s: dst is already on the GPU, try to use GPU\n",
 		     __FUNCTION__));
 		return FALSE;
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 5cbdd74..c3bc2e7 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2307,7 +2307,9 @@ gen6_composite_fallback(struct sna *sna,
 
 	/* If anything is on the GPU, push everything out to the GPU */
 	priv = sna_pixmap(dst_pixmap);
-	if (priv && (priv->gpu_damage || (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)))) {
+	if (priv &&
+	    ((priv->gpu_damage && !priv->clear) ||
+	     (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)))) {
 		DBG(("%s: dst is already on the GPU, try to use GPU\n",
 		     __FUNCTION__));
 		return FALSE;
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index ee546e1..21d8c99 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2374,7 +2374,9 @@ gen7_composite_fallback(struct sna *sna,
 
 	/* If anything is on the GPU, push everything out to the GPU */
 	priv = sna_pixmap(dst_pixmap);
-	if (priv && (priv->gpu_damage || (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)))) {
+	if (priv &&
+	    ((priv->gpu_damage && !priv->clear) ||
+	     (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)))) {
 		DBG(("%s: dst is already on the GPU, try to use GPU\n",
 		     __FUNCTION__));
 		return FALSE;
commit 307f493d76580687a3cf56106bf296475f1f53e5
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Jan 25 20:11:21 2012 +0000

    sna: Map freshly created, unbound bo through the CPU
    
    Take advantage of the fact that we will have to clflush the unbound
    bo before the GPU uses it, and populate it in place.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
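
Mapping "through the CPU" corresponds to the I915_GEM_MMAP ioctl followed
by moving the bo into the CPU domain before writing. A hedged sketch of
that sequence against the kernel uapi (header path and error handling
simplified; fd and handle are assumed to come from an existing GEM
context):

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <drm/i915_drm.h>

    /* Map a freshly created, unbound bo via a CPU mapping rather than
     * the GTT: the bo needs a clflush before the GPU touches it anyway,
     * so it may as well be populated in place through cheap WB pages.
     */
    static void *bo_map_cpu(int fd, uint32_t handle, uint64_t size)
    {
        struct drm_i915_gem_mmap mmap_arg = {
            .handle = handle,
            .size = size,
        };
        struct drm_i915_gem_set_domain set_domain = {
            .handle = handle,
            .read_domains = I915_GEM_DOMAIN_CPU,
            .write_domain = I915_GEM_DOMAIN_CPU,
        };

        if (ioctl(fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg))
            return NULL;

        /* Serialise with any GPU access and mark the pages CPU-owned. */
        if (ioctl(fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain))
            return NULL;

        return (void *)(uintptr_t)mmap_arg.addr_ptr;
    }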

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 1e78c1a..0955a5d 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -865,8 +865,10 @@ static void kgem_bo_release_map(struct kgem *kgem, struct kgem_bo *bo)
 	munmap(CPU_MAP(bo->map), bo->size);
 	bo->map = NULL;
 
-	list_del(&bo->vma);
-	kgem->vma[type].count--;
+	if (!list_is_empty(&bo->vma)) {
+		list_del(&bo->vma);
+		kgem->vma[type].count--;
+	}
 }
 
 static void kgem_bo_free(struct kgem *kgem, struct kgem_bo *bo)
@@ -2393,6 +2395,7 @@ skip_active_search:
 		return NULL;
 	}
 
+	bo->domain = DOMAIN_CPU;
 	bo->unique_id = kgem_get_unique_id(kgem);
 	bo->pitch = pitch;
 	if (tiling != I915_TILING_NONE)
@@ -2598,6 +2601,8 @@ static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
 {
 	int i, j;
 
+	DBG(("%s: type=%d, count=%d (bucket: %d)\n",
+	     __FUNCTION__, type, kgem->vma[type].count, bucket));
 	if (kgem->vma[type].count <= 0)
 	       return;
 
@@ -2647,14 +2652,17 @@ void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo)
 {
 	void *ptr;
 
-	DBG(("%s: handle=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
-	     bo->handle, bo->tiling, bo->map, bo->domain));
+	DBG(("%s: handle=%d, offset=%d, tiling=%d, map=%p, domain=%d\n", __FUNCTION__,
+	     bo->handle, bo->presumed_offset, bo->tiling, bo->map, bo->domain));
 
 	assert(!bo->purged);
 	assert(bo->exec == NULL);
 	assert(list_is_empty(&bo->list));
 
-	if (kgem->has_llc && bo->tiling == I915_TILING_NONE) {
+	if (bo->tiling == I915_TILING_NONE &&
+	    (kgem->has_llc || bo->domain == bo->presumed_offset)) {
+		DBG(("%s: converting request for GTT map into CPU map\n",
+		     __FUNCTION__));
 		ptr = kgem_bo_map__cpu(kgem, bo);
 		kgem_bo_sync__cpu(kgem, bo);
 		return ptr;
@@ -3110,6 +3118,8 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 		if (old == NULL)
 			old = search_linear_cache(kgem, alloc, CREATE_INACTIVE);
 		if (old) {
+			DBG(("%s: reusing ordinary handle %d for io\n",
+			     __FUNCTION__, old->handle));
 			alloc = old->size;
 			bo = partial_bo_alloc(alloc);
 			if (bo == NULL)
@@ -3125,20 +3135,55 @@ struct kgem_bo *kgem_create_buffer(struct kgem *kgem,
 			list_init(&bo->base.list);
 			free(old);
 			bo->base.refcnt = 1;
+
+			bo->need_io = flags & KGEM_BUFFER_WRITE;
+			bo->base.io = true;
 		} else {
-			bo = partial_bo_alloc(alloc);
+			bo = malloc(sizeof(*bo));
 			if (bo == NULL)
 				return NULL;
 
-			if (!__kgem_bo_init(&bo->base,
-					    gem_create(kgem->fd, alloc),
-					    alloc)) {
-				free(bo);
+			old = search_linear_cache(kgem, alloc,
+						  CREATE_INACTIVE | CREATE_CPU_MAP);
+			if (old) {
+				DBG(("%s: reusing cpu map handle=%d for buffer\n",
+				     __FUNCTION__, old->handle));
+
+				memcpy(&bo->base, old, sizeof(*old));
+				if (old->rq)
+					list_replace(&old->request, &bo->base.request);
+				else
+					list_init(&bo->base.request);
+				list_replace(&old->vma, &bo->base.vma);
+				list_init(&bo->base.list);
+				free(old);
+				bo->base.refcnt = 1;
+			} else {
+				if (!__kgem_bo_init(&bo->base,
+						    gem_create(kgem->fd, alloc),
+						    alloc)) {
+					free(bo);
+					return NULL;
+				}
+				DBG(("%s: created handle=%d for buffer\n",
+				     __FUNCTION__, bo->base.handle));
+
+				bo->base.domain = DOMAIN_CPU;
+			}
+
+			bo->mem = kgem_bo_map__cpu(kgem, &bo->base);
+			if (bo->mem == NULL) {
+				kgem_bo_free(kgem, &bo->base);
 				return NULL;
 			}
+
+			if (flags & KGEM_BUFFER_WRITE)
+				kgem_bo_sync__cpu(kgem, &bo->base);
+
+			bo->need_io = false;
+			bo->base.io = true;
+			bo->mmapped = true;
 		}
-		bo->need_io = flags & KGEM_BUFFER_WRITE;
-		bo->base.io = true;
 	}
 	bo->base.reusable = false;
 	assert(bo->base.size == alloc);
@@ -3343,8 +3388,8 @@ void kgem_buffer_read_sync(struct kgem *kgem, struct kgem_bo *_bo)
 		gem_read(kgem->fd,
 			 bo->base.handle, (char *)bo->mem+offset,
 			 offset, length);
+		kgem_bo_map__cpu(kgem, &bo->base);
 		bo->base.domain = DOMAIN_NONE;
-		bo->base.reusable = false; /* in the CPU cache now */
 	}
 	kgem_bo_retire(kgem, &bo->base);
 }

