xf86-video-intel: 4 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c

Wed Feb 8 12:12:18 PST 2012

src/sna/gen2_render.c |    8 +++-
 src/sna/gen3_render.c |    8 +++-
 src/sna/gen4_render.c |    8 +++-
 src/sna/gen5_render.c |    8 +++-
 src/sna/gen6_render.c |    8 +++-
 src/sna/gen7_render.c |    8 +++-
 src/sna/kgem.c        |   89 ++++++++++++++++++++++++++++----------------------
 7 files changed, 93 insertions(+), 44 deletions(-)

New commits:
commit 4d8369f8e60fd4f5a0ef49f3e9866ea5ecb21927
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Feb 8 13:15:46 2012 +0000

    sna/gen2+: Force upload rather than perform source transformations on the CPU
    
    If both the source and destination is on the CPU, then the thinking was
    it would be quicker to operate on those on the CPU rather than copy both
    to the GPU and then perform the operation. This turns out to be a false
    assumption if transformation is involved -- something to be reconsidered
    if pixman should ever be improved.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index ed48ce6..64b4e7c 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -1507,9 +1507,15 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
+untransformed(PicturePtr p)
+{
+	return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
 need_upload(PicturePtr p)
 {
-	return p->pDrawable && unattached(p->pDrawable);
+	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
 }
 
 static bool
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index fc006ac..97e5839 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2419,9 +2419,15 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
+untransformed(PicturePtr p)
+{
+	return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
 need_upload(PicturePtr p)
 {
-	return p->pDrawable && unattached(p->pDrawable);
+	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
 }
 
 static bool
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 6246538..6e7d4be 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2016,9 +2016,15 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
+untransformed(PicturePtr p)
+{
+	return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
 need_upload(PicturePtr p)
 {
-	return p->pDrawable && unattached(p->pDrawable);
+	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
 }
 
 static bool
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index b9c7a92..7ac993c 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2044,9 +2044,15 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
+untransformed(PicturePtr p)
+{
+	return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
 need_upload(PicturePtr p)
 {
-	return p->pDrawable && unattached(p->pDrawable);
+	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
 }
 
 static bool
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 17789e9..1476ff7 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2325,9 +2325,15 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
+untransformed(PicturePtr p)
+{
+	return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
 need_upload(PicturePtr p)
 {
-	return p->pDrawable && unattached(p->pDrawable);
+	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
 }
 
 static bool
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 9757405..5740a42 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2329,9 +2329,15 @@ has_alphamap(PicturePtr p)
 }
 
 static bool
+untransformed(PicturePtr p)
+{
+	return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
 need_upload(PicturePtr p)
 {
-	return p->pDrawable && unattached(p->pDrawable);
+	return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
 }
 
 static bool
commit 8634d461bd9e5a3d3f75b0efc11db87b8d3e1245
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Feb 8 09:13:27 2012 +0000

    sna: Limit max CPU bo size to prevent aperture thrashing on upload
    
    Copying between two objects that consume more than the available GATT
    space is a painful experience due to the forced use of an intermediatory
    and eviction on every batch. The tiled upload paths are in comparison
    remarkably efficient, so favour their use when handling extremely large
    buffers.
    
    This reverses the previous idea in that we now prefer large GPU bo
    rather than large CPU bo, as the render pipeline is far more flexible
    for handling those than the blitter is for handling the CPU bo (at least
    for gen4+).
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 94b6c18..1c23320 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -575,6 +575,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 {
 	struct drm_i915_gem_get_aperture aperture;
 	size_t totalram;
+	unsigned half_gpu_max;
 	unsigned int i, j;
 
 	memset(kgem, 0, sizeof(*kgem));
@@ -679,7 +680,6 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 		kgem->min_alignment = 64;
 
 	kgem->max_object_size = 2 * kgem->aperture_total / 3;
-	kgem->max_cpu_size = kgem->max_object_size;
 	kgem->max_gpu_size = kgem->max_object_size;
 	if (!kgem->has_llc)
 		kgem->max_gpu_size = MAX_CACHE_SIZE;
@@ -691,16 +691,6 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 		if (kgem->max_gpu_size > kgem->aperture_low)
 			kgem->max_gpu_size = kgem->aperture_low;
 	}
-	if (kgem->max_gpu_size > kgem->max_cpu_size)
-		kgem->max_gpu_size = kgem->max_cpu_size;
-
-	kgem->max_upload_tile_size = kgem->aperture_mappable / 2;
-	if (kgem->max_upload_tile_size > kgem->max_gpu_size / 2)
-		kgem->max_upload_tile_size = kgem->max_gpu_size / 2;
-
-	kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
-	if (kgem->max_copy_tile_size > kgem->max_gpu_size / 2)
-		kgem->max_copy_tile_size = kgem->max_gpu_size / 2;
 
 	totalram = total_ram_size();
 	if (totalram == 0) {
@@ -708,14 +698,32 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
 		     __FUNCTION__));
 		totalram = kgem->aperture_total;
 	}
+	DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram));
 	if (kgem->max_object_size > totalram / 2)
 		kgem->max_object_size = totalram / 2;
-	if (kgem->max_cpu_size > totalram / 2)
-		kgem->max_cpu_size = totalram / 2;
 	if (kgem->max_gpu_size > totalram / 4)
 		kgem->max_gpu_size = totalram / 4;
 
+	half_gpu_max = kgem->max_gpu_size / 2;
+	if (kgem->gen >= 40)
+		kgem->max_cpu_size = half_gpu_max;
+	else
+		kgem->max_cpu_size = kgem->max_object_size;
+
+	kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
+	if (kgem->max_copy_tile_size > half_gpu_max)
+		kgem->max_copy_tile_size = half_gpu_max;
+
+	if (kgem->has_llc)
+		kgem->max_upload_tile_size = kgem->max_copy_tile_size;
+	else
+		kgem->max_upload_tile_size = kgem->aperture_mappable / 4;
+	if (kgem->max_upload_tile_size > half_gpu_max)
+		kgem->max_upload_tile_size = half_gpu_max;
+
 	kgem->large_object_size = MAX_CACHE_SIZE;
+	if (kgem->large_object_size > kgem->max_cpu_size)
+		kgem->large_object_size = kgem->max_cpu_size;
 	if (kgem->large_object_size > kgem->max_gpu_size)
 		kgem->large_object_size = kgem->max_gpu_size;
 
commit 5b16972d7850b2347efc084311d664e14263cba1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Feb 7 23:45:37 2012 +0000

    sna: Check that we successfully retired an active linear buffer
    
    If we go to the trouble of running retire before searching, we may as
    well check that we retired something before proceeding to check all the
    inactive lists.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 0c2f547..94b6c18 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1945,11 +1945,16 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
 	if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE)
 		return NULL;
 
-	if (!use_active &&
-	    list_is_empty(inactive(kgem, num_pages)) &&
-	    !list_is_empty(active(kgem, num_pages, I915_TILING_NONE)) &&
-	    !kgem_retire(kgem))
-		return NULL;
+	if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
+		if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE)))
+			return NULL;
+
+		if (!kgem_retire(kgem))
+			return NULL;
+
+		if (list_is_empty(inactive(kgem, num_pages)))
+			return NULL;
+	}
 
 	if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
 		int for_cpu = !!(flags & CREATE_CPU_MAP);
commit 207b4d4482a6af4a39472ec20ff04fa0c9322d73
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Feb 7 21:56:29 2012 +0000

    sna: Relax must-be-blittable rules for gen4+
    
    The render pipeline is actually more flexible than the blitter for
    dealing with large surfaces and so the BLT is no longer the limiting
    factor on gen4+.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index e80eaae..0c2f547 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -799,6 +799,9 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
 	uint32_t tile_width, tile_height;
 	uint32_t size;
 
+	assert(width <= MAXSHORT);
+	assert(height <= MAXSHORT);
+
 	if (kgem->gen < 30) {
 		if (tiling) {
 			tile_width = 512;
@@ -823,32 +826,26 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
 		break;
 	}
 
-	/* If it is too wide for the blitter, don't even bother.  */
 	*pitch = ALIGN(width * bpp / 8, tile_width);
-	if (kgem->gen < 40) {
-		if (tiling != I915_TILING_NONE) {
-			if (*pitch > 8192)
-				return 0;
-			for (size = tile_width; size < *pitch; size <<= 1)
-				;
-			*pitch = size;
-		} else {
-			if (*pitch >= 32768)
-				return 0;
-		}
+	height = ALIGN(height, tile_height);
+	if (kgem->gen >= 40)
+		return PAGE_ALIGN(*pitch * height);
+
+	/* If it is too wide for the blitter, don't even bother.  */
+	if (tiling != I915_TILING_NONE) {
+		if (*pitch > 8192)
+			return 0;
+
+		for (size = tile_width; size < *pitch; size <<= 1)
+			;
+		*pitch = size;
 	} else {
-		int limit = 32768;
-		if (tiling)
-			limit *= 4;
-		if (*pitch >= limit)
+		if (*pitch >= 32768)
 			return 0;
 	}
-	height = ALIGN(height, tile_height);
-	if (height >= 65536)
-		return 0;
 
 	size = *pitch * height;
-	if (relaxed_fencing || tiling == I915_TILING_NONE || kgem->gen >= 40)
+	if (relaxed_fencing || tiling == I915_TILING_NONE)
 		return PAGE_ALIGN(size);
 
 	/*  We need to allocate a pot fence region for a tiled buffer. */
@@ -2233,6 +2230,9 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
 	if (depth < 8 || kgem->wedged)
 		return 0;
 
+	if (width > MAXSHORT || height > MAXSHORT)
+		return 0;
+
 	size = kgem_surface_size(kgem, false, false,
 				 width, height, bpp,
 				 I915_TILING_NONE, &pitch);