xf86-video-intel: 4 commits - src/sna/gen2_render.c src/sna/gen3_render.c src/sna/gen4_render.c src/sna/gen5_render.c src/sna/gen6_render.c src/sna/gen7_render.c src/sna/kgem.c
Chris Wilson
ickle at kemper.freedesktop.org
Wed Feb 8 12:12:18 PST 2012
src/sna/gen2_render.c | 8 +++-
src/sna/gen3_render.c | 8 +++-
src/sna/gen4_render.c | 8 +++-
src/sna/gen5_render.c | 8 +++-
src/sna/gen6_render.c | 8 +++-
src/sna/gen7_render.c | 8 +++-
src/sna/kgem.c | 89 ++++++++++++++++++++++++++++----------------------
7 files changed, 93 insertions(+), 44 deletions(-)
New commits:
commit 4d8369f8e60fd4f5a0ef49f3e9866ea5ecb21927
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Wed Feb 8 13:15:46 2012 +0000
sna/gen2+: Force upload rather than perform source transformations on the CPU
If both the source and destination are on the CPU, then the thinking was
it would be quicker to operate on those on the CPU rather than copy both
to the GPU and then perform the operation. This turns out to be a false
assumption if transformation is involved -- something to be reconsidered
if pixman should ever be improved.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c
index ed48ce6..64b4e7c 100644
--- a/src/sna/gen2_render.c
+++ b/src/sna/gen2_render.c
@@ -1507,9 +1507,15 @@ has_alphamap(PicturePtr p)
}
static bool
+untransformed(PicturePtr p)
+{
+ return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
need_upload(PicturePtr p)
{
- return p->pDrawable && unattached(p->pDrawable);
+ return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}
static bool
diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c
index fc006ac..97e5839 100644
--- a/src/sna/gen3_render.c
+++ b/src/sna/gen3_render.c
@@ -2419,9 +2419,15 @@ has_alphamap(PicturePtr p)
}
static bool
+untransformed(PicturePtr p)
+{
+ return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
need_upload(PicturePtr p)
{
- return p->pDrawable && unattached(p->pDrawable);
+ return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}
static bool
diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c
index 6246538..6e7d4be 100644
--- a/src/sna/gen4_render.c
+++ b/src/sna/gen4_render.c
@@ -2016,9 +2016,15 @@ has_alphamap(PicturePtr p)
}
static bool
+untransformed(PicturePtr p)
+{
+ return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
need_upload(PicturePtr p)
{
- return p->pDrawable && unattached(p->pDrawable);
+ return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}
static bool
diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c
index b9c7a92..7ac993c 100644
--- a/src/sna/gen5_render.c
+++ b/src/sna/gen5_render.c
@@ -2044,9 +2044,15 @@ has_alphamap(PicturePtr p)
}
static bool
+untransformed(PicturePtr p)
+{
+ return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
need_upload(PicturePtr p)
{
- return p->pDrawable && unattached(p->pDrawable);
+ return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}
static bool
diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c
index 17789e9..1476ff7 100644
--- a/src/sna/gen6_render.c
+++ b/src/sna/gen6_render.c
@@ -2325,9 +2325,15 @@ has_alphamap(PicturePtr p)
}
static bool
+untransformed(PicturePtr p)
+{
+ return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
need_upload(PicturePtr p)
{
- return p->pDrawable && unattached(p->pDrawable);
+ return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}
static bool
diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c
index 9757405..5740a42 100644
--- a/src/sna/gen7_render.c
+++ b/src/sna/gen7_render.c
@@ -2329,9 +2329,15 @@ has_alphamap(PicturePtr p)
}
static bool
+untransformed(PicturePtr p)
+{
+ return !p->transform || pixman_transform_is_int_translate(p->transform);
+}
+
+static bool
need_upload(PicturePtr p)
{
- return p->pDrawable && unattached(p->pDrawable);
+ return p->pDrawable && unattached(p->pDrawable) && untransformed(p);
}
static bool
commit 8634d461bd9e5a3d3f75b0efc11db87b8d3e1245
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Wed Feb 8 09:13:27 2012 +0000
sna: Limit max CPU bo size to prevent aperture thrashing on upload
Copying between two objects that consume more than the available GATT
space is a painful experience due to the forced use of an intermediary
and eviction on every batch. The tiled upload paths are in comparison
remarkably efficient, so favour their use when handling extremely large
buffers.
This reverses the previous idea in that we now prefer large GPU bo
rather than large CPU bo, as the render pipeline is far more flexible
for handling those than the blitter is for handling the CPU bo (at least
for gen4+).
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 94b6c18..1c23320 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -575,6 +575,7 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
{
struct drm_i915_gem_get_aperture aperture;
size_t totalram;
+ unsigned half_gpu_max;
unsigned int i, j;
memset(kgem, 0, sizeof(*kgem));
@@ -679,7 +680,6 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
kgem->min_alignment = 64;
kgem->max_object_size = 2 * kgem->aperture_total / 3;
- kgem->max_cpu_size = kgem->max_object_size;
kgem->max_gpu_size = kgem->max_object_size;
if (!kgem->has_llc)
kgem->max_gpu_size = MAX_CACHE_SIZE;
@@ -691,16 +691,6 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
if (kgem->max_gpu_size > kgem->aperture_low)
kgem->max_gpu_size = kgem->aperture_low;
}
- if (kgem->max_gpu_size > kgem->max_cpu_size)
- kgem->max_gpu_size = kgem->max_cpu_size;
-
- kgem->max_upload_tile_size = kgem->aperture_mappable / 2;
- if (kgem->max_upload_tile_size > kgem->max_gpu_size / 2)
- kgem->max_upload_tile_size = kgem->max_gpu_size / 2;
-
- kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
- if (kgem->max_copy_tile_size > kgem->max_gpu_size / 2)
- kgem->max_copy_tile_size = kgem->max_gpu_size / 2;
totalram = total_ram_size();
if (totalram == 0) {
@@ -708,14 +698,32 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, int gen)
__FUNCTION__));
totalram = kgem->aperture_total;
}
+ DBG(("%s: total ram=%ld\n", __FUNCTION__, (long)totalram));
if (kgem->max_object_size > totalram / 2)
kgem->max_object_size = totalram / 2;
- if (kgem->max_cpu_size > totalram / 2)
- kgem->max_cpu_size = totalram / 2;
if (kgem->max_gpu_size > totalram / 4)
kgem->max_gpu_size = totalram / 4;
+ half_gpu_max = kgem->max_gpu_size / 2;
+ if (kgem->gen >= 40)
+ kgem->max_cpu_size = half_gpu_max;
+ else
+ kgem->max_cpu_size = kgem->max_object_size;
+
+ kgem->max_copy_tile_size = (MAX_CACHE_SIZE + 1)/2;
+ if (kgem->max_copy_tile_size > half_gpu_max)
+ kgem->max_copy_tile_size = half_gpu_max;
+
+ if (kgem->has_llc)
+ kgem->max_upload_tile_size = kgem->max_copy_tile_size;
+ else
+ kgem->max_upload_tile_size = kgem->aperture_mappable / 4;
+ if (kgem->max_upload_tile_size > half_gpu_max)
+ kgem->max_upload_tile_size = half_gpu_max;
+
kgem->large_object_size = MAX_CACHE_SIZE;
+ if (kgem->large_object_size > kgem->max_cpu_size)
+ kgem->large_object_size = kgem->max_cpu_size;
if (kgem->large_object_size > kgem->max_gpu_size)
kgem->large_object_size = kgem->max_gpu_size;
commit 5b16972d7850b2347efc084311d664e14263cba1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Feb 7 23:45:37 2012 +0000
sna: Check that we successfully retired an active linear buffer
If we go to the trouble of running retire before searching, we may as
well check that we retired something before proceeding to check all the
inactive lists.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 0c2f547..94b6c18 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1945,11 +1945,16 @@ search_linear_cache(struct kgem *kgem, unsigned int num_pages, unsigned flags)
if (num_pages >= MAX_CACHE_SIZE / PAGE_SIZE)
return NULL;
- if (!use_active &&
- list_is_empty(inactive(kgem, num_pages)) &&
- !list_is_empty(active(kgem, num_pages, I915_TILING_NONE)) &&
- !kgem_retire(kgem))
- return NULL;
+ if (!use_active && list_is_empty(inactive(kgem, num_pages))) {
+ if (list_is_empty(active(kgem, num_pages, I915_TILING_NONE)))
+ return NULL;
+
+ if (!kgem_retire(kgem))
+ return NULL;
+
+ if (list_is_empty(inactive(kgem, num_pages)))
+ return NULL;
+ }
if (!use_active && flags & (CREATE_CPU_MAP | CREATE_GTT_MAP)) {
int for_cpu = !!(flags & CREATE_CPU_MAP);
commit 207b4d4482a6af4a39472ec20ff04fa0c9322d73
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Tue Feb 7 21:56:29 2012 +0000
sna: Relax must-be-blittable rules for gen4+
The render pipeline is actually more flexible than the blitter for
dealing with large surfaces and so the BLT is no longer the limiting
factor on gen4+.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index e80eaae..0c2f547 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -799,6 +799,9 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
uint32_t tile_width, tile_height;
uint32_t size;
+ assert(width <= MAXSHORT);
+ assert(height <= MAXSHORT);
+
if (kgem->gen < 30) {
if (tiling) {
tile_width = 512;
@@ -823,32 +826,26 @@ static uint32_t kgem_surface_size(struct kgem *kgem,
break;
}
- /* If it is too wide for the blitter, don't even bother. */
*pitch = ALIGN(width * bpp / 8, tile_width);
- if (kgem->gen < 40) {
- if (tiling != I915_TILING_NONE) {
- if (*pitch > 8192)
- return 0;
- for (size = tile_width; size < *pitch; size <<= 1)
- ;
- *pitch = size;
- } else {
- if (*pitch >= 32768)
- return 0;
- }
+ height = ALIGN(height, tile_height);
+ if (kgem->gen >= 40)
+ return PAGE_ALIGN(*pitch * height);
+
+ /* If it is too wide for the blitter, don't even bother. */
+ if (tiling != I915_TILING_NONE) {
+ if (*pitch > 8192)
+ return 0;
+
+ for (size = tile_width; size < *pitch; size <<= 1)
+ ;
+ *pitch = size;
} else {
- int limit = 32768;
- if (tiling)
- limit *= 4;
- if (*pitch >= limit)
+ if (*pitch >= 32768)
return 0;
}
- height = ALIGN(height, tile_height);
- if (height >= 65536)
- return 0;
size = *pitch * height;
- if (relaxed_fencing || tiling == I915_TILING_NONE || kgem->gen >= 40)
+ if (relaxed_fencing || tiling == I915_TILING_NONE)
return PAGE_ALIGN(size);
/* We need to allocate a pot fence region for a tiled buffer. */
@@ -2233,6 +2230,9 @@ unsigned kgem_can_create_2d(struct kgem *kgem,
if (depth < 8 || kgem->wedged)
return 0;
+ if (width > MAXSHORT || height > MAXSHORT)
+ return 0;
+
size = kgem_surface_size(kgem, false, false,
width, height, bpp,
I915_TILING_NONE, &pitch);
More information about the xorg-commit
mailing list