xf86-video-intel: 11 commits - src/sna/blt.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_io.c src/sna/sna_trapezoids_imprecise.c
Chris Wilson
ickle at kemper.freedesktop.org
Sat Jun 28 23:17:55 PDT 2014
src/sna/blt.c | 152 ++++++++++++++++---------------------
src/sna/kgem.c | 9 +-
src/sna/kgem.h | 10 ++
src/sna/sna_accel.c | 151 +++++++++++++++++++++++++++++-------
src/sna/sna_io.c | 10 +-
src/sna/sna_trapezoids_imprecise.c | 17 +++-
6 files changed, 228 insertions(+), 121 deletions(-)
New commits:
commit 6b906ae742ec96eeef403191d3cdded6a23a70b7
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Jun 29 07:02:44 2014 +0100
sna: Update allocation of CPU bo to avoid creating active buffers
Since we now prefer CPU detiling, exactly when we want active/inactive
buffers is a little more complex - and we also need to take into account
when we want to use the CPU bo as a render target.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index efcde3e..54fece4 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -508,7 +508,7 @@ static bool must_check
sna_pixmap_alloc_cpu(struct sna *sna,
PixmapPtr pixmap,
struct sna_pixmap *priv,
- bool from_gpu)
+ unsigned flags)
{
/* Restore after a GTT mapping? */
assert(priv->gpu_damage == NULL || priv->gpu_bo);
@@ -520,14 +520,21 @@ sna_pixmap_alloc_cpu(struct sna *sna,
assert(priv->stride);
if (priv->create & KGEM_CAN_CREATE_CPU) {
+ unsigned hint;
+
DBG(("%s: allocating CPU buffer (%dx%d)\n", __FUNCTION__,
pixmap->drawable.width, pixmap->drawable.height));
+ hint = 0;
+ if ((flags & MOVE_ASYNC_HINT) == 0 &&
+ ((flags & MOVE_READ) == 0 || (priv->gpu_damage && !priv->clear && !sna->kgem.has_llc)))
+ hint = CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_NO_THROTTLE;
+
priv->cpu_bo = kgem_create_cpu_2d(&sna->kgem,
pixmap->drawable.width,
pixmap->drawable.height,
pixmap->drawable.bitsPerPixel,
- from_gpu ? 0 : CREATE_CPU_MAP | CREATE_INACTIVE | CREATE_NO_THROTTLE);
+ hint);
if (priv->cpu_bo) {
priv->ptr = kgem_bo_map__cpu(&sna->kgem, priv->cpu_bo);
if (priv->ptr) {
@@ -2165,7 +2172,8 @@ _sna_pixmap_move_to_cpu(PixmapPtr pixmap, unsigned int flags)
skip_inplace_map:
sna_damage_destroy(&priv->gpu_damage);
priv->clear = false;
- if (priv->cpu_bo && !priv->cpu_bo->flush &&
+ if ((flags & MOVE_ASYNC_HINT) == 0 &&
+ priv->cpu_bo && !priv->cpu_bo->flush &&
__kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) {
DBG(("%s: discarding busy CPU bo\n", __FUNCTION__));
assert(!priv->shm);
@@ -2175,7 +2183,7 @@ skip_inplace_map:
sna_pixmap_free_cpu(sna, priv, false);
assert(priv->mapped == MAPPED_NONE);
- if (!sna_pixmap_alloc_cpu(sna, pixmap, priv, false))
+ if (!sna_pixmap_alloc_cpu(sna, pixmap, priv, 0))
return false;
assert(priv->mapped == MAPPED_NONE);
assert(pixmap->devPrivate.ptr == PTR(priv->ptr));
@@ -2277,8 +2285,7 @@ skip_inplace_map:
assert(priv->mapped == MAPPED_NONE);
if (pixmap->devPrivate.ptr == NULL &&
- !sna_pixmap_alloc_cpu(sna, pixmap, priv,
- flags & MOVE_READ ? priv->gpu_damage && !priv->clear : 0))
+ !sna_pixmap_alloc_cpu(sna, pixmap, priv, flags))
return false;
assert(priv->mapped == MAPPED_NONE);
assert(pixmap->devPrivate.ptr == PTR(priv->ptr));
@@ -2290,6 +2297,15 @@ skip_inplace_map:
pixmap->devKind, pixmap->devKind * pixmap->drawable.height));
if (priv->cpu_bo) {
+ if ((flags & MOVE_ASYNC_HINT || priv->cpu_bo->exec) &&
+ sna->render.fill_one(sna,
+ pixmap, priv->cpu_bo, priv->clear_color,
+ 0, 0,
+ pixmap->drawable.width,
+ pixmap->drawable.height,
+ GXcopy))
+ goto clear_done;
+
DBG(("%s: syncing CPU bo\n", __FUNCTION__));
kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
assert(pixmap->devPrivate.ptr == MAP(priv->cpu_bo->map__cpu));
@@ -2311,6 +2327,7 @@ skip_inplace_map:
priv->clear_color);
}
+clear_done:
sna_damage_all(&priv->cpu_damage, pixmap);
sna_pixmap_free_gpu(sna, priv);
assert(priv->gpu_damage == NULL);
@@ -2474,6 +2491,27 @@ static inline bool region_inplace(struct sna *sna,
>= sna->kgem.half_cpu_cache_pages;
}
+static bool cpu_clear_boxes(struct sna *sna,
+ PixmapPtr pixmap,
+ struct sna_pixmap *priv,
+ const BoxRec *box, int n)
+{
+ struct sna_fill_op fill;
+
+ if (!sna_fill_init_blt(&fill, sna,
+ pixmap, priv->cpu_bo,
+ GXcopy, priv->clear_color,
+ FILL_BOXES)) {
+ DBG(("%s: unsupported fill\n",
+ __FUNCTION__));
+ return false;
+ }
+
+ fill.boxes(sna, &fill, box, n);
+ fill.done(sna, &fill);
+ return true;
+}
+
bool
sna_drawable_move_region_to_cpu(DrawablePtr drawable,
RegionPtr region,
@@ -2602,7 +2640,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
sna_pixmap_unmap(pixmap, priv);
assert(priv->mapped == MAPPED_NONE);
if (pixmap->devPrivate.ptr == NULL &&
- !sna_pixmap_alloc_cpu(sna, pixmap, priv, false))
+ !sna_pixmap_alloc_cpu(sna, pixmap, priv, flags))
return false;
assert(priv->mapped == MAPPED_NONE);
assert(pixmap->devPrivate.ptr == PTR(priv->ptr));
@@ -2788,8 +2826,7 @@ move_to_cpu:
assert(priv->mapped == MAPPED_NONE);
if (pixmap->devPrivate.ptr == NULL &&
- !sna_pixmap_alloc_cpu(sna, pixmap, priv,
- flags & MOVE_READ ? priv->gpu_damage && !priv->clear : 0)) {
+ !sna_pixmap_alloc_cpu(sna, pixmap, priv, flags)) {
DBG(("%s: CPU bo allocation failed, trying full move-to-cpu\n", __FUNCTION__));
goto move_to_cpu;
}
@@ -2819,6 +2856,10 @@ move_to_cpu:
DBG(("%s: pending clear, doing partial fill\n", __FUNCTION__));
if (priv->cpu_bo) {
+ if ((flags & MOVE_ASYNC_HINT || priv->cpu_bo->exec) &&
+ cpu_clear_boxes(sna, pixmap, priv, box, n))
+ goto clear_done;
+
DBG(("%s: syncing CPU bo\n", __FUNCTION__));
kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
assert(pixmap->devPrivate.ptr == MAP(priv->cpu_bo->map__cpu));
@@ -2836,6 +2877,7 @@ move_to_cpu:
box++;
} while (--n);
+clear_done:
if (flags & MOVE_WRITE ||
region->extents.x2 - region->extents.x1 > 1 ||
region->extents.y2 - region->extents.y1 > 1) {
commit b961d7323369284ea2c3db47d30c27ffe01a9040
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sun Jun 29 07:00:58 2014 +0100
sna: Sync CPU bo before writes
Fixes regression from
commit 961139f5878572ebea268a0bbf47caf05af9093f [2.99.912]
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri May 30 09:45:15 2014 +0100
sna: Use manual detiling for downloads
Reported-by: Harald Judt <h.judt at gmx.at>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=80560
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 7a9610c..efcde3e 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1679,14 +1679,22 @@ static inline bool gpu_bo_download(struct sna *sna,
if (!kgem_bo_can_map__cpu(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC))
return false;
- if (idle && __kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
- return false;
+ if (idle) {
+ if (__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
+ return false;
+
+ if (priv->cpu_bo && __kgem_bo_is_busy(&sna->kgem, priv->cpu_bo))
+ return false;
+ }
src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
if (src == NULL)
return false;
kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC);
+
+ if (priv->cpu_bo)
+ kgem_bo_sync__cpu(&sna->kgem, priv->cpu_bo);
assert(has_coherent_ptr(sna, priv, MOVE_WRITE));
if (sigtrap_get())
commit 53ef9e762a6e7802b3d5f8fba9ac17ff95545c10
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Jun 28 21:07:08 2014 +0100
sna: Only preferentially upload through the GTT for large transfers
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 3e5b036..7a9610c 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4708,6 +4708,11 @@ try_upload__inplace(PixmapPtr pixmap, RegionRec *region,
break;
}
+ if (priv->gpu_damage == NULL && !box_inplace(pixmap, &region->extents)) {
+ DBG(("%s: no, too small to bother with using the GTT\n", __FUNCTION__));
+ return false;
+ }
+
if (!kgem_bo_can_map(&sna->kgem, priv->gpu_bo)) {
DBG(("%s: no, cannot map through the CPU\n", __FUNCTION__));
return false;
commit 0955f12ae04011593b71817e3151b8fb7c228899
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Jun 28 17:38:54 2014 +0100
sna: Prefer linear if below tile_width
Be stricter in order to allow greater use of CPU bo.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 22aef25..3f56c32 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -4194,13 +4194,13 @@ int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int
goto done;
}
- if (tiling == I915_TILING_X && width * bpp <= 8*8*512/10) {
+ if (tiling == I915_TILING_X && width * bpp <= 8*512) {
DBG(("%s: too thin [width %d, %d bpp] for TILING_X\n",
__FUNCTION__, width, bpp));
tiling = I915_TILING_NONE;
goto done;
}
- if (tiling == I915_TILING_Y && width * bpp <= 8*8*128/10) {
+ if (tiling == I915_TILING_Y && width * bpp < 8*128) {
DBG(("%s: too thin [%d] for TILING_Y\n",
__FUNCTION__, width));
tiling = I915_TILING_NONE;
commit 3ef966f4c5bae07108ce2720f4da3c3c4e41e1aa
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Jun 28 14:23:29 2014 +0100
sna/io: Prefer CPU copies on LLC
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 9e175a7..eaa2052 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -117,6 +117,8 @@ read_boxes_inplace__cpu(struct kgem *kgem,
if (sigtrap_get())
return false;
+ DBG(("%s x %d\n", __FUNCTION__, n));
+
if (bo->tiling == I915_TILING_X) {
do {
memcpy_from_tiled_x(kgem, src, dst, bpp, src_pitch, dst_pitch,
@@ -210,10 +212,13 @@ static bool download_inplace(struct kgem *kgem,
if (FORCE_INPLACE)
return FORCE_INPLACE > 0;
+ if (cpu)
+ return true;
+
if (kgem->can_blt_cpu && kgem->max_cpu_size)
return false;
- return !__kgem_bo_is_busy(kgem, bo) || cpu;
+ return !__kgem_bo_is_busy(kgem, bo);
}
void sna_read_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *src_bo,
@@ -253,7 +258,7 @@ void sna_read_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *src_bo,
* this path.
*/
- if (download_inplace(kgem, dst, src_bo, box ,nbox)) {
+ if (download_inplace(kgem, dst, src_bo, box, nbox)) {
fallback:
read_boxes_inplace(kgem, dst, src_bo, box, nbox);
return;
commit 9fc052da5c4246402d2707b3a91efffa7dd81e08
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Jun 28 14:22:17 2014 +0100
sna: Don't discard damage for SHM pixmaps
We don't really want to rendering into SHM pixmaps except for copying
back due to the strict serialisation requirements.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 2a4c567..3e5b036 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -6126,7 +6126,11 @@ upload_inplace:
static void discard_cpu_damage(struct sna *sna, struct sna_pixmap *priv)
{
+ if (priv->cpu_damage == NULL && !priv->shm)
+ return;
+
DBG(("%s: discarding existing CPU damage\n", __FUNCTION__));
+
if (kgem_bo_discard_cache(priv->gpu_bo, true)) {
DBG(("%s: discarding cached upload buffer\n", __FUNCTION__));
assert(DAMAGE_IS_ALL(priv->cpu_damage));
@@ -6137,6 +6141,7 @@ static void discard_cpu_damage(struct sna *sna, struct sna_pixmap *priv)
kgem_bo_destroy(&sna->kgem, priv->gpu_bo);
priv->gpu_bo = NULL;
}
+
sna_damage_destroy(&priv->cpu_damage);
list_del(&priv->flush_list);
commit 0f8b39d24ff15cf3373ac7293f12772ebe16b68b
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Jun 28 14:21:36 2014 +0100
sna: Check for a mappable GPU bo before migrating damage
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index b7e3d90..2a4c567 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4520,6 +4520,14 @@ try_upload__tiled_x(PixmapPtr pixmap, RegionRec *region,
return false;
}
+ if (!sna_pixmap_move_area_to_gpu(pixmap, &region->extents,
+ MOVE_WRITE | (region->data ? MOVE_READ : 0)))
+ return false;
+
+ if ((priv->create & KGEM_CAN_CREATE_LARGE) == 0 &&
+ __kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
+ return false;
+
dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
if (dst == NULL)
return false;
@@ -4622,6 +4630,11 @@ try_upload__inplace(PixmapPtr pixmap, RegionRec *region,
if (!USE_INPLACE)
return false;
+ assert(priv);
+
+ if (priv->shm && priv->gpu_damage == NULL)
+ return false;
+
replaces = region_subsumes_pixmap(region, pixmap);
DBG(("%s: bo? %d, can map? %d, replaces? %d\n", __FUNCTION__,
@@ -4678,18 +4691,10 @@ try_upload__inplace(PixmapPtr pixmap, RegionRec *region,
}
}
- if (!sna_pixmap_move_area_to_gpu(pixmap, &region->extents,
- MOVE_WRITE | (region->data ? MOVE_READ : 0)))
- return false;
-
if (priv->gpu_bo == NULL &&
!create_upload_tiled_x(&sna->kgem, pixmap, priv, ignore_cpu))
return false;
- if ((priv->create & KGEM_CAN_CREATE_LARGE) == 0 &&
- __kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
- return false;
-
DBG(("%s: tiling=%d\n", __FUNCTION__, priv->gpu_bo->tiling));
switch (priv->gpu_bo->tiling) {
case I915_TILING_Y:
@@ -4708,6 +4713,14 @@ try_upload__inplace(PixmapPtr pixmap, RegionRec *region,
return false;
}
+ if (!sna_pixmap_move_area_to_gpu(pixmap, &region->extents,
+ MOVE_WRITE | (region->data ? MOVE_READ : 0)))
+ return false;
+
+ if ((priv->create & KGEM_CAN_CREATE_LARGE) == 0 &&
+ __kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
+ return false;
+
dst = kgem_bo_map(&sna->kgem, priv->gpu_bo);
if (dst == NULL)
return false;
@@ -4768,6 +4781,14 @@ done:
sna_damage_destroy(&priv->cpu_damage);
else
sna_damage_subtract(&priv->cpu_damage, region);
+
+ if (priv->cpu_damage == NULL) {
+ list_del(&priv->flush_list);
+ sna_damage_all(&priv->gpu_damage, pixmap);
+ }
+
+ if (priv->shm)
+ sna_add_flush_pixmap(sna, priv, priv->cpu_bo);
}
assert(!priv->clear);
@@ -4895,6 +4916,7 @@ try_upload__fast(PixmapPtr pixmap, RegionRec *region,
return false;
if (ignore_cpu_damage(sna, priv, region)) {
+ DBG(("%s: ignore existing cpu damage (if any)\n", __FUNCTION__));
if (try_upload__inplace(pixmap, region, x, y, w, h, bits, stride))
return true;
}
@@ -6057,6 +6079,8 @@ upload_inplace:
}
dst_priv->clear = false;
+ assert(has_coherent_ptr(sna, src_priv, MOVE_READ));
+
box = region_rects(region);
n = region_num_rects(region);
if (dst_priv->gpu_bo->tiling) {
commit cfdaee4a7e45689b0fbbc8c3166d28d69797e759
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Jun 28 14:20:00 2014 +0100
sna: Skip adding damage if it is already contained
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 9f5c0b4..b7e3d90 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2865,7 +2865,7 @@ move_to_cpu:
DBG(("%s: region already in CPU damage\n",
__FUNCTION__));
- goto done;
+ goto already_damaged;
}
}
@@ -2986,6 +2986,7 @@ done:
}
}
+already_damaged:
if (dx | dy)
RegionTranslate(region, -dx, -dy);
commit 80752fb2794faa581d891b24148eaf51c42afd25
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Jun 28 14:19:22 2014 +0100
sna: Tidy calling memcpy_from_tiled
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 898f943..22aef25 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -5968,6 +5968,7 @@ void *kgem_bo_map__cpu(struct kgem *kgem, struct kgem_bo *bo)
assert(!bo->purged);
assert(list_is_empty(&bo->list));
assert(bo->proxy == NULL);
+ assert_tiling(kgem, bo);
if (bo->map__cpu)
return MAP(bo->map__cpu);
@@ -6086,6 +6087,8 @@ void kgem_bo_sync__cpu(struct kgem *kgem, struct kgem_bo *bo)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
assert(!bo->scanout);
+ assert_tiling(kgem, bo);
+
kgem_bo_submit(kgem, bo);
/* SHM pixmaps use proxies for subpage offsets */
@@ -6120,6 +6123,7 @@ void kgem_bo_sync__cpu_full(struct kgem *kgem, struct kgem_bo *bo, bool write)
{
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
assert(!bo->scanout || !write);
+ assert_tiling(kgem, bo);
if (write || bo->needs_flush)
kgem_bo_submit(kgem, bo);
@@ -6165,6 +6169,7 @@ void kgem_bo_sync__gtt(struct kgem *kgem, struct kgem_bo *bo)
DBG(("%s: handle=%d\n", __FUNCTION__, bo->handle));
assert(bo->refcnt);
assert(bo->proxy == NULL);
+ assert_tiling(kgem, bo);
kgem_bo_submit(kgem, bo);
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index e66bffb..be9b7e8 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -787,6 +787,11 @@ memcpy_to_tiled_x(struct kgem *kgem,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height)
{
+ assert(kgem->memcpy_to_tiled_x);
+ assert(src_x >= 0 && src_y >= 0);
+ assert(dst_x >= 0 && dst_y >= 0);
+ assert(8*src_stride >= (src_x+width) * bpp);
+ assert(8*dst_stride >= (dst_x+width) * bpp);
return kgem->memcpy_to_tiled_x(src, dst, bpp,
src_stride, dst_stride,
src_x, src_y,
@@ -802,6 +807,11 @@ memcpy_from_tiled_x(struct kgem *kgem,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height)
{
+ assert(kgem->memcpy_from_tiled_x);
+ assert(src_x >= 0 && src_y >= 0);
+ assert(dst_x >= 0 && dst_y >= 0);
+ assert(8*src_stride >= (src_x+width) * bpp);
+ assert(8*dst_stride >= (dst_x+width) * bpp);
return kgem->memcpy_from_tiled_x(src, dst, bpp,
src_stride, dst_stride,
src_x, src_y,
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index a559907..9f5c0b4 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1693,30 +1693,32 @@ static inline bool gpu_bo_download(struct sna *sna,
return false;
if (priv->gpu_bo->tiling) {
+ int bpp = priv->pixmap->drawable.bitsPerPixel;
+ void *dst = priv->pixmap->devPrivate.ptr;
+ int dst_pitch = priv->pixmap->devKind;
+
DBG(("%s: download through a tiled CPU map\n", __FUNCTION__));
do {
DBG(("%s: box (%d, %d), (%d, %d)\n",
__FUNCTION__, box->x1, box->y1, box->x2, box->y2));
- memcpy_from_tiled_x(&sna->kgem, src,
- priv->pixmap->devPrivate.ptr,
- priv->pixmap->drawable.bitsPerPixel,
- priv->gpu_bo->pitch,
- priv->pixmap->devKind,
+ memcpy_from_tiled_x(&sna->kgem, src, dst, bpp,
+ priv->gpu_bo->pitch, dst_pitch,
box->x1, box->y1,
box->x1, box->y1,
box->x2 - box->x1, box->y2 - box->y1);
box++;
} while (--n);
} else {
+ int bpp = priv->pixmap->drawable.bitsPerPixel;
+ void *dst = priv->pixmap->devPrivate.ptr;
+ int dst_pitch = priv->pixmap->devKind;
+
DBG(("%s: download through a linear CPU map\n", __FUNCTION__));
do {
DBG(("%s: box (%d, %d), (%d, %d)\n",
__FUNCTION__, box->x1, box->y1, box->x2, box->y2));
- memcpy_blt(src,
- priv->pixmap->devPrivate.ptr,
- priv->pixmap->drawable.bitsPerPixel,
- priv->gpu_bo->pitch,
- priv->pixmap->devKind,
+ memcpy_blt(src, dst, bpp,
+ priv->gpu_bo->pitch, dst_pitch,
box->x1, box->y1,
box->x1, box->y1,
box->x2 - box->x1, box->y2 - box->y1);
@@ -4934,6 +4936,10 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
get_drawable_deltas(drawable, pixmap, &dx, &dy);
x += dx + drawable->x;
y += dy + drawable->y;
+ assert(region->extents.x1 >= x);
+ assert(region->extents.y1 >= y);
+ assert(region->extents.x2 <= x + w);
+ assert(region->extents.y2 <= y + h);
if (try_upload__fast(pixmap, region, x, y, w, h, bits, stride))
return true;
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index f464dce..9e175a7 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -118,7 +118,6 @@ read_boxes_inplace__cpu(struct kgem *kgem,
return false;
if (bo->tiling == I915_TILING_X) {
- assert(kgem->memcpy_from_tiled_x);
do {
memcpy_from_tiled_x(kgem, src, dst, bpp, src_pitch, dst_pitch,
box->x1, box->y1,
commit 2a0176379f0ff290d276adc72d44dfddafd96da5
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Jun 28 14:18:23 2014 +0100
sna: Micro-optimise unswizzling tiling/detiling
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/blt.c b/src/sna/blt.c
index b61f88b..b5bfee6 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -233,55 +233,47 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
const unsigned tile_size = 4096;
const unsigned cpp = bpp / 8;
- const unsigned stride_tiles = dst_stride / tile_width;
- const unsigned swizzle_pixels = tile_width / cpp;
- const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
- const unsigned tile_mask = (1 << tile_pixels) - 1;
-
- unsigned x, y;
+ const unsigned tile_pixels = tile_width / cpp;
+ const unsigned tile_shift = ffs(tile_pixels) - 1;
+ const unsigned tile_mask = tile_pixels - 1;
DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
__FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+ assert(src != dst);
- src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
-
- for (y = 0; y < height; ++y) {
- const uint32_t dy = y + dst_y;
- const uint32_t tile_row =
- (dy / tile_height * stride_tiles * tile_size +
- (dy & (tile_height-1)) * tile_width);
- const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
- uint32_t dx = dst_x, offset;
-
- x = width * cpp;
- if (dx & (swizzle_pixels - 1)) {
- const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
- const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
- offset = tile_row +
- (dx >> tile_pixels) * tile_size +
- (dx & tile_mask) * cpp;
- memcpy((char *)dst + offset, src_row, length * cpp);
-
- src_row += length * cpp;
- x -= length * cpp;
- dx += length;
+ if (src_x | src_y)
+ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+ assert(src_stride >= width * cpp);
+ src_stride -= width * cpp;
+
+ while (height--) {
+ unsigned w = width * cpp;
+ uint8_t *tile_row = dst;
+
+ tile_row += dst_y / tile_height * dst_stride * tile_height;
+ tile_row += (dst_y & (tile_height-1)) * tile_width;
+ if (dst_x) {
+ tile_row += (dst_x >> tile_shift) * tile_size;
+ if (dst_x & tile_mask) {
+ const unsigned x = (dst_x & tile_mask) * cpp;
+ const unsigned len = min(tile_width - x, w);
+ memcpy(tile_row + x, src, len);
+
+ tile_row += tile_size;
+ src = (const uint8_t *)src + len;
+ w -= len;
+ }
}
- while (x >= 512) {
- assert((dx & tile_mask) == 0);
- offset = tile_row + (dx >> tile_pixels) * tile_size;
-
- memcpy((char *)dst + offset, src_row, 512);
+ while (w >= tile_width) {
+ memcpy(tile_row, src, tile_width);
- src_row += 512;
- x -= 512;
- dx += swizzle_pixels;
- }
- if (x) {
- offset = tile_row +
- (dx >> tile_pixels) * tile_size +
- (dx & tile_mask) * cpp;
- memcpy((char *)dst + offset, src_row, x);
+ tile_row += tile_size;
+ src = (const uint8_t *)src + tile_width;
+ w -= tile_width;
}
+ memcpy(tile_row, src, w);
+ src = (const uint8_t *)src + src_stride + w;
+ dst_y++;
}
}
@@ -297,55 +289,47 @@ memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
const unsigned tile_size = 4096;
const unsigned cpp = bpp / 8;
- const unsigned stride_tiles = src_stride / tile_width;
- const unsigned swizzle_pixels = tile_width / cpp;
- const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
- const unsigned tile_mask = (1 << tile_pixels) - 1;
-
- unsigned x, y;
+ const unsigned tile_pixels = tile_width / cpp;
+ const unsigned tile_shift = ffs(tile_pixels) - 1;
+ const unsigned tile_mask = tile_pixels - 1;
DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
__FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+ assert(src != dst);
- dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
-
- for (y = 0; y < height; ++y) {
- const uint32_t sy = y + src_y;
- const uint32_t tile_row =
- (sy / tile_height * stride_tiles * tile_size +
- (sy & (tile_height-1)) * tile_width);
- uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
- uint32_t sx = src_x, offset;
-
- x = width * cpp;
- if (sx & (swizzle_pixels - 1)) {
- const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
- const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
- offset = tile_row +
- (sx >> tile_pixels) * tile_size +
- (sx & tile_mask) * cpp;
- memcpy(dst_row, (const char *)src + offset, length * cpp);
-
- dst_row += length * cpp;
- x -= length * cpp;
- sx += length;
+ if (dst_x | dst_y)
+ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+ assert(dst_stride >= width * cpp);
+ dst_stride -= width * cpp;
+
+ while (height--) {
+ unsigned w = width * cpp;
+ const uint8_t *tile_row = src;
+
+ tile_row += src_y / tile_height * src_stride * tile_height;
+ tile_row += (src_y & (tile_height-1)) * tile_width;
+ if (src_x) {
+ tile_row += (src_x >> tile_shift) * tile_size;
+ if (src_x & tile_mask) {
+ const unsigned x = (src_x & tile_mask) * cpp;
+ const unsigned len = min(tile_width - x, w);
+ memcpy(dst, tile_row + x, len);
+
+ tile_row += tile_size;
+ dst = (uint8_t *)dst + len;
+ w -= len;
+ }
}
- while (x >= 512) {
- assert((sx & tile_mask) == 0);
- offset = tile_row + (sx >> tile_pixels) * tile_size;
-
- memcpy(dst_row, (const char *)src + offset, 512);
+ while (w >= tile_width) {
+ memcpy(dst, tile_row, tile_width);
- dst_row += 512;
- x -= 512;
- sx += swizzle_pixels;
- }
- if (x) {
- offset = tile_row +
- (sx >> tile_pixels) * tile_size +
- (sx & tile_mask) * cpp;
- memcpy(dst_row, (const char *)src + offset, x);
+ tile_row += tile_size;
+ dst = (uint8_t *)dst + tile_width;
+ w -= tile_width;
}
+ memcpy(dst, tile_row, w);
+ dst = (uint8_t *)dst + dst_stride + w;
+ src_y++;
}
}
commit 24cb50e53c789cb7a05d59ad103dda1c3a009485
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Sat Jun 28 07:05:55 2014 +0100
sna/trapezoids: Handle mono traps just in case
I disabled a few paths and ended up in an assert that mono trapezoids
shouldn't get that far...
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_trapezoids_imprecise.c b/src/sna/sna_trapezoids_imprecise.c
index 69f8ae8..ebde762 100644
--- a/src/sna/sna_trapezoids_imprecise.c
+++ b/src/sna/sna_trapezoids_imprecise.c
@@ -1505,6 +1505,15 @@ inplace_end_subrows(struct active_list *active, uint8_t *row,
}
static void
+convert_mono(uint8_t *ptr, int w)
+{
+ while (w--) {
+ *ptr = 0xff * (*ptr >= 0xf0);
+ ptr++;
+ }
+}
+
+static void
tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
{
int i, j, h = converter->extents.y2;
@@ -1516,7 +1525,6 @@ tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
int width = scratch->drawable.width;
__DBG(("%s: mono=%d, buf?=%d\n", __FUNCTION__, mono, buf != NULL));
- assert(!mono);
assert(converter->extents.y1 == 0);
assert(converter->extents.x1 == 0);
assert(scratch->drawable.depth == 8);
@@ -1552,6 +1560,8 @@ tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
if (do_full_step) {
memset(ptr, 0, width);
inplace_row(active, ptr, width);
+ if (mono)
+ convert_mono(ptr, width);
if (row != ptr)
memcpy(row, ptr, width);
@@ -1584,8 +1594,11 @@ tor_inplace(struct tor *converter, PixmapPtr scratch, int mono, uint8_t *buf)
}
assert(min >= 0 && max <= width);
memset(row, 0, min);
- if (max > min)
+ if (max > min) {
inplace_end_subrows(active, row+min, (int8_t*)ptr+min, max-min);
+ if (mono)
+ convert_mono(row+min, max-min);
+ }
if (max < width)
memset(row+max, 0, width-max);
}
More information about the xorg-commit
mailing list