xf86-video-intel: 6 commits - configure.ac src/intel_display.c src/sna/blt.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c
Chris Wilson
ickle at kemper.freedesktop.org
Thu Jun 27 08:41:37 PDT 2013
configure.ac | 3
src/intel_display.c | 2
src/sna/blt.c | 276 +++++++++++++++++++++++++++++++++++++++++++++++++++-
src/sna/kgem.c | 6 -
src/sna/kgem.h | 22 +++-
src/sna/sna_accel.c | 89 +++++++++++++++-
6 files changed, 382 insertions(+), 16 deletions(-)
New commits:
commit b5e85e495e55e2537d305b7bebacdf6f97b66199
Author: Roy.Li <rongqing.li at windriver.com>
Date: Thu Jun 27 14:10:14 2013 +0800
uxa: fix the compilation error with xorg-xserver <= 1.10
struct _Screen has no canDoBGNoneRoot when ABI_VIDEODRV_VERSION is less than 10.0
Signed-off-by: Roy.Li <rongqing.li at windriver.com>
diff --git a/src/intel_display.c b/src/intel_display.c
index 17168e5..0acb86d 100644
--- a/src/intel_display.c
+++ b/src/intel_display.c
@@ -2113,7 +2113,9 @@ void intel_copy_fb(ScrnInfoPtr scrn)
0, 0,
scrn->virtualX, scrn->virtualY);
intel->uxa_driver->done_copy(dst);
+#if ABI_VIDEODRV_VERSION >= SET_ABI_VERSION(10, 0)
pScreen->canDoBGNoneRoot = TRUE;
+#endif
cleanup_dst:
(*pScreen->DestroyPixmap)(dst);
commit 41715af4d009bfcb351946ddaa3a3ea3767a1429
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 27 16:36:52 2013 +0100
configure: SNA supports the old Xorgs
So allow it to be compiled by default for older Xorgs as well.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/configure.ac b/configure.ac
index 6721279..7425fda 100644
--- a/configure.ac
+++ b/configure.ac
@@ -203,9 +203,6 @@ AC_ARG_ENABLE(sna,
[SNA="$enableval"],
[SNA=auto])
-if test "x$SNA" = "xauto" && pkg-config --exists "xorg-server >= 1.10"; then
- SNA=yes
-fi
if test "x$SNA" != "xno"; then
AC_DEFINE(USE_SNA, 1, [Enable SNA support])
AC_CHECK_HEADERS([sys/sysinfo.h])
commit 7ce487617445c81f0178823de8896a2b73bbaaf1
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 27 16:08:43 2013 +0100
sna: Trim the large object threshold
Be kinder to smaller machines by lowering the threshold at which we treat
an object as huge and worthy of avoiding duplication.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 5b78c83..3859e2d 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -1205,8 +1205,8 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
kgem->max_upload_tile_size = kgem->aperture_low;
kgem->large_object_size = MAX_CACHE_SIZE;
- if (kgem->large_object_size > kgem->max_gpu_size)
- kgem->large_object_size = kgem->max_gpu_size;
+ if (kgem->large_object_size > half_gpu_max)
+ kgem->large_object_size = half_gpu_max;
if (kgem->max_copy_tile_size > kgem->aperture_high/2)
kgem->max_copy_tile_size = kgem->aperture_high/2;
if (kgem->max_copy_tile_size > kgem->aperture_low)
commit 31467e18d2ccdc42b0601b43b581524859de1373
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 27 16:07:36 2013 +0100
sna: Prefer operating inplace with a very large GPU bo
As we strive to only keep one copy when working with very large objects,
so try operating inplace on a mapping for CPU operations with a large
GPU bo.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 46e383d..af68a14 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1785,6 +1785,12 @@ static inline bool operate_inplace(struct sna_pixmap *priv, unsigned flags)
return true;
}
+ if (priv->create & KGEM_CAN_CREATE_LARGE) {
+ DBG(("%s: large object, has GPU? %d\n",
+ __FUNCTION__, priv->gpu_bo));
+ return priv->gpu_bo != NULL;
+ }
+
if (flags & MOVE_WRITE && priv->gpu_bo && kgem_bo_is_busy(priv->gpu_bo)) {
DBG(("%s: no, GPU is busy, so stage write\n", __FUNCTION__));
return false;
@@ -2261,8 +2267,9 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
}
if (USE_INPLACE &&
- (flags & (MOVE_READ | MOVE_ASYNC_HINT)) == 0 &&
- (priv->flush || box_inplace(pixmap, &region->extents))) {
+ (priv->create & KGEM_CAN_CREATE_LARGE ||
+ ((flags & (MOVE_READ | MOVE_ASYNC_HINT)) == 0 &&
+ (priv->flush || box_inplace(pixmap, &region->extents))))) {
DBG(("%s: marking for inplace hint (%d, %d)\n",
__FUNCTION__, priv->flush, box_inplace(pixmap, &region->extents)));
flags |= MOVE_INPLACE_HINT;
@@ -3938,15 +3945,22 @@ static bool can_upload_tiled_x(struct kgem *kgem, struct sna_pixmap *priv)
struct kgem_bo *bo = priv->gpu_bo;
assert(bo);
- if (priv->cow)
+ if (priv->cow) {
+ DBG(("%s: no, has COW\n", __FUNCTION__));
return false;
+ }
- if (bo->tiling != I915_TILING_X)
+ if (bo->tiling != I915_TILING_X) {
+ DBG(("%s: no, uses %d tiling\n", __FUNCTION__, bo->tiling));
return false;
+ }
- if (bo->scanout)
+ if (bo->scanout) {
+ DBG(("%s: no, is scanout\n", __FUNCTION__, bo->scanout));
return false;
+ }
+ DBG(("%s? domain=%d, has_llc=%d\n", __FUNCTION__, bo->domain, kgem->has_llc));
return bo->domain == DOMAIN_CPU || kgem->has_llc;
}
@@ -4025,7 +4039,8 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region,
return false;
assert(priv->gpu_bo->tiling == I915_TILING_X);
- if (__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
+ if ((priv->create & KGEM_CAN_CREATE_LARGE) == 0 &&
+ __kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
return false;
dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
commit b615ce97ec43ea8fe02e995244c757138abcb2de
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 27 10:45:22 2013 +0100
sna: Add a fast path for reading back from tiled X bo
This is lower latency than the double copy incurred for first moving the
bo to the CPU and then copying it back - but due to the less efficient
tiled memcpy, it has lower throughput. So x11perf -shmget500 suffers
(by about 30%) but real world applications improve by about 2x.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 3783933..46e383d 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4028,7 +4028,7 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region,
if (__kgem_bo_is_busy(&sna->kgem, priv->gpu_bo))
return false;
- dst = __kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
+ dst = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
if (dst == NULL)
return false;
@@ -4048,7 +4048,6 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region,
box->x2 - box->x1, box->y2 - box->y1);
box++;
} while (--n);
- __kgem_bo_unmap__cpu(&sna->kgem, priv->gpu_bo, dst);
if (!DAMAGE_IS_ALL(priv->gpu_damage)) {
if (replaces) {
@@ -14345,6 +14344,62 @@ sna_get_image_blt(DrawablePtr drawable,
return ok;
}
+static bool
+sna_get_image_tiled(DrawablePtr drawable,
+ RegionPtr region,
+ char *dst,
+ unsigned flags)
+{
+ PixmapPtr pixmap = get_drawable_pixmap(drawable);
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ struct sna *sna = to_sna_from_pixmap(pixmap);
+ char *src;
+
+ if (!sna->kgem.memcpy_from_tiled_x)
+ return false;
+
+ if (flags & MOVE_INPLACE_HINT)
+ return false;
+
+ if (priv == NULL || priv->gpu_bo == NULL)
+ return false;
+
+ if (priv->gpu_bo->tiling != I915_TILING_X)
+ return false;
+
+ if (priv->gpu_bo->scanout)
+ return false;
+
+ if (!sna->kgem.has_llc && priv->gpu_bo->domain != DOMAIN_CPU)
+ return false;
+
+ if (priv->gpu_damage == NULL ||
+ !(DAMAGE_IS_ALL(priv->gpu_damage) ||
+ sna_damage_contains_box__no_reduce(priv->gpu_damage,
+ &region->extents)))
+ return false;
+
+ src = kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
+ if (src == NULL)
+ return false;
+
+ DBG(("%s: download through a tiled CPU map\n", __FUNCTION__));
+
+ kgem_bo_sync__cpu_full(&sna->kgem, priv->gpu_bo, FORCE_FULL_SYNC);
+
+ memcpy_from_tiled_x(&sna->kgem, src, dst,
+ pixmap->drawable.bitsPerPixel,
+ priv->gpu_bo->pitch,
+ PixmapBytePad(region->extents.x2 - region->extents.x1,
+ drawable->depth),
+ region->extents.x1, region->extents.y1,
+ 0, 0,
+ region->extents.x2 - region->extents.x1,
+ region->extents.y2 - region->extents.y1);
+
+ return true;
+}
+
static void
sna_get_image(DrawablePtr drawable,
int x, int y, int w, int h,
@@ -14379,6 +14434,9 @@ sna_get_image(DrawablePtr drawable,
if (can_blt && sna_get_image_blt(drawable, &region, dst, flags))
return;
+ if (can_blt && sna_get_image_tiled(drawable, &region, dst, flags))
+ return;
+
if (!sna_drawable_move_region_to_cpu(drawable, &region, flags))
return;
commit 6493c8c65f93ad2554c2512a07ba640e966fd026
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 27 10:45:13 2013 +0100
sna: Implement memcpy_from_tiled functions (for X-tiling only atm)
To provide symmetry with the ability to write into an X-tiled mapping of
a bo, we add the memcpy_from_tiled to be able to read back from the same
bo.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/blt.c b/src/sna/blt.c
index b27c683..4a33093 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -277,6 +277,70 @@ memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
}
}
+static fast_memcpy void
+memcpy_from_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ const unsigned tile_width = 512;
+ const unsigned tile_height = 8;
+ const unsigned tile_size = 4096;
+
+ const unsigned cpp = bpp / 8;
+ const unsigned stride_tiles = src_stride / tile_width;
+ const unsigned swizzle_pixels = tile_width / cpp;
+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+ const unsigned tile_mask = (1 << tile_pixels) - 1;
+
+ unsigned x, y;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+
+ for (y = 0; y < height; ++y) {
+ const uint32_t sy = y + src_y;
+ const uint32_t tile_row =
+ (sy / tile_height * stride_tiles * tile_size +
+ (sy & (tile_height-1)) * tile_width);
+ uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
+ uint32_t sx = src_x, offset;
+
+ x = width * cpp;
+ if (sx & (swizzle_pixels - 1)) {
+ const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
+ const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
+ offset = tile_row +
+ (sx >> tile_pixels) * tile_size +
+ (sx & tile_mask) * cpp;
+ memcpy(dst_row, (const char *)src + offset, length * cpp);
+
+ dst_row += length * cpp;
+ x -= length * cpp;
+ sx += length;
+ }
+ while (x >= 512) {
+ assert((sx & tile_mask) == 0);
+ offset = tile_row + (sx >> tile_pixels) * tile_size;
+
+ memcpy(dst_row, (const char *)src + offset, 512);
+
+ dst_row += 512;
+ x -= 512;
+ sx += swizzle_pixels;
+ }
+ if (x) {
+ offset = tile_row +
+ (sx >> tile_pixels) * tile_size +
+ (sx & tile_mask) * cpp;
+ memcpy(dst_row, (const char *)src + offset, x);
+ }
+ }
+}
+
fast_memcpy static void
memcpy_to_tiled_x__swizzle_9(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
@@ -347,6 +411,75 @@ memcpy_to_tiled_x__swizzle_9(const void *src, void *dst, int bpp,
}
fast_memcpy static void
+memcpy_from_tiled_x__swizzle_9(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ const unsigned tile_width = 512;
+ const unsigned tile_height = 8;
+ const unsigned tile_size = 4096;
+
+ const unsigned cpp = bpp / 8;
+ const unsigned stride_tiles = src_stride / tile_width;
+ const unsigned swizzle_pixels = 64 / cpp;
+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+ const unsigned tile_mask = (1 << tile_pixels) - 1;
+
+ unsigned x, y;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+
+ for (y = 0; y < height; ++y) {
+ const uint32_t sy = y + src_y;
+ const uint32_t tile_row =
+ (sy / tile_height * stride_tiles * tile_size +
+ (sy & (tile_height-1)) * tile_width);
+ uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
+ uint32_t sx = src_x, offset;
+
+ x = width * cpp;
+ if (sx & (swizzle_pixels - 1)) {
+ const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
+ const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
+ offset = tile_row +
+ (sx >> tile_pixels) * tile_size +
+ (sx & tile_mask) * cpp;
+ offset ^= (offset >> 3) & 64;
+
+ memcpy(dst_row, (const char *)src + offset, length * cpp);
+
+ dst_row += length * cpp;
+ x -= length * cpp;
+ sx += length;
+ }
+ while (x >= 64) {
+ offset = tile_row +
+ (sx >> tile_pixels) * tile_size +
+ (sx & tile_mask) * cpp;
+ offset ^= (offset >> 3) & 64;
+
+ memcpy(dst_row, (const char *)src + offset, 64);
+
+ dst_row += 64;
+ x -= 64;
+ sx += swizzle_pixels;
+ }
+ if (x) {
+ offset = tile_row +
+ (sx >> tile_pixels) * tile_size +
+ (sx & tile_mask) * cpp;
+ offset ^= (offset >> 3) & 64;
+ memcpy(dst_row, (const char *)src + offset, x);
+ }
+ }
+}
+
+fast_memcpy static void
memcpy_to_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
int16_t src_x, int16_t src_y,
@@ -416,6 +549,75 @@ memcpy_to_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp,
}
fast_memcpy static void
+memcpy_from_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ const unsigned tile_width = 512;
+ const unsigned tile_height = 8;
+ const unsigned tile_size = 4096;
+
+ const unsigned cpp = bpp / 8;
+ const unsigned stride_tiles = src_stride / tile_width;
+ const unsigned swizzle_pixels = 64 / cpp;
+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+ const unsigned tile_mask = (1 << tile_pixels) - 1;
+
+ unsigned x, y;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+
+ for (y = 0; y < height; ++y) {
+ const uint32_t sy = y + src_y;
+ const uint32_t tile_row =
+ (sy / tile_height * stride_tiles * tile_size +
+ (sy & (tile_height-1)) * tile_width);
+ uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
+ uint32_t sx = src_x, offset;
+
+ x = width * cpp;
+ if (sx & (swizzle_pixels - 1)) {
+ const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
+ const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
+ offset = tile_row +
+ (sx >> tile_pixels) * tile_size +
+ (sx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+
+ memcpy(dst_row, (const char *)src + offset, length * cpp);
+
+ dst_row += length * cpp;
+ x -= length * cpp;
+ sx += length;
+ }
+ while (x >= 64) {
+ offset = tile_row +
+ (sx >> tile_pixels) * tile_size +
+ (sx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+
+ memcpy(dst_row, (const char *)src + offset, 64);
+
+ dst_row += 64;
+ x -= 64;
+ sx += swizzle_pixels;
+ }
+ if (x) {
+ offset = tile_row +
+ (sx >> tile_pixels) * tile_size +
+ (sx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+ memcpy(dst_row, (const char *)src + offset, x);
+ }
+ }
+}
+
+fast_memcpy static void
memcpy_to_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp,
int32_t src_stride, int32_t dst_stride,
int16_t src_x, int16_t src_y,
@@ -483,7 +685,75 @@ memcpy_to_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp,
}
}
-void choose_memcpy_to_tiled_x(struct kgem *kgem, int swizzling)
+fast_memcpy static void
+memcpy_from_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ const unsigned tile_width = 512;
+ const unsigned tile_height = 8;
+ const unsigned tile_size = 4096;
+
+ const unsigned cpp = bpp / 8;
+ const unsigned stride_tiles = src_stride / tile_width;
+ const unsigned swizzle_pixels = 64 / cpp;
+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+ const unsigned tile_mask = (1 << tile_pixels) - 1;
+
+ unsigned x, y;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ dst = (uint8_t *)dst + dst_y * dst_stride + dst_x * cpp;
+
+ for (y = 0; y < height; ++y) {
+ const uint32_t sy = y + src_y;
+ const uint32_t tile_row =
+ (sy / tile_height * stride_tiles * tile_size +
+ (sy & (tile_height-1)) * tile_width);
+ uint8_t *dst_row = (uint8_t *)dst + dst_stride * y;
+ uint32_t sx = src_x, offset;
+
+ x = width * cpp;
+ if (sx & (swizzle_pixels - 1)) {
+ const uint32_t swizzle_bound_pixels = ALIGN(sx + 1, swizzle_pixels);
+ const uint32_t length = min(src_x + width, swizzle_bound_pixels) - sx;
+ offset = tile_row +
+ (sx >> tile_pixels) * tile_size +
+ (sx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+ memcpy(dst_row, (const char *)src + offset, length * cpp);
+
+ dst_row += length * cpp;
+ x -= length * cpp;
+ sx += length;
+ }
+ while (x >= 64) {
+ offset = tile_row +
+ (sx >> tile_pixels) * tile_size +
+ (sx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+
+ memcpy(dst_row, (const char *)src + offset, 64);
+
+ dst_row += 64;
+ x -= 64;
+ sx += swizzle_pixels;
+ }
+ if (x) {
+ offset = tile_row +
+ (sx >> tile_pixels) * tile_size +
+ (sx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+ memcpy(dst_row, (const char *)src + offset, x);
+ }
+ }
+}
+
+void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling)
{
switch (swizzling) {
default:
@@ -492,18 +762,22 @@ void choose_memcpy_to_tiled_x(struct kgem *kgem, int swizzling)
case I915_BIT_6_SWIZZLE_NONE:
DBG(("%s: no swizzling\n", __FUNCTION__));
kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0;
+ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_0;
break;
case I915_BIT_6_SWIZZLE_9:
DBG(("%s: 6^9 swizzling\n", __FUNCTION__));
kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9;
+ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9;
break;
case I915_BIT_6_SWIZZLE_9_10:
DBG(("%s: 6^9^10 swizzling\n", __FUNCTION__));
kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_10;
+ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_10;
break;
case I915_BIT_6_SWIZZLE_9_11:
DBG(("%s: 6^9^11 swizzling\n", __FUNCTION__));
kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_11;
+ kgem->memcpy_from_tiled_x = memcpy_from_tiled_x__swizzle_9_11;
break;
}
}
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 55c4fe5..5b78c83 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -991,7 +991,7 @@ static void kgem_init_swizzling(struct kgem *kgem)
if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling))
goto out;
- choose_memcpy_to_tiled_x(kgem, tiling.swizzle_mode);
+ choose_memcpy_tiled_x(kgem, tiling.swizzle_mode);
out:
gem_close(kgem->fd, tiling.handle);
}
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 91a38f7..d1a391a 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -201,6 +201,11 @@ struct kgem {
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
+ void (*memcpy_from_tiled_x)(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height);
uint16_t reloc__self[256];
uint32_t batch[64*1024-8] page_aligned;
@@ -713,6 +718,21 @@ memcpy_to_tiled_x(struct kgem *kgem,
width, height);
}
-void choose_memcpy_to_tiled_x(struct kgem *kgem, int swizzling);
+static inline void
+memcpy_from_tiled_x(struct kgem *kgem,
+ const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ return kgem->memcpy_from_tiled_x(src, dst, bpp,
+ src_stride, dst_stride,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height);
+}
+
+void choose_memcpy_tiled_x(struct kgem *kgem, int swizzling);
#endif /* KGEM_H */
More information about the xorg-commit
mailing list