[Mesa-dev] [PATCH 06/10] radeonsi: implement transfer_map with user_stride

Wed Apr 25 21:16:27 UTC 2018

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

AddrLib aligns the stride in ways that are hard to express
concisely. In practice, however, a stride that is aligned to
both 64 pixels and 256 bytes is passed through unchanged.
---
 src/gallium/drivers/radeonsi/si_texture.c | 35 +++++++++++++++++++----
 1 file changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index 43f1560ec3e..368fb034977 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -170,43 +170,54 @@ static void si_copy_from_staging_texture(struct pipe_context *ctx, struct r600_t
 					   transfer->box.x, transfer->box.y, transfer->box.z,
 					   src, 0, &sbox);
 		return;
 	}
 
 	sctx->dma_copy(ctx, dst, transfer->level,
 		       transfer->box.x, transfer->box.y, transfer->box.z,
 		       src, 0, &sbox);
 }
 
+static unsigned si_texture_get_stride(struct si_screen *sscreen,
+				      struct r600_texture *rtex,
+				      unsigned level)
+{
+	if (sscreen->info.chip_class >= GFX9) {
+		return rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe;
+	} else {
+		return rtex->surface.u.legacy.level[level].nblk_x *
+		       rtex->surface.bpe;
+	}
+}
+
 static unsigned si_texture_get_offset(struct si_screen *sscreen,
 				      struct r600_texture *rtex, unsigned level,
 				      const struct pipe_box *box,
 				      unsigned *stride,
 				      unsigned *layer_stride)
 {
+	*stride = si_texture_get_stride(sscreen, rtex, level);
+
 	if (sscreen->info.chip_class >= GFX9) {
-		*stride = rtex->surface.u.gfx9.surf_pitch * rtex->surface.bpe;
 		*layer_stride = rtex->surface.u.gfx9.surf_slice_size;
 
 		if (!box)
 			return 0;
 
 		/* Each texture is an array of slices. Each slice is an array
 		 * of mipmap levels. */
 		return box->z * rtex->surface.u.gfx9.surf_slice_size +
 		       rtex->surface.u.gfx9.offset[level] +
 		       (box->y / rtex->surface.blk_h *
 			rtex->surface.u.gfx9.surf_pitch +
 			box->x / rtex->surface.blk_w) * rtex->surface.bpe;
 	} else {
-		*stride = rtex->surface.u.legacy.level[level].nblk_x *
-			  rtex->surface.bpe;
 		assert((uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4 <= UINT_MAX);
 		*layer_stride = (uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4;
 
 		if (!box)
 			return rtex->surface.u.legacy.level[level].offset;
 
 		/* Each texture is an array of mipmap levels. Each level is
 		 * an array of slices. */
 		return rtex->surface.u.legacy.level[level].offset +
 		       box->z * (uint64_t)rtex->surface.u.legacy.level[level].slice_size_dw * 4 +
@@ -1686,21 +1697,23 @@ static void *si_texture_transfer_map(struct pipe_context *ctx,
 
 		/* Tiled textures need to be converted into a linear texture for CPU
 		 * access. The staging texture is always linear and is placed in GART.
 		 *
 		 * Reading from VRAM or GTT WC is slow, always use the staging
 		 * texture in this case.
 		 *
 		 * Use the staging texture for uploads if the underlying BO
 		 * is busy.
 		 */
-		if (!rtex->surface.is_linear)
+		if (!rtex->surface.is_linear ||
+		    (user_stride &&
+		     user_stride != si_texture_get_stride(sctx->screen, rtex, level)))
 			use_staging_texture = true;
 		else if (usage & PIPE_TRANSFER_READ)
 			use_staging_texture =
 				rtex->resource.domains & RADEON_DOMAIN_VRAM ||
 				rtex->resource.flags & RADEON_FLAG_GTT_WC;
 		/* Write & linear only: */
 		else if (si_rings_is_buffer_referenced(sctx, rtex->resource.buf,
 						       RADEON_USAGE_READWRITE) ||
 			 !sctx->ws->buffer_wait(rtex->resource.buf, 0,
 						RADEON_USAGE_READWRITE)) {
@@ -1778,23 +1791,33 @@ static void *si_texture_transfer_map(struct pipe_context *ctx,
 							 level, box,
 							 &trans->b.b.stride,
 							 &trans->b.b.layer_stride);
 		}
 
 		trans->staging = (struct r600_resource*)staging_depth;
 		buf = trans->staging;
 	} else if (use_staging_texture) {
 		struct pipe_resource resource;
 		struct r600_texture *staging;
+		struct pipe_box staging_box = *box;
+
+		if (user_stride) {
+			if (user_stride % rtex->surface.bpe != 0)
+				goto fail_trans;
+
+			staging_box.width = user_stride / rtex->surface.bpe;
+			assert(staging_box.width >= box->width);
+		}
+
+		si_init_temp_resource_from_box(&resource, texture, &staging_box, level,
+					       SI_RESOURCE_FLAG_TRANSFER);
 
-		si_init_temp_resource_from_box(&resource, texture, box, level,
-						 SI_RESOURCE_FLAG_TRANSFER);
 		resource.usage = (usage & PIPE_TRANSFER_READ) ?
 			PIPE_USAGE_STAGING : PIPE_USAGE_STREAM;
 
 		/* Create the temporary texture. */
 		staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource);
 		if (!staging) {
 			PRINT_ERR("failed to create temporary texture to hold untiled copy\n");
 			goto fail_trans;
 		}
 		trans->staging = &staging->resource;
-- 
2.17.0