[Mesa-dev] [PATCH] i965: Fix ETC2/EAC GetCompressed* functions on Gen7 GPUs
Kenneth Graunke
kenneth at whitecape.org
Fri May 25 18:53:58 UTC 2018
On Friday, February 23, 2018 7:10:55 AM PDT Eleni Maria Stea wrote:
> Gen 7 GPUs store the compressed EAC/ETC2 images in other non-compressed
> formats that can render. When GetCompressed* functions are called, the
> pixels are returned in the non-compressed format that is used for the
> rendering.
>
> With this patch we store both the compressed and non-compressed versions
> of the image, so that both rendering commands and GetCompressed*
> commands work.
>
> Also, the assertions for GL_MAP_WRITE_BIT and GL_MAP_INVALIDATE_RANGE_BIT
> in intel_miptree_map_etc function have been removed because when the
> miptree is mapped for reading (for example from a GetCompress*
> function) the GL_MAP_WRITE_BIT won't be set (and shouldn't be set).
> ---
> src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 10 +-
> src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 14 +++
> src/mesa/drivers/dri/i965/intel_tex.c | 157 +++++++++++++++++++++-----
> src/mesa/drivers/dri/i965/intel_tex.h | 8 ++
> src/mesa/drivers/dri/i965/intel_tex_image.c | 93 ++++++++++++++-
> src/mesa/drivers/dri/i965/intel_tex_obj.h | 8 ++
> 6 files changed, 256 insertions(+), 34 deletions(-)
Hello,
I think this patch could probably be simplified a bit, with less
duplication of core Mesa stuff...some suggestions below...
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 22977d6659..c8c7c025b6 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -730,9 +730,10 @@ miptree_create(struct brw_context *brw,
> mesa_format etc_format = MESA_FORMAT_NONE;
> uint32_t alloc_flags = 0;
>
> - format = intel_lower_compressed_format(brw, format);
> -
> - etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
> + if (!(flags & MIPTREE_CREATE_ETC)) {
> + format = intel_lower_compressed_format(brw, format);
> + etc_format = (format != tex_format) ? tex_format : MESA_FORMAT_NONE;
> + }
>
> if (flags & MIPTREE_CREATE_BUSY)
> alloc_flags |= BO_ALLOC_BUSY;
> @@ -3314,9 +3315,6 @@ intel_miptree_map_etc(struct brw_context *brw,
> assert(mt->format == MESA_FORMAT_R8G8B8X8_UNORM);
> }
>
> - assert(map->mode & GL_MAP_WRITE_BIT);
> - assert(map->mode & GL_MAP_INVALIDATE_RANGE_BIT);
> -
> map->stride = _mesa_format_row_stride(mt->etc_format, map->w);
> map->buffer = malloc(_mesa_format_image_size(mt->etc_format,
> map->w, map->h, 1));
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> index 7fcf09f118..bf6195b97a 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
> @@ -379,6 +379,20 @@ enum intel_miptree_create_flags {
> * that the miptree will be created with mt->aux_usage == NONE.
> */
> MIPTREE_CREATE_NO_AUX = 1 << 2,
> +
> + /** Create a second miptree for the compressed pixels (Gen7 only)
> + *
> + * On Gen7, we need to store 2 miptrees for some compressed
> + * formats so we can handle rendering as well as getting the
> + * compressed image data. This flag indicates that the miptree
> + * is expected to hold compressed data for the latter case.
> + */
> + MIPTREE_CREATE_ETC = 1 << 3,
> +};
Create flags look fine.
> +
> +enum intel_miptree_upload_flags {
> + MIPTREE_UPLOAD_DEFAULT = 0,
> + MIPTREE_UPLOAD_ETC,
> };
Rather than creating an extra set of flags here, I would just extend the
GL_MAP_*_BIT flags that already get passed around as 'mode'. There's
some precedent for that with BRW_MAP_DIRECT_BIT in intel_mipmap_tree.h:
/**
* This bit extends the set of GL_MAP_*_BIT enums.
*
* When calling intel_miptree_map() on an ETC-transcoded-to-RGB miptree or a
* depthstencil-split-to-separate-stencil miptree, we'll normally make a
* temporary and recreate the kind of data requested by Mesa core, since we're
* satisfying some glGetTexImage() request or something.
*
* However, occasionally you want to actually map the miptree's current data
* without transcoding back. This flag to intel_miptree_map() gets you that.
*/
#define BRW_MAP_DIRECT_BIT 0x80000000
So, I'd just make a BRW_MAP_ETC_BIT 0x40000000, and use that instead.
The advantage is that you should be able to reuse existing functions
rather than creating new ones that take an extra 'flags' parameter.
>
> struct intel_mipmap_tree *intel_miptree_create(struct brw_context *brw,
> diff --git a/src/mesa/drivers/dri/i965/intel_tex.c b/src/mesa/drivers/dri/i965/intel_tex.c
> index 65a1cb37d4..56077a7676 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex.c
> @@ -66,6 +66,8 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx,
> struct intel_texture_image *intel_image = intel_texture_image(image);
> struct gl_texture_object *texobj = image->TexObject;
> struct intel_texture_object *intel_texobj = intel_texture_object(texobj);
> + struct gen_device_info *devinfo = &brw->screen->devinfo;
> + mesa_format fmt = image->TexFormat;
>
> assert(image->Border == 0);
>
> @@ -92,9 +94,10 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx,
> __func__, texobj, image->Level,
> image->Width, image->Height, image->Depth, intel_texobj->mt);
> } else {
> - intel_image->mt = intel_miptree_create_for_teximage(brw, intel_texobj,
> - intel_image,
> - MIPTREE_CREATE_DEFAULT);
> + intel_image->mt =
> + intel_miptree_create_for_teximage(brw, intel_texobj,
> + intel_image,
> + MIPTREE_CREATE_DEFAULT);
> if (!intel_image->mt)
> return false;
>
> @@ -110,6 +113,33 @@ intel_alloc_texture_image_buffer(struct gl_context *ctx,
> image->Width, image->Height, image->Depth, intel_image->mt);
> }
>
> + if (devinfo->gen == 7 && _mesa_is_format_etc2(fmt)) {
> + if (intel_texobj->cmt && intel_miptree_match_image(intel_texobj->cmt,
> + image)) {
> + intel_miptree_reference(&intel_image->cmt, intel_texobj->cmt);
> + DBG("%s: alloc obj %p level %d %dx%dx%d using object's miptree %p\n",
> + __func__, texobj, image->Level,
> + image->Width, image->Height, image->Depth, intel_texobj->cmt);
> + } else {
> + intel_image->cmt =
> + intel_miptree_create_for_teximage(brw, intel_texobj,
> + intel_image,
> + MIPTREE_CREATE_ETC);
> + if (!intel_image->cmt)
> + return false;
> +
> + /* Even if the object currently has a mipmap tree associated
> + * with it, this one is a more likely candidate to represent the
> + * whole object since our level didn't fit what was there
> + * before, and any lower levels would fit into our miptree.
> + */
> + intel_miptree_reference(&intel_texobj->cmt, intel_image->cmt);
> +
> + DBG("%s: alloc obj %p level %d %dx%dx%d using new miptree %p\n",
> + __func__, texobj, image->Level,
> + image->Width, image->Height, image->Depth, intel_image->cmt);
> + }
> + }
> intel_texobj->needs_validate = true;
>
> return true;
> @@ -136,6 +166,10 @@ intel_alloc_texture_storage(struct gl_context *ctx,
> int face;
> int level;
>
> + mesa_format fmt = first_image->TexFormat;
> + struct gen_device_info *devinfo = &brw->screen->devinfo;
> + bool is_etc_gen7 = (devinfo->gen == 7) && _mesa_is_format_etc2(fmt);
I'd prefer:
bool is_fake_etc = devinfo->gen < 8 && _mesa_is_format_etc2(fmt);
Also, please put devinfo near the top of the function, so it's available
in future patches that may want to add more gen checks.
> +
> /* If the object's current miptree doesn't match what we need, make a new
> * one.
> */
> @@ -157,6 +191,22 @@ intel_alloc_texture_storage(struct gl_context *ctx,
> }
> }
>
> + if (is_etc_gen7) {
> + if (!intel_texobj->cmt ||
> + !intel_miptree_match_image(intel_texobj->cmt, first_image) ||
> + intel_texobj->cmt->last_level != levels - 1) {
> + intel_miptree_release(&intel_texobj->cmt);
> +
> + intel_get_image_dims(first_image, &width, &height, &depth);
> + intel_texobj->cmt = intel_miptree_create(brw, texobj->Target,
> + first_image->TexFormat,
> + 0, levels - 1,
> + width, height, depth,
> + MAX2(num_samples, 1),
> + MIPTREE_CREATE_ETC);
> + }
> + }
> +
> for (face = 0; face < numFaces; face++) {
> for (level = 0; level < levels; level++) {
> struct gl_texture_image *image = texobj->Image[face][level];
> @@ -169,6 +219,8 @@ intel_alloc_texture_storage(struct gl_context *ctx,
> return false;
>
> intel_miptree_reference(&intel_image->mt, intel_texobj->mt);
> + if (is_etc_gen7)
> + intel_miptree_reference(&intel_image->cmt, intel_texobj->cmt);
> }
> }
>
> @@ -191,29 +243,28 @@ intel_free_texture_image_buffer(struct gl_context * ctx,
> DBG("%s\n", __func__);
>
> intel_miptree_release(&intelImage->mt);
> + intel_miptree_release(&intelImage->cmt);
>
> _swrast_free_texture_image_buffer(ctx, texImage);
> }
>
> -/**
> - * Map texture memory/buffer into user space.
> - * Note: the region of interest parameters are ignored here.
> - * \param mode bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT
> - * \param mapOut returns start of mapping of region of interest
> - * \param rowStrideOut returns row stride in bytes
> - */
> -static void
> -intel_map_texture_image(struct gl_context *ctx,
> - struct gl_texture_image *tex_image,
> - GLuint slice,
> - GLuint x, GLuint y, GLuint w, GLuint h,
> - GLbitfield mode,
> - GLubyte **map,
> - GLint *out_stride)
With my suggestion, you won't need to split this function - you can just
do intel_map_texture_image with mode = GL_MAP_READ_BIT | BRW_MAP_ETC_BIT
and do the special CMT case if BRW_MAP_ETC_BIT is set. Much smaller
change here.
> +void
> +intel_map_texture_image_for_upload(struct gl_context *ctx,
> + struct gl_texture_image *tex_image,
> + GLuint slice,
> + GLuint x, GLuint y, GLuint w, GLuint h,
> + GLbitfield mode,
> + GLubyte **map,
> + GLint *out_stride,
> + enum intel_miptree_upload_flags flags)
> {
> struct brw_context *brw = brw_context(ctx);
> + struct gen_device_info *devinfo = &brw->screen->devinfo;
> struct intel_texture_image *intel_image = intel_texture_image(tex_image);
> struct intel_mipmap_tree *mt = intel_image->mt;
> + struct intel_mipmap_tree *cmt = intel_image->cmt;
> + mesa_format fmt = tex_image->TexFormat;
> +
> ptrdiff_t stride;
>
> /* Our texture data is always stored in a miptree. */
> @@ -229,13 +280,56 @@ intel_map_texture_image(struct gl_context *ctx,
> if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
> slice = tex_image->Face;
>
> - intel_miptree_map(brw, mt,
> - tex_image->Level + tex_image->TexObject->MinLevel,
> - slice + tex_image->TexObject->MinLayer,
> - x, y, w, h, mode,
> - (void **)map, &stride);
> + if (flags == MIPTREE_UPLOAD_DEFAULT) {
> + intel_miptree_map(brw, mt,
> + tex_image->Level + tex_image->TexObject->MinLevel,
> + slice + tex_image->TexObject->MinLayer,
> + x, y, w, h, mode,
> + (void **)map, &stride);
> + *out_stride = stride;
> + }
>
> - *out_stride = stride;
> + if (devinfo->gen == 7) {
> + /* In gen 7 we map the compressed miptree (cmt) in these 2 cases:
> + * 1- when we map the image for reading (glGetCompressed* function)
> + * 2- when we create the compressed miptree (the flag must be
> + * MIPTREE_UPLOAD_ETC).
> + */
> + bool is_etc_upload = (flags == MIPTREE_UPLOAD_ETC);
> + bool is_etc_get = !(mode & GL_MAP_WRITE_BIT) &&
> + _mesa_is_format_etc2(fmt);
> +
> + if (is_etc_upload || is_etc_get) {
> + assert(cmt);
> + intel_miptree_map(brw, cmt,
> + tex_image->Level + tex_image->TexObject->MinLevel,
> + slice + tex_image->TexObject->MinLayer,
> + x, y, w, h, mode,
> + (void **)map, &stride);
> + *out_stride = stride;
> + }
> + }
> +}
> +
> +/**
> + * Map texture memory/buffer into user space.
> + * Note: the region of interest parameters are ignored here.
> + * \param mode bitmask of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT
> + * \param mapOut returns start of mapping of region of interest
> + * \param rowStrideOut returns row stride in bytes
> + */
> +static void
> +intel_map_texture_image(struct gl_context *ctx,
> + struct gl_texture_image *tex_image,
> + GLuint slice,
> + GLuint x, GLuint y, GLuint w, GLuint h,
> + GLbitfield mode,
> + GLubyte **map,
> + GLint *out_stride)
> +{
> + intel_map_texture_image_for_upload(ctx, tex_image, slice, x, y, w, h,
> + mode, map, out_stride,
> + MIPTREE_UPLOAD_DEFAULT);
> }
>
> static void
> @@ -245,13 +339,22 @@ intel_unmap_texture_image(struct gl_context *ctx,
> struct brw_context *brw = brw_context(ctx);
> struct intel_texture_image *intel_image = intel_texture_image(tex_image);
> struct intel_mipmap_tree *mt = intel_image->mt;
> + struct intel_mipmap_tree *cmt = intel_image->cmt;
>
> if (tex_image->TexObject->Target == GL_TEXTURE_CUBE_MAP)
> slice = tex_image->Face;
>
> - intel_miptree_unmap(brw, mt,
> - tex_image->Level + tex_image->TexObject->MinLevel,
> - slice + tex_image->TexObject->MinLayer);
> + if (cmt) {
> + intel_miptree_unmap(brw, cmt,
> + tex_image->Level + tex_image->TexObject->MinLevel,
> + slice + tex_image->TexObject->MinLayer);
> + }
> +
> + if (mt) {
> + intel_miptree_unmap(brw, mt,
> + tex_image->Level + tex_image->TexObject->MinLevel,
> + slice + tex_image->TexObject->MinLayer);
> + }
> }
>
> static GLboolean
> diff --git a/src/mesa/drivers/dri/i965/intel_tex.h b/src/mesa/drivers/dri/i965/intel_tex.h
> index 9fb1b3ffbc..42cd0de2de 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex.h
> +++ b/src/mesa/drivers/dri/i965/intel_tex.h
> @@ -53,4 +53,12 @@ intel_miptree_create_for_teximage(struct brw_context *brw,
> void intel_finalize_mipmap_tree(struct brw_context *brw,
> struct gl_texture_object *tex_obj);
>
> +void intel_map_texture_image_for_upload(struct gl_context *ctx,
> + struct gl_texture_image *tex_image,
> + GLuint slice,
> + GLuint x, GLuint y, GLuint w, GLuint h,
> + GLbitfield mode,
> + GLubyte **map,
> + GLint *out_stride,
> + enum intel_miptree_upload_flags flags);
And this would be dropped.
> #endif
> diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c
> index 90b6519625..8e7d63a2e0 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_image.c
> +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
> @@ -849,6 +849,96 @@ flush_astc_denorms(struct gl_context *ctx, GLuint dims,
> }
> }
>
> +static void intel_upload_compressed_texsubimage(struct gl_context *ctx,
> + GLuint dims, struct compressed_pixelstore *store,
> + struct gl_texture_image *intelImage,
> + GLint xoffset, GLint yoffset, GLint zoffset,
> + GLsizei width, GLsizei height, enum intel_miptree_upload_flags flags,
> + const GLvoid *data)
> +{
> + if (!data)
> + return;
> +
> + const GLubyte *src = (const GLubyte *) data + store->SkipBytes;
> +
> + GLint i, slice, dstRowStride;
> + GLubyte *dstMap;
> +
> + for (slice = 0; slice < store->CopySlices; slice++) {
> + if (dstMap) {
> + intel_map_texture_image_for_upload(ctx, intelImage, slice + zoffset,
> + xoffset, yoffset, width, height,
> + GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT,
> + &dstMap, &dstRowStride, flags);
> +
> + if (dstRowStride == store->TotalBytesPerRow &&
> + dstRowStride == store->CopyBytesPerRow) {
> + memcpy(dstMap, src, store->CopyBytesPerRow *
> + store->CopyRowsPerSlice);
> + src += store->CopyBytesPerRow * store->CopyRowsPerSlice;
> + }
> + else {
> + for (i = 0; i < store->CopyRowsPerSlice; i++) {
> + memcpy(dstMap, src, store->CopyBytesPerRow);
> + dstMap += dstRowStride;
> + src += store->TotalBytesPerRow;
> + }
> + }
> +
> + ctx->Driver.UnmapTextureImage(ctx, intelImage, slice + zoffset);
> +
> + src += store->TotalBytesPerRow * (store->TotalRowsPerSlice -
> + store->CopyRowsPerSlice);
> + }
> + else {
> + _mesa_error(ctx, GL_OUT_OF_MEMORY, "glCompressedTexSubImage%uD",
> + dims);
> + }
> + }
> +}
This is a ton of copy and paste from _mesa_store_compressed_texsubimage.
With my mode vs. flags suggestion, we can do ctx->Driver.MapTextureImage
instead of intel_map_texture_image_for_upload, making this identical...
So, I would edit texstore.c to pull the core Mesa code into a helper
function. Replace GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT with
a parameter...
> +
> +static void
> +intel_store_compressed_texsubimage(struct gl_context *ctx, GLuint dims,
> + struct gl_texture_image *intelImage,
> + GLint xoffset, GLint yoffset, GLint zoffset,
> + GLsizei width, GLsizei height,
> + GLsizei depth, GLenum format,
> + GLsizei imageSize, const GLvoid *data)
> +{
> + struct compressed_pixelstore store;
> + struct brw_context *brw = (struct brw_context*) ctx;
> + const struct gen_device_info *devinfo = &brw->screen->devinfo;
> +
> + if (dims == 1) {
> + _mesa_problem(ctx, "Unexpected 1D compressed texsubimage call");
> + return;
> + }
> +
> + _mesa_compute_compressed_pixelstore(dims, intelImage->TexFormat,
> + width, height, depth,
> + &ctx->Unpack, &store);
> +
> + /* get pointer to src pixels (may be in a pbo which we'll map here) */
> + data = _mesa_validate_pbo_compressed_teximage(ctx, dims, imageSize, data,
> + &ctx->Unpack,
> + "glCompressedTexSubImage");
> + if (!data)
> + return;
This part is fine, it's not much duplication, and lets you only compute
the pixelstore/unmap the PBO once for both uploads. So here, we'd just
change...
> +
> + intel_upload_compressed_texsubimage(ctx, dims, &store, intelImage,
> + xoffset, yoffset, zoffset,
> + width, height,
> + MIPTREE_UPLOAD_DEFAULT, data);
> +
> + if (devinfo->gen == 7 && _mesa_is_format_etc2(intelImage->TexFormat)) {
> + intel_upload_compressed_texsubimage(ctx, dims, &store, intelImage,
> + xoffset, yoffset, zoffset,
> + width, height,
> + MIPTREE_UPLOAD_ETC, data);
> + }
..these to call the new texstore.c helper with BRW_MAP_ETC_BIT as an
extra MapTextureImage mode bit.
Other than that, it looks good to me. Thanks for doing this, and so
sorry it took me three months to get around to reviewing it!
> +
> + _mesa_unmap_teximage_pbo(ctx, &ctx->Unpack);
> +}
>
> static void
> intelCompressedTexSubImage(struct gl_context *ctx, GLuint dims,
> @@ -859,7 +949,7 @@ intelCompressedTexSubImage(struct gl_context *ctx, GLuint dims,
> GLsizei imageSize, const GLvoid *data)
> {
> /* Upload the compressed data blocks */
> - _mesa_store_compressed_texsubimage(ctx, dims, texImage,
> + intel_store_compressed_texsubimage(ctx, dims, texImage,
> xoffset, yoffset, zoffset,
> width, height, depth,
> format, imageSize, data);
> @@ -869,6 +959,7 @@ intelCompressedTexSubImage(struct gl_context *ctx, GLuint dims,
> texImage->TexFormat);
> bool is_linear_astc = _mesa_is_astc_format(gl_format) &&
> !_mesa_is_srgb_format(gl_format);
> +
> struct brw_context *brw = (struct brw_context*) ctx;
> const struct gen_device_info *devinfo = &brw->screen->devinfo;
> if (devinfo->gen == 9 && is_linear_astc)
> diff --git a/src/mesa/drivers/dri/i965/intel_tex_obj.h b/src/mesa/drivers/dri/i965/intel_tex_obj.h
> index 27c18b7c3c..0b3d2fc08a 100644
> --- a/src/mesa/drivers/dri/i965/intel_tex_obj.h
> +++ b/src/mesa/drivers/dri/i965/intel_tex_obj.h
> @@ -50,6 +50,11 @@ struct intel_texture_object
> */
> struct intel_mipmap_tree *mt;
>
> + /* This miptree is used to store the compressed ETC2/EAC pixels
> + * on Gen 7 GPUs for GetCompressed* functions to work.
> + */
> + struct intel_mipmap_tree *cmt;
> +
> /**
> * Set when mipmap trees in the texture images of this texture object
> * might not all be the mipmap tree above.
> @@ -79,6 +84,9 @@ struct intel_texture_image
> * Else there is no image data.
> */
> struct intel_mipmap_tree *mt;
> +
> + /* Stores the ETC2 formatted image on Gen7 GPUs */
> + struct intel_mipmap_tree *cmt;
> };
>
> static inline struct intel_texture_object *
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 833 bytes
Desc: This is a digitally signed message part.
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180525/29e5937f/attachment-0001.sig>
More information about the mesa-dev
mailing list