[Mesa-dev] [PATCH v5] nv50/ir, nvc0: add debug options for shader replacement
Rhys Perry
pendingchaos02 at gmail.com
Wed May 30 14:37:15 UTC 2018
Changes in v5:
- Add a forgotten change to fix memory leaks of fname
Changes in v4:
- Move code to nv50_ir_dump.cpp
- Dump headers of nvc0 programs
- Use CRC-32 instead of a truncated SHA1
- Set prog->maxGPR to targ->getFileSize() - 1 and set prog->tlsSize
- Don't compile the program if a replacement is offered
This has the consequence that a program is not dumped when it's replaced
Changes in v3:
- Fixed messed up patch description and diff
- Use the checksum of the TGSI instead of the binary if possible
Changes in v2:
- move "#ifdef DEBUG" from above dumpProgram to above createDumpFilename
The NV50_PROG_DUMP environment variable specifies an (already created)
directory to dump shader binaries, headers and TGSI code. The
NV50_PROG_REPLACE environment variable specifies an (already created)
directory that is searched to find replacement binaries and headers. This
is all much like MESA_SHADER_DUMP_PATH and MESA_SHADER_READ_PATH, except
using CRC-32 checksums instead of program IDs and chip-specific binaries
instead of GLSL.
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
src/gallium/auxiliary/tgsi/tgsi_util.h | 1 +
src/gallium/drivers/nouveau/Makefile.sources | 2 +
src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 40 +++--
.../drivers/nouveau/codegen/nv50_ir_driver.h | 1 +
.../drivers/nouveau/codegen/nv50_ir_dump.cpp | 174 +++++++++++++++++++++
src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h | 70 +++++++++
src/gallium/drivers/nouveau/meson.build | 2 +
src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 138 ++++++++++------
8 files changed, 363 insertions(+), 65 deletions(-)
create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp
create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h
diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h
index 686b90f467..81cf955d8f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -28,6 +28,7 @@
#ifndef TGSI_UTIL_H
#define TGSI_UTIL_H
+#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#if defined __cplusplus
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 65f08c7d8d..e867221818 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -114,6 +114,8 @@ NV50_CODEGEN_SOURCES := \
codegen/nv50_ir_build_util.cpp \
codegen/nv50_ir_build_util.h \
codegen/nv50_ir_driver.h \
+ codegen/nv50_ir_dump.cpp \
+ codegen/nv50_ir_dump.h \
codegen/nv50_ir_emit_nv50.cpp \
codegen/nv50_ir_from_tgsi.cpp \
codegen/nv50_ir_graph.cpp \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index c987da9908..b1782bb4f2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -23,6 +23,7 @@
#include "codegen/nv50_ir.h"
#include "codegen/nv50_ir_target.h"
#include "codegen/nv50_ir_driver.h"
+#include "codegen/nv50_ir_dump.h"
extern "C" {
#include "nouveau_debug.h"
@@ -1244,30 +1245,35 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
prog->print();
targ->parseDriverInfo(info);
- prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
- prog->convertToSSA();
+ if (!nv50_ir::replaceProgramCode(prog)) {
+ prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
- if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
- prog->print();
+ prog->convertToSSA();
- prog->optimizeSSA(info->optLevel);
- prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
+ if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
+ prog->print();
- if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
- prog->print();
+ prog->optimizeSSA(info->optLevel);
+ prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
- if (!prog->registerAllocation()) {
- ret = -4;
- goto out;
- }
- prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
+ if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
+ prog->print();
- prog->optimizePostRA(info->optLevel);
+ if (!prog->registerAllocation()) {
+ ret = -4;
+ goto out;
+ }
+ prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
- if (!prog->emitBinary(info)) {
- ret = -5;
- goto out;
+ prog->optimizePostRA(info->optLevel);
+
+ if (!prog->emitBinary(info)) {
+ ret = -5;
+ goto out;
+ }
+
+ nv50_ir::dumpProgramCodeAndIR(prog);
}
out:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 3d0782f86b..9c23c74628 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -93,6 +93,7 @@ struct nv50_ir_prog_info
uint32_t codeSize;
uint32_t instructions;
uint8_t sourceRep; /* PIPE_SHADER_IR_* */
+ uint32_t sourceHash; /* CRC-32 */
const void *source;
void *relocData;
void *fixupData;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp
new file mode 100644
index 0000000000..fc2551a2ee
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp
@@ -0,0 +1,174 @@
+#include "nv50_ir_dump.h"
+
+#include "codegen/nv50_ir_target.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/crc32.h"
+
+#ifdef DEBUG
+/* Build the path "<dir>/<sourceHash as 8 hex digits><stage suffix><ext>".
+ *
+ * A '/' separator is inserted only when dir does not already end in one.
+ * The stage suffix (".vs", ".tcs", ".tes", ".gs", ".fs", ".cs") is chosen
+ * from info->type.
+ *
+ * Returns a MALLOC'ed string that the caller must FREE, or NULL if the
+ * allocation fails.
+ */
+static char *
+createDumpFilename(const char *dir, const nv50_ir_prog_info *info, const char *ext)
+{
+   const char *stage = "";
+
+   switch (info->type) {
+   case PIPE_SHADER_VERTEX:
+      stage = ".vs";
+      break;
+   case PIPE_SHADER_TESS_CTRL:
+      stage = ".tcs";
+      break;
+   case PIPE_SHADER_TESS_EVAL:
+      stage = ".tes";
+      break;
+   case PIPE_SHADER_GEOMETRY:
+      stage = ".gs";
+      break;
+   case PIPE_SHADER_FRAGMENT:
+      stage = ".fs";
+      break;
+   case PIPE_SHADER_COMPUTE:
+      stage = ".cs";
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   const size_t dirLen = strlen(dir);
+   const char *sep = (dirLen && dir[dirLen - 1] == '/') ? "" : "/";
+
+   /* The size is derived from the actual pieces rather than a hand-counted
+    * constant: dir + separator + 8 hex digits + stage + ext + NUL.
+    */
+   const size_t len = dirLen + strlen(sep) + 8 + strlen(stage) +
+                      strlen(ext) + 1;
+
+   char *fname = (char *)MALLOC(len);
+   if (!fname)
+      return NULL;
+
+   snprintf(fname, len, "%s%s%.8x%s%s", dir, sep, info->bin.sourceHash,
+            stage, ext);
+
+   return fname;
+}
+
+extern "C" {
+
+/* Store a CRC-32 of the shader source in info->bin.sourceHash.
+ *
+ * Only TGSI sources are handled: the hashed range is the header plus the
+ * body, both counted in tgsi_token units. Any other source representation
+ * trips an assert and leaves sourceHash untouched.
+ */
+void
+nv50_ir_create_source_hash(nv50_ir_prog_info *info)
+{
+   if (info->bin.sourceRep != PIPE_SHADER_IR_TGSI) {
+      assert(0);
+      return;
+   }
+
+   const tgsi_header *hdr = (const tgsi_header *)info->bin.source;
+   const unsigned numTokens = hdr->HeaderSize + hdr->BodySize;
+   const unsigned numBytes = numTokens * sizeof(tgsi_token);
+
+   info->bin.sourceHash = util_hash_crc32(info->bin.source, numBytes);
+}
+
+/* Open the dump file for one artifact of a program.
+ *
+ * The file lives in the directory named by the NV50_PROG_DUMP environment
+ * variable and is named after info's source hash, shader stage and ext.
+ * "what" is only used in the log message. "binary" selects "wb" vs "w".
+ *
+ * Returns the opened stream (the caller must fclose it), or NULL when
+ * NV50_PROG_DUMP is unset, the filename cannot be built or the file cannot
+ * be opened.
+ */
+FILE *
+nv50_ir_begin_dump(const nv50_ir_prog_info *info, const char *what,
+                   const char *ext, bool binary)
+{
+   const char *dump_dir = debug_get_option("NV50_PROG_DUMP", NULL);
+   if (!dump_dir)
+      return NULL;
+
+   char *fname = createDumpFilename(dump_dir, info, ext);
+   if (!fname)
+      return NULL;
+
+   FILE *fp = fopen(fname, binary ? "wb" : "w");
+   if (fp) {
+      INFO("Dumping %s of a program to %s\n", what, fname);
+   } else {
+      INFO("Failed to dump %s of a program to %s\n", what, fname);
+   }
+
+   /* Release fname only after the last message that prints it. */
+   FREE(fname);
+
+   return fp;
+}
+
+/* Load a replacement artifact for a program, if one exists.
+ *
+ * The file is looked up in the directory named by the NV50_PROG_REPLACE
+ * environment variable, using the same naming scheme as the dump files.
+ * "what" is only used in the log message.
+ *
+ * On success returns true, stores the file contents in a MALLOC'ed buffer
+ * in *data (ownership passes to the caller, who must FREE it) and the
+ * number of bytes read in *size.
+ *
+ * Returns false, leaving *size and *data untouched, when NV50_PROG_REPLACE
+ * is unset, the file cannot be opened, reading fails or memory runs out.
+ */
+bool
+nv50_ir_get_replacement(const nv50_ir_prog_info *info, const char *what,
+                        const char *ext, size_t *size, void **data)
+{
+   const char *replace_dir = debug_get_option("NV50_PROG_REPLACE", NULL);
+   if (!replace_dir)
+      return false;
+
+   char *fname = createDumpFilename(replace_dir, info, ext);
+   if (!fname)
+      return false;
+
+   FILE *fp = fopen(fname, "rb");
+   if (!fp) {
+      FREE(fname);
+      return false;
+   }
+
+   size_t bufSize = 65536;
+   size_t total = 0;
+   char *buf = (char *)MALLOC(bufSize);
+   bool ok = buf != NULL;
+
+   while (ok) {
+      /* Append after the bytes already read instead of overwriting them. */
+      const size_t got = fread(buf + total, 1, bufSize - total, fp);
+      if (!got)
+         break;
+
+      total += got;
+      if (total == bufSize) {
+         /* Keep the old pointer until the grow succeeds so it can still be
+          * freed on failure.
+          */
+         char *grown = (char *)REALLOC(buf, bufSize, bufSize * 2);
+         if (grown) {
+            buf = grown;
+            bufSize *= 2;
+         } else {
+            ok = false;
+         }
+      }
+   }
+
+   /* A short read caused by an I/O error must not be used as a shader. */
+   if (ok && ferror(fp))
+      ok = false;
+
+   fclose(fp);
+
+   if (ok) {
+      INFO("Replacing %s of a program with that from %s\n", what, fname);
+      *size = total;
+      *data = buf;
+   } else {
+      FREE(buf);
+   }
+
+   FREE(fname);
+
+   return ok;
+}
+
+}
+
+namespace nv50_ir {
+
+/* Dump the emitted binary of prog (".bin") and, for TGSI sources, a text
+ * dump of the source tokens (".tgsi.txt"). Each file is skipped if
+ * nv50_ir_begin_dump() returns no stream. Non-TGSI sources trip an assert.
+ */
+void
+dumpProgramCodeAndIR(const nv50_ir::Program *prog)
+{
+   const nv50_ir_prog_info *info = prog->driver;
+
+   FILE *binFile = nv50_ir_begin_dump(info, "code", ".bin", true);
+   if (binFile) {
+      fwrite(prog->code, prog->binSize, 1, binFile);
+      fclose(binFile);
+   }
+
+   if (info->bin.sourceRep != PIPE_SHADER_IR_TGSI) {
+      assert(0);
+      return;
+   }
+
+   const tgsi_token *tokens = (const tgsi_token *)info->bin.source;
+   FILE *txtFile = nv50_ir_begin_dump(info, "tgsi", ".tgsi.txt", false);
+   if (!txtFile)
+      return;
+
+   tgsi_dump_to_file(tokens, 0, txtFile);
+   fclose(txtFile);
+}
+
+/* Try to substitute the binary of prog with a replacement file (".bin").
+ *
+ * When a replacement is found, prog takes ownership of the loaded buffer,
+ * binSize becomes its length, and maxGPR / tlsSize are set from the
+ * target's GPR and local-memory file sizes.
+ *
+ * Returns true if the code was replaced, false otherwise.
+ */
+bool
+replaceProgramCode(nv50_ir::Program *prog)
+{
+   const nv50_ir::Target *target = prog->getTarget();
+
+   size_t replacementSize;
+   void *replacement;
+   const bool found = nv50_ir_get_replacement(prog->driver, "code", ".bin",
+                                              &replacementSize, &replacement);
+   if (!found)
+      return false;
+
+   FREE(prog->code);
+   prog->code = (uint32_t *)replacement;
+   prog->binSize = replacementSize;
+   prog->maxGPR = target->getFileSize(nv50_ir::FILE_GPR) - 1;
+   prog->tlsSize = target->getFileSize(nv50_ir::FILE_MEMORY_LOCAL);
+
+   return true;
+}
+
+} // namespace nv50_ir
+
+#endif
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h
new file mode 100644
index 0000000000..80f4f7e7d5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h
@@ -0,0 +1,70 @@
+#ifndef __NV50_IR_DUMP__
+#define __NV50_IR_DUMP__
+
+#include <stdio.h>
+#include "util/macros.h" /* For ALWAYS_INLINE */
+#include "nv50_ir_driver.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef DEBUG
+/* Compute a CRC-32 of the shader source and store it in
+ * info->bin.sourceHash. The hash names the dump/replacement files.
+ */
+void
+nv50_ir_create_source_hash(struct nv50_ir_prog_info *info);
+
+/* Open a dump file in the NV50_PROG_DUMP directory. "what" is used for
+ * logging, "ext" is appended to the generated name and "binary" selects
+ * binary vs text mode. Returns NULL if dumping is disabled or the file
+ * cannot be opened; otherwise the caller must fclose the stream.
+ */
+FILE *
+nv50_ir_begin_dump(const struct nv50_ir_prog_info *info, const char *what,
+                   const char *ext, bool binary);
+
+/* Look for a replacement file in the NV50_PROG_REPLACE directory. On
+ * success returns true and hands a MALLOC'ed buffer (*data, *size bytes)
+ * to the caller, who must FREE it.
+ */
+bool
+nv50_ir_get_replacement(const struct nv50_ir_prog_info *info, const char *what,
+                        const char *ext, size_t *size, void **data);
+#else
+/* Non-DEBUG builds: dumping and replacement are compiled out. The stubs
+ * are static so that every includer, C or C++, has its own definition and
+ * a call that is not inlined can never become an undefined reference.
+ */
+static ALWAYS_INLINE void
+nv50_ir_create_source_hash(struct nv50_ir_prog_info *info) {
+   info->bin.sourceHash = 0;
+}
+
+static ALWAYS_INLINE FILE *
+nv50_ir_begin_dump(const struct nv50_ir_prog_info *info, const char *what,
+                   const char *ext, bool binary)
+{
+   return NULL;
+}
+
+static ALWAYS_INLINE bool
+nv50_ir_get_replacement(const struct nv50_ir_prog_info *info, const char *what,
+                        const char *ext, size_t *size, void **data)
+{
+   return false;
+}
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef __cplusplus
+#include "nv50_ir.h"
+
+namespace nv50_ir {
+
+#ifdef DEBUG
+/* Dump the emitted binary and the source IR of prog to the dump directory. */
+void
+dumpProgramCodeAndIR(const Program *prog);
+
+/* Replace the code of prog with a replacement binary, if one is available.
+ * Returns true when the code was replaced.
+ */
+bool
+replaceProgramCode(Program *prog);
+#else
+/* Non-DEBUG builds: no-op stubs with the same signatures as the DEBUG
+ * prototypes, so callers compile identically in both configurations.
+ */
+ALWAYS_INLINE void
+dumpProgramCodeAndIR(const Program *prog) {}
+
+ALWAYS_INLINE bool
+replaceProgramCode(Program *prog) {return false;}
+#endif
+
+} // namespace nv50_ir
+#endif
+
+#endif
diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build
index 242ee0e000..f7774326f2 100644
--- a/src/gallium/drivers/nouveau/meson.build
+++ b/src/gallium/drivers/nouveau/meson.build
@@ -128,6 +128,8 @@ files_libnouveau = files(
'codegen/nv50_ir_build_util.cpp',
'codegen/nv50_ir_build_util.h',
'codegen/nv50_ir_driver.h',
+ 'codegen/nv50_ir_dump.cpp',
+ 'codegen/nv50_ir_dump.h',
'codegen/nv50_ir_emit_nv50.cpp',
'codegen/nv50_ir_from_tgsi.cpp',
'codegen/nv50_ir_graph.cpp',
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 9520d984bb..06d989a3f8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -27,6 +27,7 @@
#include "nvc0/nvc0_context.h"
#include "codegen/nv50_ir_driver.h"
+#include "codegen/nv50_ir_dump.h"
#include "nvc0/nve4_compute.h"
/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than
@@ -506,6 +507,64 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
return 0;
}
+/* Fill in prog->hdr for a freshly compiled program.
+ *
+ * Dispatches on prog->type to the per-stage nvc0_*_gen_header() helper;
+ * compute programs have no helper call here and instead take over the
+ * symbol table pointer and count from info->bin. Afterwards, extra flag
+ * bits are ORed into the header depending on info->bin.tlsSpace,
+ * info->io.globalAccess and info->io.fp64.
+ *
+ * Returns 0 on success, the helper's non-zero result on failure, or -1 for
+ * an unknown program type.
+ */
+static int
+nvc0_program_create_header(struct nvc0_program *prog,
+ struct nv50_ir_prog_info *info)
+{
+ int ret = 0;
+ switch (prog->type) {
+ case PIPE_SHADER_VERTEX:
+ ret = nvc0_vp_gen_header(prog, info);
+ break;
+ case PIPE_SHADER_TESS_CTRL:
+ ret = nvc0_tcp_gen_header(prog, info);
+ break;
+ case PIPE_SHADER_TESS_EVAL:
+ ret = nvc0_tep_gen_header(prog, info);
+ break;
+ case PIPE_SHADER_GEOMETRY:
+ ret = nvc0_gp_gen_header(prog, info);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ ret = nvc0_fp_gen_header(prog, info);
+ break;
+ case PIPE_SHADER_COMPUTE:
+ prog->cp.syms = info->bin.syms;
+ prog->cp.num_syms = info->bin.numSyms;
+ break;
+ default:
+ ret = -1;
+ NOUVEAU_ERR("unknown program type: %u\n", prog->type);
+ break;
+ }
+ /* Stop before touching the common header bits if stage setup failed. */
+ if (ret)
+ return ret;
+
+ /* Programs using local memory: flag it, record the 16-byte aligned size
+  * and remember that a TLS area is needed.
+  */
+ if (info->bin.tlsSpace) {
+ assert(info->bin.tlsSpace < (1 << 24));
+ prog->hdr[0] |= 1 << 26;
+ prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */
+ prog->need_tls = true;
+ }
+ /* TODO: factor 2 only needed where joinat/precont is used,
+ * and we only have to count non-uniform branches
+ */
+ /*
+ if ((info->maxCFDepth * 2) > 16) {
+ prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
+ prog->need_tls = true;
+ }
+ */
+ /* Any global access sets the same hdr[0] bit as the TLS case above;
+  * bit 1 of globalAccess additionally sets bit 16.
+  */
+ if (info->io.globalAccess)
+ prog->hdr[0] |= 1 << 26;
+ if (info->io.globalAccess & 0x2)
+ prog->hdr[0] |= 1 << 16;
+ if (info->io.fp64)
+ prog->hdr[0] |= 1 << 27;
+
+ return 0;
+}
+
static struct nvc0_transform_feedback_state *
nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info,
const struct pipe_stream_output_info *pso)
@@ -565,6 +624,30 @@ nvc0_program_dump(struct nvc0_program *prog)
}
#endif
+/* Write the whole prog->hdr array to the program's ".hdr" dump file.
+ * Does nothing if nv50_ir_begin_dump() returns no stream.
+ */
+static void
+nvc0_dump_header(struct nvc0_program *prog, struct nv50_ir_prog_info *info)
+{
+   FILE *out = nv50_ir_begin_dump(info, "header", ".hdr", true);
+   if (!out)
+      return;
+
+   fwrite(prog->hdr, sizeof(prog->hdr), 1, out);
+   fclose(out);
+}
+
+/* Overwrite prog->hdr with the contents of a replacement ".hdr" file.
+ *
+ * At most sizeof(prog->hdr) bytes are copied, matching what
+ * nvc0_dump_header() writes; a shorter file replaces only the leading part
+ * of the header. The loaded buffer is freed before returning.
+ *
+ * Returns true if a replacement header was found and applied.
+ */
+static bool
+nvc0_replace_header(struct nvc0_program *prog, struct nv50_ir_prog_info *info)
+{
+   size_t size;
+   void *data;
+   if (!nv50_ir_get_replacement(info, "header", ".hdr", &size, &data))
+      return false;
+
+   /* Bound the copy by the header size in bytes, not by a word count. */
+   memcpy(prog->hdr, data, MIN2(size, sizeof(prog->hdr)));
+   FREE(data);
+
+   return true;
+}
+
bool
nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
struct pipe_debug_callback *debug)
@@ -618,6 +701,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->assignSlots = nvc0_program_assign_varying_slots;
+ nv50_ir_create_source_hash(info);
+
ret = nv50_ir_generate_code(info);
if (ret) {
NOUVEAU_ERR("shader translation failed: %i\n", ret);
@@ -641,55 +726,12 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */
prog->vp.edgeflag = info->io.edgeFlagIn;
- switch (prog->type) {
- case PIPE_SHADER_VERTEX:
- ret = nvc0_vp_gen_header(prog, info);
- break;
- case PIPE_SHADER_TESS_CTRL:
- ret = nvc0_tcp_gen_header(prog, info);
- break;
- case PIPE_SHADER_TESS_EVAL:
- ret = nvc0_tep_gen_header(prog, info);
- break;
- case PIPE_SHADER_GEOMETRY:
- ret = nvc0_gp_gen_header(prog, info);
- break;
- case PIPE_SHADER_FRAGMENT:
- ret = nvc0_fp_gen_header(prog, info);
- break;
- case PIPE_SHADER_COMPUTE:
- prog->cp.syms = info->bin.syms;
- prog->cp.num_syms = info->bin.numSyms;
- break;
- default:
- ret = -1;
- NOUVEAU_ERR("unknown program type: %u\n", prog->type);
- break;
- }
- if (ret)
- goto out;
-
- if (info->bin.tlsSpace) {
- assert(info->bin.tlsSpace < (1 << 24));
- prog->hdr[0] |= 1 << 26;
- prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */
- prog->need_tls = true;
+ if (!nvc0_replace_header(prog, info)) {
+ ret = nvc0_program_create_header(prog, info);
+ if (ret)
+ goto out;
+ nvc0_dump_header(prog, info);
}
- /* TODO: factor 2 only needed where joinat/precont is used,
- * and we only have to count non-uniform branches
- */
- /*
- if ((info->maxCFDepth * 2) > 16) {
- prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
- prog->need_tls = true;
- }
- */
- if (info->io.globalAccess)
- prog->hdr[0] |= 1 << 26;
- if (info->io.globalAccess & 0x2)
- prog->hdr[0] |= 1 << 16;
- if (info->io.fp64)
- prog->hdr[0] |= 1 << 27;
if (prog->pipe.stream_output.num_outputs)
prog->tfb = nvc0_program_create_tfb_state(info,
--
2.14.3
More information about the mesa-dev
mailing list