[Mesa-dev] [PATCH 18/53] intel/compiler: Add and use helpers for working with KSP indices
Jason Ekstrand
jason at jlekstrand.net
Thu May 24 21:56:00 UTC 2018
The pixel shader dispatch table is kind-of a confusing mess. This adds
some helpers for dealing with it and for easily extracting the correct
data from wm_prog_data.
---
src/intel/blorp/blorp_genX_exec.h | 65 ++++++++++++--------
src/intel/compiler/brw_compiler.h | 85 +++++++++++++++++++++++++++
src/intel/vulkan/genX_pipeline.c | 18 +++---
src/mesa/drivers/dri/i965/gen4_blorp_exec.h | 16 +++--
src/mesa/drivers/dri/i965/genX_state_upload.c | 54 +++++++++++------
5 files changed, 183 insertions(+), 55 deletions(-)
diff --git a/src/intel/blorp/blorp_genX_exec.h b/src/intel/blorp/blorp_genX_exec.h
index 446743b..7373ffc 100644
--- a/src/intel/blorp/blorp_genX_exec.h
+++ b/src/intel/blorp/blorp_genX_exec.h
@@ -759,17 +759,22 @@ blorp_emit_ps_config(struct blorp_batch *batch,
}
if (prog_data) {
- ps.DispatchGRFStartRegisterForConstantSetupData0 =
- prog_data->base.dispatch_grf_start_reg;
- ps.DispatchGRFStartRegisterForConstantSetupData2 =
- prog_data->dispatch_grf_start_reg_2;
-
ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;
- ps.KernelStartPointer0 = params->wm_prog_kernel;
- ps.KernelStartPointer2 =
- params->wm_prog_kernel + prog_data->prog_offset_2;
+ ps.DispatchGRFStartRegisterForConstantSetupData0 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
+ ps.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
+ ps.DispatchGRFStartRegisterForConstantSetupData2 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
+
+ ps.KernelStartPointer0 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 0);
+ ps.KernelStartPointer1 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 1);
+ ps.KernelStartPointer2 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 2);
}
/* 3DSTATE_PS expects the number of threads per PSD, which is always 64
@@ -863,17 +868,22 @@ blorp_emit_ps_config(struct blorp_batch *batch,
#endif
if (prog_data) {
+ ps._8PixelDispatchEnable = prog_data->dispatch_8;
+ ps._16PixelDispatchEnable = prog_data->dispatch_16;
+
ps.DispatchGRFStartRegisterForConstantSetupData0 =
- prog_data->base.dispatch_grf_start_reg;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
+ ps.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
- prog_data->dispatch_grf_start_reg_2;
-
- ps.KernelStartPointer0 = params->wm_prog_kernel;
- ps.KernelStartPointer2 =
- params->wm_prog_kernel + prog_data->prog_offset_2;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
- ps._8PixelDispatchEnable = prog_data->dispatch_8;
- ps._16PixelDispatchEnable = prog_data->dispatch_16;
+ ps.KernelStartPointer0 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 0);
+ ps.KernelStartPointer1 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 1);
+ ps.KernelStartPointer2 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 2);
ps.AttributeEnable = prog_data->num_varying_inputs > 0;
} else {
@@ -925,18 +935,23 @@ blorp_emit_ps_config(struct blorp_batch *batch,
if (prog_data) {
wm.ThreadDispatchEnable = true;
- wm.DispatchGRFStartRegisterForConstantSetupData0 =
- prog_data->base.dispatch_grf_start_reg;
- wm.DispatchGRFStartRegisterForConstantSetupData2 =
- prog_data->dispatch_grf_start_reg_2;
-
- wm.KernelStartPointer0 = params->wm_prog_kernel;
- wm.KernelStartPointer2 =
- params->wm_prog_kernel + prog_data->prog_offset_2;
-
wm._8PixelDispatchEnable = prog_data->dispatch_8;
wm._16PixelDispatchEnable = prog_data->dispatch_16;
+ wm.DispatchGRFStartRegisterForConstantSetupData0 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 0);
+ wm.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 1);
+ wm.DispatchGRFStartRegisterForConstantSetupData2 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm, 2);
+
+ wm.KernelStartPointer0 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, wm, 0);
+ wm.KernelStartPointer1 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, wm, 1);
+ wm.KernelStartPointer2 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, wm, 2);
+
wm.NumberofSFOutputAttributes = prog_data->num_varying_inputs;
}
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 8b4e6fe..a8ae243 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -745,6 +745,91 @@ struct brw_wm_prog_data {
int urb_setup[VARYING_SLOT_MAX];
};
+/** Returns the SIMD width corresponding to a given KSP index
+ *
+ * The "Variable Pixel Dispatch" table in the PRM (which can be found, for
+ * example in Vol. 7 of the SKL PRM) has a mapping from dispatch widths to
+ * kernel start pointer (KSP) indices that is based on what dispatch widths
+ * are enabled. This function provides, effectively, the reverse mapping.
+ *
+ * If the given KSP is valid with respect to the SIMD8/16/32 enables, a SIMD
+ * width of 8, 16, or 32 is returned. If the KSP is invalid, 0 is returned.
+ */
+static inline unsigned
+brw_fs_simd_width_for_ksp(unsigned ksp_idx, bool simd8_enabled,
+ bool simd16_enabled, bool simd32_enabled)
+{
+ /* This function strictly ignores contiguous dispatch */
+ switch (ksp_idx) {
+ case 0:
+ return simd8_enabled ? 8 :
+ (simd16_enabled && !simd32_enabled) ? 16 :
+ (simd32_enabled && !simd16_enabled) ? 32 : 0;
+ case 1:
+ return (simd32_enabled && (simd16_enabled || simd8_enabled)) ? 32 : 0;
+ case 2:
+ return (simd16_enabled && (simd32_enabled || simd8_enabled)) ? 16 : 0;
+ default:
+ unreachable("Invalid KSP index");
+ }
+}
+
+#define brw_wm_state_simd_width_for_ksp(wm_state, ksp_idx) \
+ brw_fs_simd_width_for_ksp((ksp_idx), (wm_state)._8PixelDispatchEnable, \
+ (wm_state)._16PixelDispatchEnable, \
+ (wm_state)._32PixelDispatchEnable)
+
+#define brw_wm_state_has_ksp(wm_state, ksp_idx) \
+ (brw_wm_state_simd_width_for_ksp((wm_state), (ksp_idx)) != 0)
+
+static inline uint32_t
+_brw_wm_prog_data_prog_offset(const struct brw_wm_prog_data *prog_data,
+ unsigned ksp_idx)
+{
+ switch (ksp_idx) {
+ case 0: return 0;
+ case 1: return 0;
+ case 2: return prog_data->prog_offset_2;
+ default:
+ unreachable("Invalid KSP index");
+ }
+}
+
+#define brw_wm_prog_data_prog_offset(prog_data, wm_state, ksp_idx) \
+ _brw_wm_prog_data_prog_offset(prog_data, ksp_idx)
+
+static inline uint8_t
+_brw_wm_prog_data_dispatch_grf_start_reg(const struct brw_wm_prog_data *prog_data,
+ unsigned ksp_idx)
+{
+ switch (ksp_idx) {
+ case 0: return prog_data->base.dispatch_grf_start_reg;
+ case 1: return 0;
+ case 2: return prog_data->dispatch_grf_start_reg_2;
+ default:
+ unreachable("Invalid KSP index");
+ }
+}
+
+#define brw_wm_prog_data_dispatch_grf_start_reg(prog_data, wm_state, ksp_idx) \
+ _brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ksp_idx)
+
+static inline uint8_t
+_brw_wm_prog_data_reg_blocks(const struct brw_wm_prog_data *prog_data,
+ unsigned ksp_idx)
+{
+ switch (ksp_idx) {
+ case 0: return prog_data->reg_blocks_0;
+ case 1: return 0;
+ case 2: return prog_data->reg_blocks_2;
+ default:
+ unreachable("Invalid KSP index");
+ }
+}
+
+#define brw_wm_prog_data_reg_blocks(prog_data, wm_state, ksp_idx) \
+ _brw_wm_prog_data_reg_blocks(prog_data, ksp_idx)
+
struct brw_push_const_block {
unsigned dwords; /* Dword count, not reg aligned */
unsigned regs;
diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
index 6016d25..8988bc4 100644
--- a/src/intel/vulkan/genX_pipeline.c
+++ b/src/intel/vulkan/genX_pipeline.c
@@ -1488,14 +1488,17 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
#endif
anv_batch_emit(&pipeline->batch, GENX(3DSTATE_PS), ps) {
- ps.KernelStartPointer0 = fs_bin->kernel.offset;
- ps.KernelStartPointer1 = 0;
- ps.KernelStartPointer2 = fs_bin->kernel.offset +
- wm_prog_data->prog_offset_2;
ps._8PixelDispatchEnable = wm_prog_data->dispatch_8;
ps._16PixelDispatchEnable = wm_prog_data->dispatch_16;
ps._32PixelDispatchEnable = false;
+ ps.KernelStartPointer0 = fs_bin->kernel.offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
+ ps.KernelStartPointer1 = fs_bin->kernel.offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
+ ps.KernelStartPointer2 = fs_bin->kernel.offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2);
+
ps.SingleProgramFlow = false;
ps.VectorMaskEnable = true;
ps.SamplerCount = get_sampler_count(fs_bin);
@@ -1526,10 +1529,11 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
#endif
ps.DispatchGRFStartRegisterForConstantSetupData0 =
- wm_prog_data->base.dispatch_grf_start_reg;
- ps.DispatchGRFStartRegisterForConstantSetupData1 = 0;
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
+ ps.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
- wm_prog_data->dispatch_grf_start_reg_2;
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);
ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
ps.ScratchSpaceBasePointer =
diff --git a/src/mesa/drivers/dri/i965/gen4_blorp_exec.h b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
index e59bc9f..e3b90f1 100644
--- a/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
+++ b/src/mesa/drivers/dri/i965/gen4_blorp_exec.h
@@ -136,13 +136,17 @@ blorp_emit_wm_state(struct blorp_batch *batch,
#if GEN_GEN == 4
wm.KernelStartPointer0 =
instruction_state_address(batch, params->wm_prog_kernel);
- wm.GRFRegisterCount0 = prog_data->reg_blocks_0;
+ wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
#else
- wm.KernelStartPointer0 = params->wm_prog_kernel;
- wm.GRFRegisterCount0 = prog_data->reg_blocks_0;
- wm.KernelStartPointer2 =
- params->wm_prog_kernel + prog_data->prog_offset_2;
- wm.GRFRegisterCount2 = prog_data->reg_blocks_2;
+ wm.KernelStartPointer0 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, wm, 0);
+ wm.KernelStartPointer1 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, wm, 1);
+ wm.KernelStartPointer2 = params->wm_prog_kernel +
+ brw_wm_prog_data_prog_offset(prog_data, wm, 2);
+ wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(prog_data, wm, 0);
+ wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(prog_data, wm, 1);
+ wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(prog_data, wm, 2);
#endif
}
diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c
index 6d430f6..8253ef5 100644
--- a/src/mesa/drivers/dri/i965/genX_state_upload.c
+++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
@@ -1899,43 +1899,57 @@ genX(upload_wm)(struct brw_context *brw)
#if GEN_GEN == 4
/* On gen4, we only have one shader kernel */
- if (wm_prog_data->dispatch_8 || wm_prog_data->dispatch_16) {
+ if (brw_wm_state_has_ksp(wm, 0)) {
+ assert(brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0) == 0);
wm.KernelStartPointer0 = KSP(brw, stage_state->prog_offset);
- wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0;
+ wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
wm.DispatchGRFStartRegisterForConstantSetupData0 =
- wm_prog_data->base.dispatch_grf_start_reg;
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
}
#elif GEN_GEN == 5
/* On gen5, we have multiple shader kernels but only one GRF start
* register for all kernels
*/
- wm.KernelStartPointer0 = stage_state->prog_offset;
+ wm.KernelStartPointer0 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
+ wm.KernelStartPointer1 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
wm.KernelStartPointer2 = stage_state->prog_offset +
- wm_prog_data->prog_offset_2;
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
- wm.GRFRegisterCount0 = wm_prog_data->reg_blocks_0;
- wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;
+ wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
+ wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 1);
+ wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 2);
wm.DispatchGRFStartRegisterForConstantSetupData0 =
wm_prog_data->base.dispatch_grf_start_reg;
/* Dispatch GRF Start should be the same for all shaders on gen5 */
- if (wm_prog_data->dispatch_8 && wm_prog_data->dispatch_16) {
+ if (brw_wm_state_has_ksp(wm, 1)) {
assert(wm_prog_data->base.dispatch_grf_start_reg ==
- wm_prog_data->dispatch_grf_start_reg_2);
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1));
+ }
+ if (brw_wm_state_has_ksp(wm, 2)) {
+ assert(wm_prog_data->base.dispatch_grf_start_reg ==
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2));
}
#elif GEN_GEN == 6
/* On gen5, we have multiple shader kernels and we no longer specify a
* register count for each one.
*/
- wm.KernelStartPointer0 = stage_state->prog_offset;
+ wm.KernelStartPointer0 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
+ wm.KernelStartPointer1 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
wm.KernelStartPointer2 = stage_state->prog_offset +
- wm_prog_data->prog_offset_2;
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
wm.DispatchGRFStartRegisterForConstantSetupData0 =
- wm_prog_data->base.dispatch_grf_start_reg;
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
+ wm.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1);
wm.DispatchGRFStartRegisterForConstantSetupData2 =
- wm_prog_data->dispatch_grf_start_reg_2;
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2);
#endif
#if GEN_GEN <= 5
@@ -4015,14 +4029,20 @@ genX(upload_ps)(struct brw_context *brw)
ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;
+
ps.DispatchGRFStartRegisterForConstantSetupData0 =
- prog_data->base.dispatch_grf_start_reg;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
+ ps.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
- prog_data->dispatch_grf_start_reg_2;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
- ps.KernelStartPointer0 = stage_state->prog_offset;
+ ps.KernelStartPointer0 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 0);
+ ps.KernelStartPointer1 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 1);
ps.KernelStartPointer2 = stage_state->prog_offset +
- prog_data->prog_offset_2;
+ brw_wm_prog_data_prog_offset(prog_data, ps, 2);
if (prog_data->base.total_scratch) {
ps.ScratchSpaceBasePointer =
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list