[Mesa-dev] [PATCH] radv: Enable RB+ where possible.
Samuel Pitoiset
samuel.pitoiset at gmail.com
Mon Apr 9 08:04:20 UTC 2018
On 04/07/2018 01:48 PM, Bas Nieuwenhuizen wrote:
> According to Marek, not enabling it on Stoney has a significant
> negative performance impact. (And I guess this might impact
> performance on Raven as well)
>
> The register settings are pretty much copied from radeonsi. I did
> not put this in the pipeline as that would make the pipeline more
> dependent on the format which mean we would have to have more
> pipelines for the meta shaders.
> ---
> src/amd/vulkan/radv_cmd_buffer.c | 140 +++++++++++++++++++++++++++++++++++++++
> src/amd/vulkan/radv_pipeline.c | 13 ++--
> src/amd/vulkan/radv_private.h | 4 ++
> src/amd/vulkan/si_cmd_buffer.c | 7 ++
> 4 files changed, 158 insertions(+), 6 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index d9f12a351e..dba744f798 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -681,6 +681,142 @@ radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
> state->prefetch_L2_mask &= ~mask;
> }
>
> +static void
> +radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
> +{
> + if (!cmd_buffer->device->physical_device->rbplus_allowed)
> + return;
> +
> + struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
> + struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
> + const struct radv_subpass *subpass = cmd_buffer->state.subpass;
> +
> + unsigned sx_ps_downconvert = 0;
> + unsigned sx_blend_opt_epsilon = 0;
> + unsigned sx_blend_opt_control = 0;
> +
> + for (unsigned i = 0; subpass->color_count; ++i) {
> + if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
> + continue;
> +
> + int idx = subpass->color_attachments[i].attachment;
> + struct radv_color_buffer_info *cb = &framebuffer->attachments[idx].cb;
> +
> + unsigned format = G_028C70_FORMAT(cb->cb_color_info);
> + unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info);
> + uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf;
> + uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf;
> +
> + bool has_alpha, has_rgb;
> +
> + /* Set if RGB and A are present. */
> + has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib);
> +
> + if (format == V_028C70_COLOR_8 ||
> + format == V_028C70_COLOR_16 ||
> + format == V_028C70_COLOR_32)
> + has_rgb = !has_alpha;
> + else
> + has_rgb = true;
> +
> + /* Check the colormask and export format. */
> + if (!(colormask & 0x7))
> + has_rgb = false;
> + if (!(colormask & 0x8))
> + has_alpha = false;
> +
> + if (spi_format == V_028714_SPI_SHADER_ZERO) {
> + has_rgb = false;
> + has_alpha = false;
> + }
> +
> + /* Disable value checking for disabled channels. */
> + if (!has_rgb)
> + sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
> + if (!has_alpha)
> + sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
> +
> + /* Enable down-conversion for 32bpp and smaller formats. */
> + switch (format) {
> + case V_028C70_COLOR_8:
> + case V_028C70_COLOR_8_8:
> + case V_028C70_COLOR_8_8_8_8:
> + /* For 1 and 2-channel formats, use the superset thereof. */
> + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
> + spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
> + spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
> + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
> + sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
> + }
> + break;
> +
> + case V_028C70_COLOR_5_6_5:
> + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
> + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
> + sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
> + }
> + break;
> +
> + case V_028C70_COLOR_1_5_5_5:
> + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
> + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
> + sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
> + }
> + break;
> +
> + case V_028C70_COLOR_4_4_4_4:
> + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
> + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
> + sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
> + }
> + break;
> +
> + case V_028C70_COLOR_32:
> + if (swap == V_028C70_SWAP_STD &&
> + spi_format == V_028714_SPI_SHADER_32_R)
> + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
> + else if (swap == V_028C70_SWAP_ALT_REV &&
> + spi_format == V_028714_SPI_SHADER_32_AR)
> + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
> + break;
> +
> + case V_028C70_COLOR_16:
> + case V_028C70_COLOR_16_16:
> + /* For 1-channel formats, use the superset thereof. */
> + if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
> + spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
> + spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
> + spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
> + if (swap == V_028C70_SWAP_STD ||
> + swap == V_028C70_SWAP_STD_REV)
> + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
> + else
> + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
> + }
> + break;
> +
> + case V_028C70_COLOR_10_11_11:
> + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
> + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
> + sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
> + }
> + break;
> +
> + case V_028C70_COLOR_2_10_10_10:
> + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
> + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
> + sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
> + }
> + break;
> + }
> + }
> +
> + radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
> + radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
> + radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
> + radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
> +}
> +
> static void
> radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
> {
> @@ -3004,6 +3140,10 @@ static void
> radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
> const struct radv_draw_info *info)
> {
> + if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
> + cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
> + radv_emit_rbplus_state(cmd_buffer);
> +
> if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
> radv_emit_graphics_pipeline(cmd_buffer);
>
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 08abb9dbc4..6735b36846 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -2543,17 +2543,15 @@ radv_pipeline_generate_blend_state(struct radeon_winsys_cs *cs,
>
> radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8);
> radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8);
> -
> - radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
> - radeon_emit(cs, 0); /* R_028754_SX_PS_DOWNCONVERT */
> - radeon_emit(cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */
> - radeon_emit(cs, 0); /* R_02875C_SX_BLEND_OPT_CONTROL */
> }
>
> radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
>
> radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
> radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
> +
> + pipeline->graphics.col_format = blend->spi_shader_col_format;
> + pipeline->graphics.cb_target_mask = blend->cb_target_mask;
> }
>
>
> @@ -2993,6 +2991,9 @@ radv_compute_db_shader_control(const struct radv_device *device,
> else
> z_order = V_02880C_LATE_Z;
>
> + bool disable_rbplus = device->physical_device->has_rbplus &&
> + !device->physical_device->rbplus_allowed;
We can rely on rbplus_allowed directly.
> +
> return S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
> S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
> S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
> @@ -3001,7 +3002,7 @@ radv_compute_db_shader_control(const struct radv_device *device,
> S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
> S_02880C_EXEC_ON_HIER_FAIL(ps->info.info.ps.writes_memory) |
> S_02880C_EXEC_ON_NOOP(ps->info.info.ps.writes_memory) |
> - S_02880C_DUAL_QUAD_DISABLE(!!device->physical_device->has_rbplus);
> + S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
> }
>
> static void
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 9e655af844..b33a5887b1 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1238,6 +1238,10 @@ struct radv_pipeline {
> bool can_use_guardband;
> uint32_t needed_dynamic_state;
> bool disable_out_of_order_rast_for_occlusion;
> +
> + /* Used for rbplus */
> + uint32_t col_format;
> + uint32_t cb_target_mask;
> } graphics;
> };
>
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index aed291be35..37855a714f 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -554,6 +554,13 @@ si_emit_config(struct radv_physical_device *physical_device,
> small_prim_filter_cntl);
> }
>
> + if (physical_device->has_rbplus && !physical_device->rbplus_allowed) {
> + radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
> + radeon_emit(cs, 0); /* R_028754_SX_PS_DOWNCONVERT */
> + radeon_emit(cs, 0); /* R_028758_SX_BLEND_OPT_EPSILON */
> + radeon_emit(cs, 0); /* R_02875C_SX_BLEND_OPT_CONTROL */
> + }
This is actually useless because CLEAR_STATE initializes these registers
correctly.
> +
> si_emit_compute(physical_device, cs);
> }
>
>
More information about the mesa-dev
mailing list