[Mesa-dev] [PATCH] radv: Enable RB+ where possible.

Samuel Pitoiset samuel.pitoiset at gmail.com
Mon Apr 9 08:04:20 UTC 2018



On 04/07/2018 01:48 PM, Bas Nieuwenhuizen wrote:
> According to Marek, not enabling it on Stoney has a significant
> negative performance impact. (And I guess this might impact
> performance on Raven as well)
> 
> The register settings are pretty much copied from radeonsi. I did
> not put this in the pipeline as that would make the pipeline more
> dependent on the format which mean we would have to have more
> pipelines for the meta shaders.
> ---
>   src/amd/vulkan/radv_cmd_buffer.c | 140 +++++++++++++++++++++++++++++++++++++++
>   src/amd/vulkan/radv_pipeline.c   |  13 ++--
>   src/amd/vulkan/radv_private.h    |   4 ++
>   src/amd/vulkan/si_cmd_buffer.c   |   7 ++
>   4 files changed, 158 insertions(+), 6 deletions(-)
> 
> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
> index d9f12a351e..dba744f798 100644
> --- a/src/amd/vulkan/radv_cmd_buffer.c
> +++ b/src/amd/vulkan/radv_cmd_buffer.c
> @@ -681,6 +681,142 @@ radv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
>   	state->prefetch_L2_mask &= ~mask;
>   }
>   
> +static void
> +radv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
> +{
> +	if (!cmd_buffer->device->physical_device->rbplus_allowed)
> +		return;
> +
> +	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
> +	struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
> +	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
> +
> +	unsigned sx_ps_downconvert = 0;
> +	unsigned sx_blend_opt_epsilon = 0;
> +	unsigned sx_blend_opt_control = 0;
> +
> +	for (unsigned i = 0; subpass->color_count; ++i) {
> +		if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED)
> +			continue;
> +
> +		int idx = subpass->color_attachments[i].attachment;
> +		struct radv_color_buffer_info *cb = &framebuffer->attachments[idx].cb;
> +
> +		unsigned format = G_028C70_FORMAT(cb->cb_color_info);
> +		unsigned swap = G_028C70_COMP_SWAP(cb->cb_color_info);
> +		uint32_t spi_format = (pipeline->graphics.col_format >> (i * 4)) & 0xf;
> +		uint32_t colormask = (pipeline->graphics.cb_target_mask >> (i * 4)) & 0xf;
> +
> +		bool has_alpha, has_rgb;
> +
> +		/* Set if RGB and A are present. */
> +		has_alpha = !G_028C74_FORCE_DST_ALPHA_1(cb->cb_color_attrib);
> +
> +		if (format == V_028C70_COLOR_8 ||
> +		    format == V_028C70_COLOR_16 ||
> +		    format == V_028C70_COLOR_32)
> +			has_rgb = !has_alpha;
> +		else
> +			has_rgb = true;
> +
> +		/* Check the colormask and export format. */
> +		if (!(colormask & 0x7))
> +			has_rgb = false;
> +		if (!(colormask & 0x8))
> +			has_alpha = false;
> +
> +		if (spi_format == V_028714_SPI_SHADER_ZERO) {
> +			has_rgb = false;
> +			has_alpha = false;
> +		}
> +
> +		/* Disable value checking for disabled channels. */
> +		if (!has_rgb)
> +			sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
> +		if (!has_alpha)
> +			sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
> +
> +		/* Enable down-conversion for 32bpp and smaller formats. */
> +		switch (format) {
> +		case V_028C70_COLOR_8:
> +		case V_028C70_COLOR_8_8:
> +		case V_028C70_COLOR_8_8_8_8:
> +			/* For 1 and 2-channel formats, use the superset thereof. */
> +			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
> +			    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
> +			    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
> +				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
> +				sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
> +			}
> +			break;
> +
> +		case V_028C70_COLOR_5_6_5:
> +			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
> +				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
> +				sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
> +			}
> +			break;
> +
> +		case V_028C70_COLOR_1_5_5_5:
> +			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
> +				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
> +				sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
> +			}
> +			break;
> +
> +		case V_028C70_COLOR_4_4_4_4:
> +			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
> +				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
> +				sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
> +			}
> +			break;
> +
> +		case V_028C70_COLOR_32:
> +			if (swap == V_028C70_SWAP_STD &&
> +			    spi_format == V_028714_SPI_SHADER_32_R)
> +				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
> +			else if (swap == V_028C70_SWAP_ALT_REV &&
> +				 spi_format == V_028714_SPI_SHADER_32_AR)
> +				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
> +			break;
> +
> +		case V_028C70_COLOR_16:
> +		case V_028C70_COLOR_16_16:
> +			/* For 1-channel formats, use the superset thereof. */
> +			if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
> +			    spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
> +			    spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
> +			    spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
> +				if (swap == V_028C70_SWAP_STD ||
> +				    swap == V_028C70_SWAP_STD_REV)
> +					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
> +				else
> +					sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
> +			}
> +			break;
> +
> +		case V_028C70_COLOR_10_11_11:
> +			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
> +				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
> +				sx_blend_opt_epsilon |= V_028758_11BIT_FORMAT << (i * 4);
> +			}
> +			break;
> +
> +		case V_028C70_COLOR_2_10_10_10:
> +			if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
> +				sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
> +				sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
> +			}
> +			break;
> +		}
> +	}
> +
> +	radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
> +	radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
> +	radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
> +	radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
> +}
> +
>   static void
>   radv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
>   {
> @@ -3004,6 +3140,10 @@ static void
>   radv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer,
>   			      const struct radv_draw_info *info)
>   {
> +	if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
> +	    cmd_buffer->state.emitted_pipeline != cmd_buffer->state.pipeline)
> +		radv_emit_rbplus_state(cmd_buffer);
> +
>   	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
>   		radv_emit_graphics_pipeline(cmd_buffer);
>   
> diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
> index 08abb9dbc4..6735b36846 100644
> --- a/src/amd/vulkan/radv_pipeline.c
> +++ b/src/amd/vulkan/radv_pipeline.c
> @@ -2543,17 +2543,15 @@ radv_pipeline_generate_blend_state(struct radeon_winsys_cs *cs,
>   
>   		radeon_set_context_reg_seq(cs, R_028760_SX_MRT0_BLEND_OPT, 8);
>   		radeon_emit_array(cs, blend->sx_mrt_blend_opt, 8);
> -
> -		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
> -		radeon_emit(cs, 0);	/* R_028754_SX_PS_DOWNCONVERT */
> -		radeon_emit(cs, 0);	/* R_028758_SX_BLEND_OPT_EPSILON */
> -		radeon_emit(cs, 0);	/* R_02875C_SX_BLEND_OPT_CONTROL */
>   	}
>   
>   	radeon_set_context_reg(cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format);
>   
>   	radeon_set_context_reg(cs, R_028238_CB_TARGET_MASK, blend->cb_target_mask);
>   	radeon_set_context_reg(cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask);
> +
> +	pipeline->graphics.col_format = blend->spi_shader_col_format;
> +	pipeline->graphics.cb_target_mask = blend->cb_target_mask;
>   }
>   
>   
> @@ -2993,6 +2991,9 @@ radv_compute_db_shader_control(const struct radv_device *device,
>   	else
>   		z_order = V_02880C_LATE_Z;
>   
> +	bool disable_rbplus = device->physical_device->has_rbplus &&
> +	                      !device->physical_device->rbplus_allowed;

We can rely on rbplus_allowed directly.

> +
>   	return  S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
>   		S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
>   		S_02880C_KILL_ENABLE(!!ps->info.fs.can_discard) |
> @@ -3001,7 +3002,7 @@ radv_compute_db_shader_control(const struct radv_device *device,
>   		S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
>   		S_02880C_EXEC_ON_HIER_FAIL(ps->info.info.ps.writes_memory) |
>   		S_02880C_EXEC_ON_NOOP(ps->info.info.ps.writes_memory) |
> -		S_02880C_DUAL_QUAD_DISABLE(!!device->physical_device->has_rbplus);
> +		S_02880C_DUAL_QUAD_DISABLE(disable_rbplus);
>   }
>   
>   static void
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 9e655af844..b33a5887b1 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1238,6 +1238,10 @@ struct radv_pipeline {
>    			bool can_use_guardband;
>   			uint32_t needed_dynamic_state;
>   			bool disable_out_of_order_rast_for_occlusion;
> +
> +			/* Used for rbplus */
> +			uint32_t col_format;
> +			uint32_t cb_target_mask;
>   		} graphics;
>   	};
>   
> diff --git a/src/amd/vulkan/si_cmd_buffer.c b/src/amd/vulkan/si_cmd_buffer.c
> index aed291be35..37855a714f 100644
> --- a/src/amd/vulkan/si_cmd_buffer.c
> +++ b/src/amd/vulkan/si_cmd_buffer.c
> @@ -554,6 +554,13 @@ si_emit_config(struct radv_physical_device *physical_device,
>   				       small_prim_filter_cntl);
>   	}
>   
> +	if (physical_device->has_rbplus && !physical_device->rbplus_allowed) {
> +		radeon_set_context_reg_seq(cs, R_028754_SX_PS_DOWNCONVERT, 3);
> +		radeon_emit(cs, 0);     /* R_028754_SX_PS_DOWNCONVERT */
> +		radeon_emit(cs, 0);     /* R_028758_SX_BLEND_OPT_EPSILON */
> +		radeon_emit(cs, 0);     /* R_02875C_SX_BLEND_OPT_CONTROL */
> +	}

This is actually useless because CLEAR_STATE initializes these registers 
correctly.

> +
>   	si_emit_compute(physical_device, cs);
>   }
>   
> 


More information about the mesa-dev mailing list