[PATCH 1/2] radeon: use GB_GR and BG_RG formats for packed yuv video for r600+
Roland Scheidegger
rscheidegger_lists at hispeed.ch
Thu May 10 20:25:32 PDT 2012
These formats were invented for exactly this purpose, so use them.
This saves some code and also some hardware resources (only one
sampler is needed instead of two for packed YUV).
(Note: the output is not quite pixel-exact compared to before. Probably some
rounding errors on the coordinates previously caused subtle chroma filter bugs,
or the bilinear filter is just slightly different for these formats.)
Only tested on Evergreen (EG).
---
src/cayman_shader.c | 36 ++----------------------------
src/evergreen_shader.c | 36 ++----------------------------
src/evergreen_textured_videofuncs.c | 41 ++++++-----------------------------
src/r600_shader.c | 34 ++--------------------------
src/r600_textured_videofuncs.c | 41 ++++++-----------------------------
5 files changed, 23 insertions(+), 165 deletions(-)
diff --git a/src/cayman_shader.c b/src/cayman_shader.c
index 01b612a..18e9f50 100644
--- a/src/cayman_shader.c
+++ b/src/cayman_shader.c
@@ -1338,7 +1338,7 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ I_COUNT(1),
VALID_PIXEL_MODE(0),
CF_INST(SQ_CF_INST_TC),
BARRIER(1));
@@ -1365,8 +1365,8 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD1(DST_GPR(1),
DST_REL(ABSOLUTE),
DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_MASK),
- DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
DST_SEL_W(SQ_SEL_1),
LOD_BIAS(0),
COORD_TYPE_X(TEX_NORMALIZED),
@@ -1382,36 +1382,6 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 34/35 */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_MASK),
- DST_SEL_Y(SQ_SEL_X),
- DST_SEL_Z(SQ_SEL_Y),
- DST_SEL_W(SQ_SEL_MASK),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(1),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
return i;
}
diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c
index bbdd7a7..a6faba0 100644
--- a/src/evergreen_shader.c
+++ b/src/evergreen_shader.c
@@ -1306,7 +1306,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ I_COUNT(1),
VALID_PIXEL_MODE(0),
END_OF_PROGRAM(0),
CF_INST(SQ_CF_INST_TC),
@@ -1337,8 +1337,8 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD1(DST_GPR(1),
DST_REL(ABSOLUTE),
DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_MASK),
- DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
DST_SEL_W(SQ_SEL_1),
LOD_BIAS(0),
COORD_TYPE_X(TEX_NORMALIZED),
@@ -1354,36 +1354,6 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 34/35 */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- INST_MOD(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- ALT_CONST(0),
- RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
- SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
- shader[i++] = TEX_DWORD1(DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_MASK),
- DST_SEL_Y(SQ_SEL_X),
- DST_SEL_Z(SQ_SEL_Y),
- DST_SEL_W(SQ_SEL_MASK),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(1),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
return i;
}
diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
index 8ca8e62..7c5d15d 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -331,7 +331,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
default:
accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
- /* Y texture */
+ /* YUV texture */
tex_res.id = 0;
tex_res.w = accel_state->src_obj[0].width;
tex_res.h = accel_state->src_obj[0].height;
@@ -345,13 +345,13 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_res.mip_bo = accel_state->src_obj[0].bo;
tex_res.surface = NULL;
- tex_res.format = FMT_8_8;
if (pPriv->id == FOURCC_UYVY)
- tex_res.dst_sel_x = SQ_SEL_Y; /* Y */
+ tex_res.format = FMT_GB_GR;
else
- tex_res.dst_sel_x = SQ_SEL_X; /* Y */
- tex_res.dst_sel_y = SQ_SEL_1;
- tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.format = FMT_BG_RG;
+ tex_res.dst_sel_x = SQ_SEL_Y;
+ tex_res.dst_sel_y = SQ_SEL_X;
+ tex_res.dst_sel_z = SQ_SEL_Z;
tex_res.dst_sel_w = SQ_SEL_1;
tex_res.base_level = 0;
@@ -362,7 +362,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_res.array_mode = 1;
evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
- /* Y sampler */
+ /* YUV sampler */
tex_samp.id = 0;
tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
@@ -375,33 +375,6 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_samp.mip_filter = 0; /* no mipmap */
evergreen_set_tex_sampler(pScrn, &tex_samp);
- /* UV texture */
- tex_res.id = 1;
- tex_res.format = FMT_8_8_8_8;
- tex_res.w = accel_state->src_obj[0].width >> 1;
- tex_res.h = accel_state->src_obj[0].height;
- tex_res.pitch = accel_state->src_obj[0].pitch >> 2;
- if (pPriv->id == FOURCC_UYVY) {
- tex_res.dst_sel_x = SQ_SEL_X; /* V */
- tex_res.dst_sel_y = SQ_SEL_Z; /* U */
- } else {
- tex_res.dst_sel_x = SQ_SEL_Y; /* V */
- tex_res.dst_sel_y = SQ_SEL_W; /* U */
- }
- tex_res.dst_sel_z = SQ_SEL_1;
- tex_res.dst_sel_w = SQ_SEL_1;
- tex_res.interlaced = 0;
-
- tex_res.base = accel_state->src_obj[0].offset;
- tex_res.mip_base = accel_state->src_obj[0].offset;
- tex_res.size = accel_state->src_size[0];
- if (accel_state->src_obj[0].tiling_flags == 0)
- tex_res.array_mode = 1;
- evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
-
- /* UV sampler */
- tex_samp.id = 1;
- evergreen_set_tex_sampler(pScrn, &tex_samp);
break;
}
diff --git a/src/r600_shader.c b/src/r600_shader.c
index ab2f485..4cb2fc8 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1090,7 +1090,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = CF_DWORD1(POP_COUNT(0),
CF_CONST(0),
COND(SQ_CF_COND_ACTIVE),
- I_COUNT(2),
+ I_COUNT(1),
CALL_COUNT(0),
END_OF_PROGRAM(0),
VALID_PIXEL_MODE(0),
@@ -1120,8 +1120,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
shader[i++] = TEX_DWORD1(DST_GPR(1),
DST_REL(ABSOLUTE),
DST_SEL_X(SQ_SEL_X),
- DST_SEL_Y(SQ_SEL_MASK),
- DST_SEL_Z(SQ_SEL_MASK),
+ DST_SEL_Y(SQ_SEL_Y),
+ DST_SEL_Z(SQ_SEL_Z),
DST_SEL_W(SQ_SEL_1),
LOD_BIAS(0),
COORD_TYPE_X(TEX_NORMALIZED),
@@ -1137,34 +1137,6 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
SRC_SEL_Z(SQ_SEL_0),
SRC_SEL_W(SQ_SEL_1));
shader[i++] = TEX_DWORD_PAD;
- /* 28/29 */
- shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
- BC_FRAC_MODE(0),
- FETCH_WHOLE_QUAD(0),
- RESOURCE_ID(1),
- SRC_GPR(0),
- SRC_REL(ABSOLUTE),
- R7xx_ALT_CONST(0));
- shader[i++] = TEX_DWORD1(DST_GPR(1),
- DST_REL(ABSOLUTE),
- DST_SEL_X(SQ_SEL_MASK),
- DST_SEL_Y(SQ_SEL_X),
- DST_SEL_Z(SQ_SEL_Y),
- DST_SEL_W(SQ_SEL_MASK),
- LOD_BIAS(0),
- COORD_TYPE_X(TEX_NORMALIZED),
- COORD_TYPE_Y(TEX_NORMALIZED),
- COORD_TYPE_Z(TEX_NORMALIZED),
- COORD_TYPE_W(TEX_NORMALIZED));
- shader[i++] = TEX_DWORD2(OFFSET_X(0),
- OFFSET_Y(0),
- OFFSET_Z(0),
- SAMPLER_ID(1),
- SRC_SEL_X(SQ_SEL_X),
- SRC_SEL_Y(SQ_SEL_Y),
- SRC_SEL_Z(SQ_SEL_0),
- SRC_SEL_W(SQ_SEL_1));
- shader[i++] = TEX_DWORD_PAD;
return i;
}
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 62da992..7610050 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -358,7 +358,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
default:
accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
- /* Y texture */
+ /* YUV texture */
tex_res.id = 0;
tex_res.w = accel_state->src_obj[0].width;
tex_res.h = accel_state->src_obj[0].height;
@@ -371,13 +371,13 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_res.bo = accel_state->src_obj[0].bo;
tex_res.mip_bo = accel_state->src_obj[0].bo;
- tex_res.format = FMT_8_8;
if (pPriv->id == FOURCC_UYVY)
- tex_res.dst_sel_x = SQ_SEL_Y; /* Y */
+ tex_res.format = FMT_GB_GR;
else
- tex_res.dst_sel_x = SQ_SEL_X; /* Y */
- tex_res.dst_sel_y = SQ_SEL_1;
- tex_res.dst_sel_z = SQ_SEL_1;
+ tex_res.format = FMT_BG_RG;
+ tex_res.dst_sel_x = SQ_SEL_Y;
+ tex_res.dst_sel_y = SQ_SEL_X;
+ tex_res.dst_sel_z = SQ_SEL_Z;
tex_res.dst_sel_w = SQ_SEL_1;
tex_res.request_size = 1;
@@ -389,7 +389,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_res.tile_mode = 1;
r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
- /* Y sampler */
+ /* YUV sampler */
tex_samp.id = 0;
tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL;
tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL;
@@ -403,33 +403,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
tex_samp.mip_filter = 0; /* no mipmap */
r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
- /* UV texture */
- tex_res.id = 1;
- tex_res.format = FMT_8_8_8_8;
- tex_res.w = accel_state->src_obj[0].width >> 1;
- tex_res.h = accel_state->src_obj[0].height;
- tex_res.pitch = accel_state->src_obj[0].pitch >> 2;
- if (pPriv->id == FOURCC_UYVY) {
- tex_res.dst_sel_x = SQ_SEL_X; /* V */
- tex_res.dst_sel_y = SQ_SEL_Z; /* U */
- } else {
- tex_res.dst_sel_x = SQ_SEL_Y; /* V */
- tex_res.dst_sel_y = SQ_SEL_W; /* U */
- }
- tex_res.dst_sel_z = SQ_SEL_1;
- tex_res.dst_sel_w = SQ_SEL_1;
- tex_res.interlaced = 0;
-
- tex_res.base = accel_state->src_obj[0].offset;
- tex_res.mip_base = accel_state->src_obj[0].offset;
- tex_res.size = accel_state->src_size[0];
- if (accel_state->src_obj[0].tiling_flags == 0)
- tex_res.tile_mode = 1;
- r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
-
- /* UV sampler */
- tex_samp.id = 1;
- r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
break;
}
--
1.7.7
More information about the xorg-driver-ati
mailing list