[PATCH 1/2] radeon: use GB_GR and BG_RG formats for packed yuv video for r600+

Roland Scheidegger rscheidegger_lists at hispeed.ch
Thu May 10 20:25:32 PDT 2012


Those formats were invented for exactly that purpose so use them.
This saves some code and also some hw resources (only need one
sampler instead of two for packed yuv).
(Note the output is not quite pixel exact probably some rounding errors
on coords before caused some subtle chroma filter bugs, or the bilinear filter
is just slightly different for that format.)
Only tested on EG.
---
 src/cayman_shader.c                 |   36 ++----------------------------
 src/evergreen_shader.c              |   36 ++----------------------------
 src/evergreen_textured_videofuncs.c |   41 ++++++-----------------------------
 src/r600_shader.c                   |   34 ++--------------------------
 src/r600_textured_videofuncs.c      |   41 ++++++-----------------------------
 5 files changed, 23 insertions(+), 165 deletions(-)

diff --git a/src/cayman_shader.c b/src/cayman_shader.c
index 01b612a..18e9f50 100644
--- a/src/cayman_shader.c
+++ b/src/cayman_shader.c
@@ -1338,7 +1338,7 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                             CF_CONST(0),
                             COND(SQ_CF_COND_ACTIVE),
-                            I_COUNT(2),
+                            I_COUNT(1),
                             VALID_PIXEL_MODE(0),
                             CF_INST(SQ_CF_INST_TC),
                             BARRIER(1));
@@ -1365,8 +1365,8 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = TEX_DWORD1(DST_GPR(1),
                              DST_REL(ABSOLUTE),
                              DST_SEL_X(SQ_SEL_X),
-                             DST_SEL_Y(SQ_SEL_MASK),
-                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_Y),
+                             DST_SEL_Z(SQ_SEL_Z),
                              DST_SEL_W(SQ_SEL_1),
                              LOD_BIAS(0),
                              COORD_TYPE_X(TEX_NORMALIZED),
@@ -1382,36 +1382,6 @@ int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                              SRC_SEL_Z(SQ_SEL_0),
                              SRC_SEL_W(SQ_SEL_1));
     shader[i++] = TEX_DWORD_PAD;
-    /* 34/35 */
-    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-                             INST_MOD(0),
-                             FETCH_WHOLE_QUAD(0),
-                             RESOURCE_ID(1),
-                             SRC_GPR(0),
-                             SRC_REL(ABSOLUTE),
-                             ALT_CONST(0),
-                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
-                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
-    shader[i++] = TEX_DWORD1(DST_GPR(1),
-                             DST_REL(ABSOLUTE),
-                             DST_SEL_X(SQ_SEL_MASK),
-                             DST_SEL_Y(SQ_SEL_X),
-                             DST_SEL_Z(SQ_SEL_Y),
-                             DST_SEL_W(SQ_SEL_MASK),
-                             LOD_BIAS(0),
-                             COORD_TYPE_X(TEX_NORMALIZED),
-                             COORD_TYPE_Y(TEX_NORMALIZED),
-                             COORD_TYPE_Z(TEX_NORMALIZED),
-                             COORD_TYPE_W(TEX_NORMALIZED));
-    shader[i++] = TEX_DWORD2(OFFSET_X(0),
-                             OFFSET_Y(0),
-                             OFFSET_Z(0),
-                             SAMPLER_ID(1),
-                             SRC_SEL_X(SQ_SEL_X),
-                             SRC_SEL_Y(SQ_SEL_Y),
-                             SRC_SEL_Z(SQ_SEL_0),
-                             SRC_SEL_W(SQ_SEL_1));
-    shader[i++] = TEX_DWORD_PAD;
 
     return i;
 }
diff --git a/src/evergreen_shader.c b/src/evergreen_shader.c
index bbdd7a7..a6faba0 100644
--- a/src/evergreen_shader.c
+++ b/src/evergreen_shader.c
@@ -1306,7 +1306,7 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                             CF_CONST(0),
                             COND(SQ_CF_COND_ACTIVE),
-                            I_COUNT(2),
+                            I_COUNT(1),
                             VALID_PIXEL_MODE(0),
                             END_OF_PROGRAM(0),
                             CF_INST(SQ_CF_INST_TC),
@@ -1337,8 +1337,8 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = TEX_DWORD1(DST_GPR(1),
                              DST_REL(ABSOLUTE),
                              DST_SEL_X(SQ_SEL_X),
-                             DST_SEL_Y(SQ_SEL_MASK),
-                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_Y),
+                             DST_SEL_Z(SQ_SEL_Z),
                              DST_SEL_W(SQ_SEL_1),
                              LOD_BIAS(0),
                              COORD_TYPE_X(TEX_NORMALIZED),
@@ -1354,36 +1354,6 @@ int evergreen_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                              SRC_SEL_Z(SQ_SEL_0),
                              SRC_SEL_W(SQ_SEL_1));
     shader[i++] = TEX_DWORD_PAD;
-    /* 34/35 */
-    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-                             INST_MOD(0),
-                             FETCH_WHOLE_QUAD(0),
-                             RESOURCE_ID(1),
-                             SRC_GPR(0),
-                             SRC_REL(ABSOLUTE),
-                             ALT_CONST(0),
-                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
-                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
-    shader[i++] = TEX_DWORD1(DST_GPR(1),
-                             DST_REL(ABSOLUTE),
-                             DST_SEL_X(SQ_SEL_MASK),
-                             DST_SEL_Y(SQ_SEL_X),
-                             DST_SEL_Z(SQ_SEL_Y),
-                             DST_SEL_W(SQ_SEL_MASK),
-                             LOD_BIAS(0),
-                             COORD_TYPE_X(TEX_NORMALIZED),
-                             COORD_TYPE_Y(TEX_NORMALIZED),
-                             COORD_TYPE_Z(TEX_NORMALIZED),
-                             COORD_TYPE_W(TEX_NORMALIZED));
-    shader[i++] = TEX_DWORD2(OFFSET_X(0),
-                             OFFSET_Y(0),
-                             OFFSET_Z(0),
-                             SAMPLER_ID(1),
-                             SRC_SEL_X(SQ_SEL_X),
-                             SRC_SEL_Y(SQ_SEL_Y),
-                             SRC_SEL_Z(SQ_SEL_0),
-                             SRC_SEL_W(SQ_SEL_1));
-    shader[i++] = TEX_DWORD_PAD;
 
     return i;
 }
diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
index 8ca8e62..7c5d15d 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -331,7 +331,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     default:
 	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
 
-	/* Y texture */
+	/* YUV texture */
 	tex_res.id                  = 0;
 	tex_res.w                   = accel_state->src_obj[0].width;
 	tex_res.h                   = accel_state->src_obj[0].height;
@@ -345,13 +345,13 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.mip_bo              = accel_state->src_obj[0].bo;
 	tex_res.surface             = NULL;
 
-	tex_res.format              = FMT_8_8;
 	if (pPriv->id == FOURCC_UYVY)
-	    tex_res.dst_sel_x           = SQ_SEL_Y; /* Y */
+	    tex_res.format              = FMT_GB_GR;
 	else
-	    tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
-	tex_res.dst_sel_y           = SQ_SEL_1;
-	tex_res.dst_sel_z           = SQ_SEL_1;
+	    tex_res.format              = FMT_BG_RG;
+	tex_res.dst_sel_x           = SQ_SEL_Y;
+	tex_res.dst_sel_y           = SQ_SEL_X;
+	tex_res.dst_sel_z           = SQ_SEL_Z;
 	tex_res.dst_sel_w           = SQ_SEL_1;
 
 	tex_res.base_level          = 0;
@@ -362,7 +362,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	    tex_res.array_mode          = 1;
 	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
 
-	/* Y sampler */
+	/* YUV sampler */
 	tex_samp.id                 = 0;
 	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
 	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
@@ -375,33 +375,6 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_samp.mip_filter         = 0;			/* no mipmap */
 	evergreen_set_tex_sampler(pScrn, &tex_samp);
 
-	/* UV texture */
-	tex_res.id                  = 1;
-	tex_res.format              = FMT_8_8_8_8;
-	tex_res.w                   = accel_state->src_obj[0].width >> 1;
-	tex_res.h                   = accel_state->src_obj[0].height;
-	tex_res.pitch               = accel_state->src_obj[0].pitch >> 2;
-	if (pPriv->id == FOURCC_UYVY) {
-	    tex_res.dst_sel_x           = SQ_SEL_X; /* V */
-	    tex_res.dst_sel_y           = SQ_SEL_Z; /* U */
-	} else {
-	    tex_res.dst_sel_x           = SQ_SEL_Y; /* V */
-	    tex_res.dst_sel_y           = SQ_SEL_W; /* U */
-	}
-	tex_res.dst_sel_z           = SQ_SEL_1;
-	tex_res.dst_sel_w           = SQ_SEL_1;
-	tex_res.interlaced          = 0;
-
-	tex_res.base                = accel_state->src_obj[0].offset;
-	tex_res.mip_base            = accel_state->src_obj[0].offset;
-	tex_res.size                = accel_state->src_size[0];
-	if (accel_state->src_obj[0].tiling_flags == 0)
-	    tex_res.array_mode          = 1;
-	evergreen_set_tex_resource(pScrn, &tex_res, accel_state->src_obj[0].domain);
-
-	/* UV sampler */
-	tex_samp.id                 = 1;
-	evergreen_set_tex_sampler(pScrn, &tex_samp);
 	break;
     }
 
diff --git a/src/r600_shader.c b/src/r600_shader.c
index ab2f485..4cb2fc8 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1090,7 +1090,7 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                             CF_CONST(0),
                             COND(SQ_CF_COND_ACTIVE),
-                            I_COUNT(2),
+                            I_COUNT(1),
                             CALL_COUNT(0),
                             END_OF_PROGRAM(0),
                             VALID_PIXEL_MODE(0),
@@ -1120,8 +1120,8 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     shader[i++] = TEX_DWORD1(DST_GPR(1),
                              DST_REL(ABSOLUTE),
                              DST_SEL_X(SQ_SEL_X),
-                             DST_SEL_Y(SQ_SEL_MASK),
-                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_Y),
+                             DST_SEL_Z(SQ_SEL_Z),
                              DST_SEL_W(SQ_SEL_1),
                              LOD_BIAS(0),
                              COORD_TYPE_X(TEX_NORMALIZED),
@@ -1137,34 +1137,6 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
                              SRC_SEL_Z(SQ_SEL_0),
                              SRC_SEL_W(SQ_SEL_1));
     shader[i++] = TEX_DWORD_PAD;
-    /* 28/29 */
-    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-                             BC_FRAC_MODE(0),
-                             FETCH_WHOLE_QUAD(0),
-                             RESOURCE_ID(1),
-                             SRC_GPR(0),
-                             SRC_REL(ABSOLUTE),
-                             R7xx_ALT_CONST(0));
-    shader[i++] = TEX_DWORD1(DST_GPR(1),
-                             DST_REL(ABSOLUTE),
-                             DST_SEL_X(SQ_SEL_MASK),
-                             DST_SEL_Y(SQ_SEL_X),
-                             DST_SEL_Z(SQ_SEL_Y),
-                             DST_SEL_W(SQ_SEL_MASK),
-                             LOD_BIAS(0),
-                             COORD_TYPE_X(TEX_NORMALIZED),
-                             COORD_TYPE_Y(TEX_NORMALIZED),
-                             COORD_TYPE_Z(TEX_NORMALIZED),
-                             COORD_TYPE_W(TEX_NORMALIZED));
-    shader[i++] = TEX_DWORD2(OFFSET_X(0),
-                             OFFSET_Y(0),
-                             OFFSET_Z(0),
-                             SAMPLER_ID(1),
-                             SRC_SEL_X(SQ_SEL_X),
-                             SRC_SEL_Y(SQ_SEL_Y),
-                             SRC_SEL_Z(SQ_SEL_0),
-                             SRC_SEL_W(SQ_SEL_1));
-    shader[i++] = TEX_DWORD_PAD;
 
     return i;
 }
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 62da992..7610050 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -358,7 +358,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     default:
 	accel_state->src_size[0] = accel_state->src_obj[0].pitch * pPriv->h;
 
-	/* Y texture */
+	/* YUV texture */
 	tex_res.id                  = 0;
 	tex_res.w                   = accel_state->src_obj[0].width;
 	tex_res.h                   = accel_state->src_obj[0].height;
@@ -371,13 +371,13 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.bo                  = accel_state->src_obj[0].bo;
 	tex_res.mip_bo              = accel_state->src_obj[0].bo;
 
-	tex_res.format              = FMT_8_8;
 	if (pPriv->id == FOURCC_UYVY)
-	    tex_res.dst_sel_x           = SQ_SEL_Y; /* Y */
+	    tex_res.format              = FMT_GB_GR;
 	else
-	    tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
-	tex_res.dst_sel_y           = SQ_SEL_1;
-	tex_res.dst_sel_z           = SQ_SEL_1;
+	    tex_res.format              = FMT_BG_RG;
+	tex_res.dst_sel_x           = SQ_SEL_Y;
+	tex_res.dst_sel_y           = SQ_SEL_X;
+	tex_res.dst_sel_z           = SQ_SEL_Z;
 	tex_res.dst_sel_w           = SQ_SEL_1;
 
 	tex_res.request_size        = 1;
@@ -389,7 +389,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	    tex_res.tile_mode           = 1;
 	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
 
-	/* Y sampler */
+	/* YUV sampler */
 	tex_samp.id                 = 0;
 	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
 	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
@@ -403,33 +403,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_samp.mip_filter         = 0;			/* no mipmap */
 	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
 
-	/* UV texture */
-	tex_res.id                  = 1;
-	tex_res.format              = FMT_8_8_8_8;
-	tex_res.w                   = accel_state->src_obj[0].width >> 1;
-	tex_res.h                   = accel_state->src_obj[0].height;
-	tex_res.pitch               = accel_state->src_obj[0].pitch >> 2;
-	if (pPriv->id == FOURCC_UYVY) {
-	    tex_res.dst_sel_x           = SQ_SEL_X; /* V */
-	    tex_res.dst_sel_y           = SQ_SEL_Z; /* U */
-	} else {
-	    tex_res.dst_sel_x           = SQ_SEL_Y; /* V */
-	    tex_res.dst_sel_y           = SQ_SEL_W; /* U */
-	}
-	tex_res.dst_sel_z           = SQ_SEL_1;
-	tex_res.dst_sel_w           = SQ_SEL_1;
-	tex_res.interlaced          = 0;
-
-	tex_res.base                = accel_state->src_obj[0].offset;
-	tex_res.mip_base            = accel_state->src_obj[0].offset;
-	tex_res.size                = accel_state->src_size[0];
-	if (accel_state->src_obj[0].tiling_flags == 0)
-	    tex_res.tile_mode           = 1;
-	r600_set_tex_resource(pScrn, accel_state->ib, &tex_res, accel_state->src_obj[0].domain);
-
-	/* UV sampler */
-	tex_samp.id                 = 1;
-	r600_set_tex_sampler(pScrn, accel_state->ib, &tex_samp);
 	break;
     }
 
-- 
1.7.7



More information about the xorg-driver-ati mailing list