xf86-video-ati: Branch 'master' - 2 commits

Alex Deucher agd5f at kemper.freedesktop.org
Sun Mar 1 21:21:53 PST 2009


 src/Makefile.am                |    2 
 src/r600_exa.c                 | 2601 ++---------------------------------------
 src/r600_shader.c              | 2226 +++++++++++++++++++++++++++++++++++
 src/r600_shader.h              |   21 
 src/r600_textured_videofuncs.c |   68 -
 5 files changed, 2452 insertions(+), 2466 deletions(-)

New commits:
commit ccde35c3eda3fff0de29eb8c6fdc392629724a34
Author: Christian Koenig <deathsimple at vodafone.de>
Date:   Sun Mar 1 23:38:37 2009 -0500

    R6xx/R7xx: move shaders to r600_shader.c and fixup Xv PS
    
    patches from Christian Koenig with some adjustments from me

diff --git a/src/Makefile.am b/src/Makefile.am
index 7ff7d31..7cc2a6f 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -66,7 +66,7 @@ XMODE_SRCS=\
         modes/xf86DiDGA.c
 
 if USE_EXA
-RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c
+RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c
 endif
 
 AM_CFLAGS = @XORG_CFLAGS@ @DRI_CFLAGS@ @XMODES_CFLAGS@ -DDISABLE_EASF -DENABLE_ALL_SERVICE_FUNCTIONS -DATOM_BIOS -DATOM_BIOS_PARSER -DDRIVER_PARSER
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 633663c..a44b611 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1316,7 +1316,6 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     uint32_t blendcntl, dst_format;
     cb_config_t cb_conf;
     shader_config_t vs_conf, ps_conf;
-    int i = 0;
     uint32_t ps[24];
 
     /* return FALSE; */
@@ -1441,221 +1440,10 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 	    }
 	}
 
-	/* 0 */
-	ps[i++] = CF_DWORD0(ADDR(8));
-	ps[i++] = CF_DWORD1(POP_COUNT(0),
-			    CF_CONST(0),
-			    COND(SQ_CF_COND_ACTIVE),
-			    I_COUNT(2),
-			    CALL_COUNT(0),
-			    END_OF_PROGRAM(0),
-			    VALID_PIXEL_MODE(0),
-			    CF_INST(SQ_CF_INST_TEX),
-			    WHOLE_QUAD_MODE(0),
-			    BARRIER(1));
-
-	/* 1 */
-	ps[i++] = CF_ALU_DWORD0(ADDR(3),
-				KCACHE_BANK0(0),
-				KCACHE_BANK1(0),
-				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
-	ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
-				KCACHE_ADDR0(0),
-				KCACHE_ADDR1(0),
-				I_COUNT(4),
-				USES_WATERFALL(0),
-				CF_INST(SQ_CF_INST_ALU),
-				WHOLE_QUAD_MODE(0),
-				BARRIER(1));
-
-	/* 2 */
-	ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
-					  TYPE(SQ_EXPORT_PIXEL),
-					  RW_GPR(2),
-					  RW_REL(ABSOLUTE),
-					  INDEX_GPR(0),
-					  ELEM_SIZE(1));
-
-	ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					       SRC_SEL_Y(SQ_SEL_Y),
-					       SRC_SEL_Z(SQ_SEL_Z),
-					       SRC_SEL_W(SQ_SEL_W),
-					       R6xx_ELEM_LOOP(0),
-					       BURST_COUNT(1),
-					       END_OF_PROGRAM(1),
-					       VALID_PIXEL_MODE(0),
-					       CF_INST(SQ_CF_INST_EXPORT_DONE),
-					       WHOLE_QUAD_MODE(0),
-					       BARRIER(1));
-
-	/* 3 - alu 0 */
-	/* MUL gpr[2].x gpr[1].x gpr[0].x */
-	ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			     SRC0_REL(ABSOLUTE),
-			     SRC0_ELEM(ELEM_X),
-			     SRC0_NEG(0),
-			     SRC1_SEL(0),
-			     SRC1_REL(ABSOLUTE),
-			     SRC1_ELEM(ELEM_X),
-			     SRC1_NEG(0),
-			     INDEX_MODE(SQ_INDEX_LOOP),
-			     PRED_SEL(SQ_PRED_SEL_OFF),
-			     LAST(0));
-	ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-				 SRC0_ABS(0),
-				 SRC1_ABS(0),
-				 UPDATE_EXECUTE_MASK(0),
-				 UPDATE_PRED(0),
-				 WRITE_MASK(1),
-				 FOG_MERGE(0),
-				 OMOD(SQ_ALU_OMOD_OFF),
-				 ALU_INST(SQ_OP2_INST_MUL),
-				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
-				 DST_REL(ABSOLUTE),
-				 DST_ELEM(ELEM_X),
-				 CLAMP(1));
-	/* 4 - alu 1 */
-	/* MUL gpr[2].y gpr[1].y gpr[0].y */
-	ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			     SRC0_REL(ABSOLUTE),
-			     SRC0_ELEM(ELEM_Y),
-			     SRC0_NEG(0),
-			     SRC1_SEL(0),
-			     SRC1_REL(ABSOLUTE),
-			     SRC1_ELEM(ELEM_Y),
-			     SRC1_NEG(0),
-			     INDEX_MODE(SQ_INDEX_LOOP),
-			     PRED_SEL(SQ_PRED_SEL_OFF),
-			     LAST(0));
-	ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-				 SRC0_ABS(0),
-				 SRC1_ABS(0),
-				 UPDATE_EXECUTE_MASK(0),
-				 UPDATE_PRED(0),
-				 WRITE_MASK(1),
-				 FOG_MERGE(0),
-				 OMOD(SQ_ALU_OMOD_OFF),
-				 ALU_INST(SQ_OP2_INST_MUL),
-				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
-				 DST_REL(ABSOLUTE),
-				 DST_ELEM(ELEM_Y),
-				 CLAMP(1));
-	/* 5 - alu 2 */
-	/* MUL gpr[2].z gpr[1].z gpr[0].z */
-	ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			     SRC0_REL(ABSOLUTE),
-			     SRC0_ELEM(ELEM_Z),
-			     SRC0_NEG(0),
-			     SRC1_SEL(0),
-			     SRC1_REL(ABSOLUTE),
-			     SRC1_ELEM(ELEM_Z),
-			     SRC1_NEG(0),
-			     INDEX_MODE(SQ_INDEX_LOOP),
-			     PRED_SEL(SQ_PRED_SEL_OFF),
-			     LAST(0));
-	ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-				 SRC0_ABS(0),
-				 SRC1_ABS(0),
-				 UPDATE_EXECUTE_MASK(0),
-				 UPDATE_PRED(0),
-				 WRITE_MASK(1),
-				 FOG_MERGE(0),
-				 OMOD(SQ_ALU_OMOD_OFF),
-				 ALU_INST(SQ_OP2_INST_MUL),
-				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
-				 DST_REL(ABSOLUTE),
-				 DST_ELEM(ELEM_Z),
-				 CLAMP(1));
-	/* 6 - alu 3 */
-	/* MUL gpr[2].w gpr[1].w gpr[0].w */
-	ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			     SRC0_REL(ABSOLUTE),
-			     SRC0_ELEM(ELEM_W),
-			     SRC0_NEG(0),
-			     SRC1_SEL(0),
-			     SRC1_REL(ABSOLUTE),
-			     SRC1_ELEM(ELEM_W),
-			     SRC1_NEG(0),
-			     INDEX_MODE(SQ_INDEX_LOOP),
-			     PRED_SEL(SQ_PRED_SEL_OFF),
-			     LAST(1));
-	ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-				 SRC0_ABS(0),
-				 SRC1_ABS(0),
-				 UPDATE_EXECUTE_MASK(0),
-				 UPDATE_PRED(0),
-				 WRITE_MASK(1),
-				 FOG_MERGE(0),
-				 OMOD(SQ_ALU_OMOD_OFF),
-				 ALU_INST(SQ_OP2_INST_MUL),
-				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
-				 DST_REL(ABSOLUTE),
-				 DST_ELEM(ELEM_W),
-				 CLAMP(1));
-	/* 7 */
-	ps[i++] = 0x00000000;
-	ps[i++] = 0x00000000;
-
-	/* 8/9 - src */
-	ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			     BC_FRAC_MODE(0),
-			     FETCH_WHOLE_QUAD(0),
-			     RESOURCE_ID(0),
-			     SRC_GPR(0),
-			     SRC_REL(ABSOLUTE),
-			     R7xx_ALT_CONST(0));
-	ps[i++] = TEX_DWORD1(DST_GPR(0),
-			     DST_REL(ABSOLUTE),
-			     DST_SEL_X(src_r),
-			     DST_SEL_Y(src_g),
-			     DST_SEL_Z(src_b),
-			     DST_SEL_W(src_a),
-			     LOD_BIAS(0),
-			     COORD_TYPE_X(TEX_NORMALIZED),
-			     COORD_TYPE_Y(TEX_NORMALIZED),
-			     COORD_TYPE_Z(TEX_NORMALIZED),
-			     COORD_TYPE_W(TEX_NORMALIZED));
-	ps[i++] = TEX_DWORD2(OFFSET_X(0),
-			     OFFSET_Y(0),
-			     OFFSET_Z(0),
-			     SAMPLER_ID(0),
-			     SRC_SEL_X(SQ_SEL_X),
-			     SRC_SEL_Y(SQ_SEL_Y),
-			     SRC_SEL_Z(SQ_SEL_0),
-			     SRC_SEL_W(SQ_SEL_1));
-	ps[i++] = TEX_DWORD_PAD;
-	/* 10/11 - mask */
-	ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			     BC_FRAC_MODE(0),
-			     FETCH_WHOLE_QUAD(0),
-			     RESOURCE_ID(1),
-			     SRC_GPR(1),
-			     SRC_REL(ABSOLUTE),
-			     R7xx_ALT_CONST(0));
-	ps[i++] = TEX_DWORD1(DST_GPR(1),
-			     DST_REL(ABSOLUTE),
-			     DST_SEL_X(mask_r),
-			     DST_SEL_Y(mask_g),
-			     DST_SEL_Z(mask_b),
-			     DST_SEL_W(mask_a),
-			     LOD_BIAS(0),
-			     COORD_TYPE_X(TEX_NORMALIZED),
-			     COORD_TYPE_Y(TEX_NORMALIZED),
-			     COORD_TYPE_Z(TEX_NORMALIZED),
-			     COORD_TYPE_W(TEX_NORMALIZED));
-	ps[i++] = TEX_DWORD2(OFFSET_X(0),
-			     OFFSET_Y(0),
-			     OFFSET_Z(0),
-			     SAMPLER_ID(1),
-			     SRC_SEL_X(SQ_SEL_X),
-			     SRC_SEL_Y(SQ_SEL_Y),
-			     SRC_SEL_Z(SQ_SEL_0),
-			     SRC_SEL_W(SQ_SEL_1));
-	ps[i++] = TEX_DWORD_PAD;
+	R600_comp_mask_ps(info->ChipFamily, ps,
+			  src_a, src_r, src_g, src_b,
+			  mask_a, mask_r, mask_g, mask_b);
+
     } else {
 	int src_a, src_r, src_g, src_b;
 	/* setup pixel shader */
@@ -1675,67 +1463,9 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 	    src_a = SQ_SEL_W;
 	}
 
-	/* 0 */
-	ps[i++] = CF_DWORD0(ADDR(2));
-	ps[i++] = CF_DWORD1(POP_COUNT(0),
-			    CF_CONST(0),
-			    COND(SQ_CF_COND_ACTIVE),
-			    I_COUNT(1),
-			    CALL_COUNT(0),
-			    END_OF_PROGRAM(0),
-			    VALID_PIXEL_MODE(0),
-			    CF_INST(SQ_CF_INST_TEX),
-			    WHOLE_QUAD_MODE(0),
-			    BARRIER(1));
-	/* 1 */
-	ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
-					  TYPE(SQ_EXPORT_PIXEL),
-					  RW_GPR(0),
-					  RW_REL(ABSOLUTE),
-					  INDEX_GPR(0),
-					  ELEM_SIZE(1));
-
-	ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					       SRC_SEL_Y(SQ_SEL_Y),
-					       SRC_SEL_Z(SQ_SEL_Z),
-					       SRC_SEL_W(SQ_SEL_W),
-					       R6xx_ELEM_LOOP(0),
-					       BURST_COUNT(1),
-					       END_OF_PROGRAM(1),
-					       VALID_PIXEL_MODE(0),
-					       CF_INST(SQ_CF_INST_EXPORT_DONE),
-					       WHOLE_QUAD_MODE(0),
-					       BARRIER(1));
-
-
-	/* 2/3 - src */
-	ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			     BC_FRAC_MODE(0),
-			     FETCH_WHOLE_QUAD(0),
-			     RESOURCE_ID(0),
-			     SRC_GPR(0),
-			     SRC_REL(ABSOLUTE),
-			     R7xx_ALT_CONST(0));
-	ps[i++] = TEX_DWORD1(DST_GPR(0),
-			     DST_REL(ABSOLUTE),
-			     DST_SEL_X(src_r),
-			     DST_SEL_Y(src_g),
-			     DST_SEL_Z(src_b),
-			     DST_SEL_W(src_a),
-			     LOD_BIAS(0),
-			     COORD_TYPE_X(TEX_NORMALIZED),
-			     COORD_TYPE_Y(TEX_NORMALIZED),
-			     COORD_TYPE_Z(TEX_NORMALIZED),
-			     COORD_TYPE_W(TEX_NORMALIZED));
-	ps[i++] = TEX_DWORD2(OFFSET_X(0),
-			     OFFSET_Y(0),
-			     OFFSET_Z(0),
-			     SAMPLER_ID(0),
-			     SRC_SEL_X(SQ_SEL_X),
-			     SRC_SEL_Y(SQ_SEL_Y),
-			     SRC_SEL_Z(SQ_SEL_0),
-			     SRC_SEL_W(SQ_SEL_1));
-	ps[i++] = TEX_DWORD_PAD;
+	R600_comp_ps(info->ChipFamily, ps,
+		     src_a, src_r, src_g, src_b);
+
     }
 
     CLEAR (cb_conf);
@@ -2246,11 +1976,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
-    uint32_t *vs;
-    uint32_t *ps;
+    RADEONChipFamily ChipSet = info->ChipFamily;
+    uint32_t *shader;
     /* 512 bytes per shader for now */
     int size = 512 * 11;
-    int i;
 
     accel_state->shaders = NULL;
 
@@ -2260,1991 +1989,51 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
     if (accel_state->shaders == NULL)
 	return FALSE;
 
-    vs = (pointer)((char *)info->FB + accel_state->shaders->offset);
-    ps = (pointer)((char *)info->FB + accel_state->shaders->offset);
+    shader = (pointer)((char *)info->FB + accel_state->shaders->offset);
+
+    /*  solid vs --------------------------------------- */
     accel_state->solid_vs_offset = 0;
+    R600_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
+
+    /*  solid ps --------------------------------------- */
     accel_state->solid_ps_offset = 512;
+    R600_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
+
+    /*  copy vs --------------------------------------- */
     accel_state->copy_vs_offset = 1024;
+    R600_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
+
+    /*  copy ps --------------------------------------- */
     accel_state->copy_ps_offset = 1536;
+    R600_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
+
+    /*  comp vs --------------------------------------- */
     accel_state->comp_vs_offset = 2048;
+    R600_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
+
+    /*  comp ps --------------------------------------- */
     accel_state->comp_ps_offset = 2560;
+    /*  not yet */
+
+    /*  comp mask vs --------------------------------------- */
     accel_state->comp_mask_vs_offset = 3072;
+    R600_comp_mask_vs(ChipSet, shader + accel_state->comp_mask_vs_offset / 4);
+
+    /*  comp mask ps --------------------------------------- */
     accel_state->comp_mask_ps_offset = 3584;
+    /*  not yet */
+
+    /*  xv vs --------------------------------------- */
     accel_state->xv_vs_offset = 4096;
-    accel_state->xv_ps_offset_packed = 4608;
-    accel_state->xv_ps_offset_planar = 5120;
+    R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
 
-    /* solid vs --------------------------------------- */
-    i = accel_state->solid_vs_offset / 4;
-    /* 0 */
-    vs[i++] = CF_DWORD0(ADDR(4));
-    vs[i++] = CF_DWORD1(POP_COUNT(0),
-			CF_CONST(0),
-			COND(SQ_CF_COND_ACTIVE),
-			I_COUNT(1),
-			CALL_COUNT(0),
-			END_OF_PROGRAM(0),
-			VALID_PIXEL_MODE(0),
-			CF_INST(SQ_CF_INST_VTX),
-			WHOLE_QUAD_MODE(0),
-			BARRIER(1));
-    /* 1 */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
-				      TYPE(SQ_EXPORT_POS),
-				      RW_GPR(1),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(1),
-					   END_OF_PROGRAM(0),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(1));
-    /*2 - always export a param whether it's used or not */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
-				      TYPE(SQ_EXPORT_PARAM),
-				      RW_GPR(0),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(0),
-					   END_OF_PROGRAM(1),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-		 			   BARRIER(0));
-    /* 3 - padding */
-    vs[i++] = 0x00000000;
-    vs[i++] = 0x00000000;
-    /* 4/5 */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(8));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(0),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(1));
-    vs[i++] = VTX_DWORD_PAD;
-
-    /* solid ps --------------------------------------- */
-    i = accel_state->solid_ps_offset / 4;
-    /* 0 */
-    ps[i++] = CF_ALU_DWORD0(ADDR(2),
-			    KCACHE_BANK0(0),
-			    KCACHE_BANK1(0),
-			    KCACHE_MODE0(SQ_CF_KCACHE_NOP));
-    ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
-			    KCACHE_ADDR0(0),
-			    KCACHE_ADDR1(0),
-			    I_COUNT(4),
-			    USES_WATERFALL(0),
-			    CF_INST(SQ_CF_INST_ALU),
-			    WHOLE_QUAD_MODE(0),
-			    BARRIER(1));
-    /* 1 */
-    ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
-				      TYPE(SQ_EXPORT_PIXEL),
-				      RW_GPR(0),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(1));
-    ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(1),
-					   END_OF_PROGRAM(1),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(1));
-
-    /* 2 */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(256),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_X),
-			 SRC0_NEG(0),
-			 SRC1_SEL(0),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_AR_X),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(0),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_X),
-			     CLAMP(1));
-    /* 3 */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(256),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Y),
-			 SRC0_NEG(0),
-			 SRC1_SEL(0),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Y),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_AR_X),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(0),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Y),
-			     CLAMP(1));
-    /* 4 */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(256),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Z),
-			 SRC0_NEG(0),
-			 SRC1_SEL(0),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Z),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_AR_X),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(0),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Z),
-			     CLAMP(1));
-    /* 5 */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(256),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_W),
-			 SRC0_NEG(0),
-			 SRC1_SEL(0),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_W),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_AR_X),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(1));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_MOV),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(0),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_W),
-			     CLAMP(1));
-
-    /* copy vs --------------------------------------- */
-    i = accel_state->copy_vs_offset / 4;
-    /* 0 */
-    vs[i++] = CF_DWORD0(ADDR(4));
-    vs[i++] = CF_DWORD1(POP_COUNT(0),
-			CF_CONST(0),
-			COND(SQ_CF_COND_ACTIVE),
-			I_COUNT(2),
-			CALL_COUNT(0),
-			END_OF_PROGRAM(0),
-			VALID_PIXEL_MODE(0),
-			CF_INST(SQ_CF_INST_VTX),
-			WHOLE_QUAD_MODE(0),
-			BARRIER(1));
-    /* 1 */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
-				      TYPE(SQ_EXPORT_POS),
-				      RW_GPR(1),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(0),
-					   END_OF_PROGRAM(0),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(1));
-    /* 2 */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
-				      TYPE(SQ_EXPORT_PARAM),
-				      RW_GPR(0),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(0),
-					   END_OF_PROGRAM(1),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(0));
-    /* 3 */
-    vs[i++] = 0x00000000;
-    vs[i++] = 0x00000000;
-    /* 4/5 */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(16));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(0),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(1));
-    vs[i++] = VTX_DWORD_PAD;
-    /* 6/7 */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(8));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(8),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(0));
-    vs[i++] = VTX_DWORD_PAD;
-
-    /* copy ps --------------------------------------- */
-    i = accel_state->copy_ps_offset / 4;
-    /* CF INST 0 */
-    ps[i++] = CF_DWORD0(ADDR(2));
-    ps[i++] = CF_DWORD1(POP_COUNT(0),
-			CF_CONST(0),
-			COND(SQ_CF_COND_ACTIVE),
-			I_COUNT(1),
-			CALL_COUNT(0),
-			END_OF_PROGRAM(0),
-			VALID_PIXEL_MODE(0),
-			CF_INST(SQ_CF_INST_TEX),
-			WHOLE_QUAD_MODE(0),
-			BARRIER(1));
-    /* CF INST 1 */
-    ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
-				      TYPE(SQ_EXPORT_PIXEL),
-				      RW_GPR(0),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(1));
-    ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(1),
-					   END_OF_PROGRAM(1),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(1));
-    /* TEX INST 0 */
-    ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			 BC_FRAC_MODE(0),
-			 FETCH_WHOLE_QUAD(0),
-			 RESOURCE_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 R7xx_ALT_CONST(0));
-    ps[i++] = TEX_DWORD1(DST_GPR(0),
-			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_X), /* R */
-			 DST_SEL_Y(SQ_SEL_Y), /* G */
-			 DST_SEL_Z(SQ_SEL_Z), /* B */
-			 DST_SEL_W(SQ_SEL_W), /* A */
-			 LOD_BIAS(0),
-			 COORD_TYPE_X(TEX_UNNORMALIZED),
-			 COORD_TYPE_Y(TEX_UNNORMALIZED),
-			 COORD_TYPE_Z(TEX_UNNORMALIZED),
-			 COORD_TYPE_W(TEX_UNNORMALIZED));
-    ps[i++] = TEX_DWORD2(OFFSET_X(0),
-			 OFFSET_Y(0),
-			 OFFSET_Z(0),
-			 SAMPLER_ID(0),
-			 SRC_SEL_X(SQ_SEL_X),
-			 SRC_SEL_Y(SQ_SEL_Y),
-			 SRC_SEL_Z(SQ_SEL_0),
-			 SRC_SEL_W(SQ_SEL_1));
-    ps[i++] = TEX_DWORD_PAD;
-
-    /* xv vs --------------------------------------- */
-    i = accel_state->xv_vs_offset / 4;
-    /* 0 */
-    vs[i++] = CF_DWORD0(ADDR(4));
-    vs[i++] = CF_DWORD1(POP_COUNT(0),
-			CF_CONST(0),
-			COND(SQ_CF_COND_ACTIVE),
-			I_COUNT(2),
-			CALL_COUNT(0),
-			END_OF_PROGRAM(0),
-			VALID_PIXEL_MODE(0),
-			CF_INST(SQ_CF_INST_VTX),
-			WHOLE_QUAD_MODE(0),
-			BARRIER(1));
-    /* 1 */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
-				      TYPE(SQ_EXPORT_POS),
-				      RW_GPR(1),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(0),
-					   END_OF_PROGRAM(0),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(1));
-    /* 2 */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
-				      TYPE(SQ_EXPORT_PARAM),
-				      RW_GPR(0),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(0),
-					   END_OF_PROGRAM(1),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(0));
-    /* 3 */
-    vs[i++] = 0x00000000;
-    vs[i++] = 0x00000000;
-    /* 4/5 */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(16));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(0),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(1));
-    vs[i++] = VTX_DWORD_PAD;
-    /* 6/7 */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(8));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(8),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(0));
-    vs[i++] = VTX_DWORD_PAD;
-
-    /* xv ps packed ---------------------------------- */
-    i = accel_state->xv_ps_offset_packed / 4;
-    /* 0 */
-    ps[i++] = CF_DWORD0(ADDR(20));
-    ps[i++] = CF_DWORD1(POP_COUNT(0),
-			CF_CONST(0),
-			COND(SQ_CF_COND_ACTIVE),
-			I_COUNT(2),
-			CALL_COUNT(0),
-			END_OF_PROGRAM(0),
-			VALID_PIXEL_MODE(0),
-			CF_INST(SQ_CF_INST_TEX),
-			WHOLE_QUAD_MODE(0),
-			BARRIER(0));
-    /* 1 */
-    ps[i++] = CF_ALU_DWORD0(ADDR(3),
-			    KCACHE_BANK0(0),
-			    KCACHE_BANK1(0),
-			    KCACHE_MODE0(SQ_CF_KCACHE_NOP));
-    ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
-			    KCACHE_ADDR0(0),
-			    KCACHE_ADDR1(0),
-			    I_COUNT(16),
-			    USES_WATERFALL(0),
-			    CF_INST(SQ_CF_INST_ALU),
-			    WHOLE_QUAD_MODE(0),
-			    BARRIER(1));
-    /* 2 */
-    ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
-				      TYPE(SQ_EXPORT_PIXEL),
-				      RW_GPR(2),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(3));
-    ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(1),
-					   END_OF_PROGRAM(1),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(1));
-    /* Undo scaling of Y'CbCr values
-     *  Y' is scaled from 16:235
-     *  Cb/Cr are scaled from 16:240
-     */
-    /* 3 - alu 0 */
-    /* MULADD gpr[1].x gpr[1].x c[3].x c[3].y */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_X),
-			 SRC0_NEG(0),
-			 SRC1_SEL(259),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
-			     SRC2_REL(ABSOLUTE),
-			     SRC2_ELEM(ELEM_Y),
-			     SRC2_NEG(0),
-			     ALU_INST(SQ_OP3_INST_MULADD),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(1),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_X),
-			     CLAMP(1));
-    /* 4 - alu 1 */
-    /* MULADD gpr[1].y gpr[1].y c[3].z c[3].w */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Y),
-			 SRC0_NEG(0),
-			 SRC1_SEL(259),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Z),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
-			     SRC2_REL(ABSOLUTE),
-			     SRC2_ELEM(ELEM_W),
-			     SRC2_NEG(0),
-			     ALU_INST(SQ_OP3_INST_MULADD),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(1),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Y),
-			     CLAMP(0));
-    /* 5 - alu 2 */
-    /* MULADD gpr[1].z gpr[1].z c[3].z c[3].w */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Z),
-			 SRC0_NEG(0),
-			 SRC1_SEL(259),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Z),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
-			     SRC2_REL(ABSOLUTE),
-			     SRC2_ELEM(ELEM_W),
-			     SRC2_NEG(0),
-			     ALU_INST(SQ_OP3_INST_MULADD),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(1),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Z),
-			     CLAMP(0));
-    /* 6 - alu 3 */
-    /* MOV gpr[1].w 0.0 */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_X),
-			 SRC0_NEG(0),
-			 SRC1_SEL(SQ_ALU_SRC_0),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(1));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(1),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_W),
-			     CLAMP(0));
-    /* 7 - alu 4 */
-    /* DP4 gpr[2].x gpr[1].x c[0].x */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_X),
-			 SRC0_NEG(0),
-			 SRC1_SEL(256),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_X),
-			     CLAMP(1));
-    /* 8 - alu 5 */
-    /* DP4 gpr[2].y gpr[1].y c[0].y */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Y),
-			 SRC0_NEG(0),
-			 SRC1_SEL(256),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Y),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Y),
-			     CLAMP(1));
-    /* 9 - alu 6 */
-    /* DP4 gpr[2].z gpr[1].z c[0].z */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Z),
-			 SRC0_NEG(0),
-			 SRC1_SEL(256),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Z),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Z),
-			     CLAMP(1));
-    /* 10 - alu 7 */
-    /* DP4 gpr[2].w gpr[1].w c[0].w */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_W),
-			 SRC0_NEG(0),
-			 SRC1_SEL(256),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_W),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(1));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_021),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_W),
-			     CLAMP(1));
-    /* 11 - alu 8 */
-    /* DP4 gpr[2].x gpr[1].x c[1].x */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_X),
-			 SRC0_NEG(0),
-			 SRC1_SEL(257),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_X),
-			     CLAMP(1));
-    /* 12 - alu 9 */
-    /* DP4 gpr[2].y gpr[1].y c[1].y */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Y),
-			 SRC0_NEG(0),
-			 SRC1_SEL(257),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Y),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Y),
-			     CLAMP(1));
-    /* 13 - alu 10 */
-    /* DP4 gpr[2].z gpr[1].z c[1].z */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Z),
-			 SRC0_NEG(0),
-			 SRC1_SEL(257),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Z),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Z),
-			     CLAMP(1));
-    /* 14 - alu 11 */
-    /* DP4 gpr[2].w gpr[1].w c[1].w */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_W),
-			 SRC0_NEG(0),
-			 SRC1_SEL(257),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_W),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(1));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_021),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_W),
-			     CLAMP(1));
-    /* 15 - alu 12 */
-    /* DP4 gpr[2].x gpr[1].x c[2].x */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_X),
-			 SRC0_NEG(0),
-			 SRC1_SEL(258),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_X),
-			     CLAMP(1));
-    /* 16 - alu 13 */
-    /* DP4 gpr[2].y gpr[1].y c[2].y */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Y),
-			 SRC0_NEG(0),
-			 SRC1_SEL(258),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Y),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Y),
-			     CLAMP(1));
-    /* 17 - alu 14 */
-    /* DP4 gpr[2].z gpr[1].z c[2].z */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Z),
-			 SRC0_NEG(0),
-			 SRC1_SEL(258),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Z),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Z),
-			     CLAMP(1));
-    /* 18 - alu 15 */
-    /* DP4 gpr[2].w gpr[1].w c[2].w */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_W),
-			 SRC0_NEG(0),
-			 SRC1_SEL(258),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_W),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(1));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_021),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_W),
-			     CLAMP(1));
-    /* 19 - alignment */
-    ps[i++] = 0x00000000;
-    ps[i++] = 0x00000000;
-    /* 20/21 - tex 0 */
-    ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			 BC_FRAC_MODE(0),
-			 FETCH_WHOLE_QUAD(0),
-			 RESOURCE_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 R7xx_ALT_CONST(0));
-    ps[i++] = TEX_DWORD1(DST_GPR(1),
-			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_X),    /* R */
-			 DST_SEL_Y(SQ_SEL_MASK), /* G */
-			 DST_SEL_Z(SQ_SEL_MASK), /* B */
-			 DST_SEL_W(SQ_SEL_1),    /* A */
-			 LOD_BIAS(0),
-			 COORD_TYPE_X(TEX_NORMALIZED),
-			 COORD_TYPE_Y(TEX_NORMALIZED),
-			 COORD_TYPE_Z(TEX_NORMALIZED),
-			 COORD_TYPE_W(TEX_NORMALIZED));
-    ps[i++] = TEX_DWORD2(OFFSET_X(0),
-			 OFFSET_Y(0),
-			 OFFSET_Z(0),
-			 SAMPLER_ID(0),
-			 SRC_SEL_X(SQ_SEL_X),
-			 SRC_SEL_Y(SQ_SEL_Y),
-			 SRC_SEL_Z(SQ_SEL_0),
-			 SRC_SEL_W(SQ_SEL_1));
-    ps[i++] = TEX_DWORD_PAD;
-    /* 22/23 - tex 1 */
-    ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			 BC_FRAC_MODE(0),
-			 FETCH_WHOLE_QUAD(0),
-			 RESOURCE_ID(1),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 R7xx_ALT_CONST(0));
-    ps[i++] = TEX_DWORD1(DST_GPR(1),
-			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_MASK), /* R */
-			 DST_SEL_Y(SQ_SEL_X),    /* G */
-			 DST_SEL_Z(SQ_SEL_Y),    /* B */
-			 DST_SEL_W(SQ_SEL_MASK), /* A */
-			 LOD_BIAS(0),
-			 COORD_TYPE_X(TEX_NORMALIZED),
-			 COORD_TYPE_Y(TEX_NORMALIZED),
-			 COORD_TYPE_Z(TEX_NORMALIZED),
-			 COORD_TYPE_W(TEX_NORMALIZED));
-    ps[i++] = TEX_DWORD2(OFFSET_X(0),
-			 OFFSET_Y(0),
-			 OFFSET_Z(0),
-			 SAMPLER_ID(1),
-			 SRC_SEL_X(SQ_SEL_X),
-			 SRC_SEL_Y(SQ_SEL_Y),
-			 SRC_SEL_Z(SQ_SEL_0),
-			 SRC_SEL_W(SQ_SEL_1));
-    ps[i++] = TEX_DWORD_PAD;
-
-    /* xv ps planar ---------------------------------- */
-    i = accel_state->xv_ps_offset_planar / 4;
-    /* 0 */
-    ps[i++] = CF_DWORD0(ADDR(20));
-    ps[i++] = CF_DWORD1(POP_COUNT(0),
-			CF_CONST(0),
-			COND(SQ_CF_COND_ACTIVE),
-			I_COUNT(3),
-			CALL_COUNT(0),
-			END_OF_PROGRAM(0),
-			VALID_PIXEL_MODE(0),
-			CF_INST(SQ_CF_INST_TEX),
-			WHOLE_QUAD_MODE(0),
-			BARRIER(0));
-    /* 1 */
-    ps[i++] = CF_ALU_DWORD0(ADDR(3),
-			    KCACHE_BANK0(0),
-			    KCACHE_BANK1(0),
-			    KCACHE_MODE0(SQ_CF_KCACHE_NOP));
-    ps[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
-			    KCACHE_ADDR0(0),
-			    KCACHE_ADDR1(0),
-			    I_COUNT(16),
-			    USES_WATERFALL(0),
-			    CF_INST(SQ_CF_INST_ALU),
-			    WHOLE_QUAD_MODE(0),
-			    BARRIER(1));
-    /* 2 */
-    ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
-				      TYPE(SQ_EXPORT_PIXEL),
-				      RW_GPR(2),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(3));
-    ps[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(1),
-					   END_OF_PROGRAM(1),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(1));
-    /* Undo scaling of Y'CbCr values
-     *  Y' is scaled from 16:235
-     *  Cb/Cr are scaled from 16:240
-     */
-    /* 3 - alu 0 */
-    /* MULADD gpr[1].x gpr[1].x c[3].x c[3].y */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_X),
-			 SRC0_NEG(0),
-			 SRC1_SEL(259),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
-			     SRC2_REL(ABSOLUTE),
-			     SRC2_ELEM(ELEM_Y),
-			     SRC2_NEG(0),
-			     ALU_INST(SQ_OP3_INST_MULADD),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(1),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_X),
-			     CLAMP(1));
-    /* 4 - alu 1 */
-    /* MULADD gpr[1].y gpr[1].y c[3].z c[3].w */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Y),
-			 SRC0_NEG(0),
-			 SRC1_SEL(259),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Z),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
-			     SRC2_REL(ABSOLUTE),
-			     SRC2_ELEM(ELEM_W),
-			     SRC2_NEG(0),
-			     ALU_INST(SQ_OP3_INST_MULADD),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(1),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Y),
-			     CLAMP(0));
-    /* 5 - alu 2 */
-    /* MULADD gpr[1].z gpr[1].z c[3].z c[3].w */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Z),
-			 SRC0_NEG(0),
-			 SRC1_SEL(259),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Z),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
-			     SRC2_REL(ABSOLUTE),
-			     SRC2_ELEM(ELEM_W),
-			     SRC2_NEG(0),
-			     ALU_INST(SQ_OP3_INST_MULADD),
-			     BANK_SWIZZLE(SQ_ALU_VEC_012),
-			     DST_GPR(1),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Z),
-			     CLAMP(0));
-    /* 6 - alu 3 */
-    /* MOV gpr[1].w 0.0 */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_X),
-			 SRC0_NEG(0),
-			 SRC1_SEL(SQ_ALU_SRC_0),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(1));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(1),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_W),
-			     CLAMP(0));
-    /* 7 - alu 4 */
-    /* DP4 gpr[2].x gpr[1].x c[0].x */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_X),
-			 SRC0_NEG(0),
-			 SRC1_SEL(256),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_X),
-			     CLAMP(1));
-    /* 8 - alu 5 */
-    /* DP4 gpr[2].y gpr[1].y c[0].y */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Y),
-			 SRC0_NEG(0),
-			 SRC1_SEL(256),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Y),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Y),
-			     CLAMP(1));
-    /* 9 - alu 6 */
-    /* DP4 gpr[2].z gpr[1].z c[0].z */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Z),
-			 SRC0_NEG(0),
-			 SRC1_SEL(256),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Z),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Z),
-			     CLAMP(1));
-    /* 10 - alu 7 */
-    /* DP4 gpr[2].w gpr[1].w c[0].w */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_W),
-			 SRC0_NEG(0),
-			 SRC1_SEL(256),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_W),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(1));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_021),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_W),
-			     CLAMP(1));
-    /* 11 - alu 8 */
-    /* DP4 gpr[2].x gpr[1].x c[1].x */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_X),
-			 SRC0_NEG(0),
-			 SRC1_SEL(257),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_X),
-			     CLAMP(1));
-    /* 12 - alu 9 */
-    /* DP4 gpr[2].y gpr[1].y c[1].y */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Y),
-			 SRC0_NEG(0),
-			 SRC1_SEL(257),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Y),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Y),
-			     CLAMP(1));
-    /* 13 - alu 10 */
-    /* DP4 gpr[2].z gpr[1].z c[1].z */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Z),
-			 SRC0_NEG(0),
-			 SRC1_SEL(257),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Z),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Z),
-			     CLAMP(1));
-    /* 14 - alu 11 */
-    /* DP4 gpr[2].w gpr[1].w c[1].w */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_W),
-			 SRC0_NEG(0),
-			 SRC1_SEL(257),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_W),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(1));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_021),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_W),
-			     CLAMP(1));
-    /* 15 - alu 12 */
-    /* DP4 gpr[2].x gpr[1].x c[2].x */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_X),
-			 SRC0_NEG(0),
-			 SRC1_SEL(258),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_X),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_X),
-			     CLAMP(1));
-    /* 16 - alu 13 */
-    /* DP4 gpr[2].y gpr[1].y c[2].y */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Y),
-			 SRC0_NEG(0),
-			 SRC1_SEL(258),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Y),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Y),
-			     CLAMP(1));
-    /* 17 - alu 14 */
-    /* DP4 gpr[2].z gpr[1].z c[2].z */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_Z),
-			 SRC0_NEG(0),
-			 SRC1_SEL(258),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_Z),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(0));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(1),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_102),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_Z),
-			     CLAMP(1));
-    /* 18 - alu 15 */
-    /* DP4 gpr[2].w gpr[1].w c[2].w */
-    ps[i++] = ALU_DWORD0(SRC0_SEL(1),
-			 SRC0_REL(ABSOLUTE),
-			 SRC0_ELEM(ELEM_W),
-			 SRC0_NEG(0),
-			 SRC1_SEL(258),
-			 SRC1_REL(ABSOLUTE),
-			 SRC1_ELEM(ELEM_W),
-			 SRC1_NEG(0),
-			 INDEX_MODE(SQ_INDEX_LOOP),
-			 PRED_SEL(SQ_PRED_SEL_OFF),
-			 LAST(1));
-    ps[i++] = ALU_DWORD1_OP2(info->ChipFamily,
-			     SRC0_ABS(0),
-			     SRC1_ABS(0),
-			     UPDATE_EXECUTE_MASK(0),
-			     UPDATE_PRED(0),
-			     WRITE_MASK(0),
-			     FOG_MERGE(0),
-			     OMOD(SQ_ALU_OMOD_OFF),
-			     ALU_INST(SQ_OP2_INST_DOT4),
-			     BANK_SWIZZLE(SQ_ALU_VEC_021),
-			     DST_GPR(2),
-			     DST_REL(ABSOLUTE),
-			     DST_ELEM(ELEM_W),
-			     CLAMP(1));
-    /* 19 - alignment */
-    ps[i++] = 0x00000000;
-    ps[i++] = 0x00000000;
-    /* 20/21 - tex 0 */
-    ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			 BC_FRAC_MODE(0),
-			 FETCH_WHOLE_QUAD(0),
-			 RESOURCE_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 R7xx_ALT_CONST(0));
-    ps[i++] = TEX_DWORD1(DST_GPR(1),
-			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_X),    /* R */
-			 DST_SEL_Y(SQ_SEL_MASK), /* G */
-			 DST_SEL_Z(SQ_SEL_MASK), /* B */
-			 DST_SEL_W(SQ_SEL_1),    /* A */
-			 LOD_BIAS(0),
-			 COORD_TYPE_X(TEX_NORMALIZED),
-			 COORD_TYPE_Y(TEX_NORMALIZED),
-			 COORD_TYPE_Z(TEX_NORMALIZED),
-			 COORD_TYPE_W(TEX_NORMALIZED));
-    ps[i++] = TEX_DWORD2(OFFSET_X(0),
-			 OFFSET_Y(0),
-			 OFFSET_Z(0),
-			 SAMPLER_ID(0),
-			 SRC_SEL_X(SQ_SEL_X),
-			 SRC_SEL_Y(SQ_SEL_Y),
-			 SRC_SEL_Z(SQ_SEL_0),
-			 SRC_SEL_W(SQ_SEL_1));
-    ps[i++] = TEX_DWORD_PAD;
-    /* 22/23 - tex 1 */
-    ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			 BC_FRAC_MODE(0),
-			 FETCH_WHOLE_QUAD(0),
-			 RESOURCE_ID(1),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 R7xx_ALT_CONST(0));
-    ps[i++] = TEX_DWORD1(DST_GPR(1),
-			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_MASK), /* R */
-			 DST_SEL_Y(SQ_SEL_MASK), /* G */
-			 DST_SEL_Z(SQ_SEL_X),    /* B */
-			 DST_SEL_W(SQ_SEL_MASK), /* A */
-			 LOD_BIAS(0),
-			 COORD_TYPE_X(TEX_NORMALIZED),
-			 COORD_TYPE_Y(TEX_NORMALIZED),
-			 COORD_TYPE_Z(TEX_NORMALIZED),
-			 COORD_TYPE_W(TEX_NORMALIZED));
-    ps[i++] = TEX_DWORD2(OFFSET_X(0),
-			 OFFSET_Y(0),
-			 OFFSET_Z(0),
-			 SAMPLER_ID(1),
-			 SRC_SEL_X(SQ_SEL_X),
-			 SRC_SEL_Y(SQ_SEL_Y),
-			 SRC_SEL_Z(SQ_SEL_0),
-			 SRC_SEL_W(SQ_SEL_1));
-    ps[i++] = TEX_DWORD_PAD;
-    /* 24/25 - tex 2 */
-    ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			 BC_FRAC_MODE(0),
-			 FETCH_WHOLE_QUAD(0),
-			 RESOURCE_ID(2),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 R7xx_ALT_CONST(0));
-    ps[i++] = TEX_DWORD1(DST_GPR(1),
-			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_MASK), /* R */
-			 DST_SEL_Y(SQ_SEL_X),    /* G */
-			 DST_SEL_Z(SQ_SEL_MASK), /* B */
-			 DST_SEL_W(SQ_SEL_MASK), /* A */
-			 LOD_BIAS(0),
-			 COORD_TYPE_X(TEX_NORMALIZED),
-			 COORD_TYPE_Y(TEX_NORMALIZED),
-			 COORD_TYPE_Z(TEX_NORMALIZED),
-			 COORD_TYPE_W(TEX_NORMALIZED));
-    ps[i++] = TEX_DWORD2(OFFSET_X(0),
-			 OFFSET_Y(0),
-			 OFFSET_Z(0),
-			 SAMPLER_ID(2),
-			 SRC_SEL_X(SQ_SEL_X),
-			 SRC_SEL_Y(SQ_SEL_Y),
-			 SRC_SEL_Z(SQ_SEL_0),
-			 SRC_SEL_W(SQ_SEL_1));
-    ps[i++] = TEX_DWORD_PAD;
-
-    /* comp mask vs --------------------------------------- */
-    i = accel_state->comp_mask_vs_offset / 4;
-    /* 0 */
-    vs[i++] = CF_DWORD0(ADDR(4));
-    vs[i++] = CF_DWORD1(POP_COUNT(0),
-			CF_CONST(0),
-			COND(SQ_CF_COND_ACTIVE),
-			I_COUNT(3),
-			CALL_COUNT(0),
-			END_OF_PROGRAM(0),
-			VALID_PIXEL_MODE(0),
-			CF_INST(SQ_CF_INST_VTX),
-			WHOLE_QUAD_MODE(0),
-			BARRIER(1));
-    /* 1 - dst */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
-				      TYPE(SQ_EXPORT_POS),
-				      RW_GPR(2),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(1),
-					   END_OF_PROGRAM(0),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(1));
-    /* 2 - src */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
-				      TYPE(SQ_EXPORT_PARAM),
-				      RW_GPR(1),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(1),
-					   END_OF_PROGRAM(0),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(0));
-    /* 3 - mask */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
-				      TYPE(SQ_EXPORT_PARAM),
-				      RW_GPR(0),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(1),
-					   END_OF_PROGRAM(1),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(0));
-    /* 4/5 - dst */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(24));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(2),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(0),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(1));
-    vs[i++] = VTX_DWORD_PAD;
-    /* 6/7 - src */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(8));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(8),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(0));
-    vs[i++] = VTX_DWORD_PAD;
-    /* 8/9 - mask */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(8));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(16),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(0));
-    vs[i++] = VTX_DWORD_PAD;
-
-    /* comp mask vs --------------------------------------- */
-    i = accel_state->comp_mask_vs_offset / 4;
-    /* 0 */
-    vs[i++] = CF_DWORD0(ADDR(4));
-    vs[i++] = CF_DWORD1(POP_COUNT(0),
-			CF_CONST(0),
-			COND(SQ_CF_COND_ACTIVE),
-			I_COUNT(3),
-			CALL_COUNT(0),
-			END_OF_PROGRAM(0),
-			VALID_PIXEL_MODE(0),
-			CF_INST(SQ_CF_INST_VTX),
-			WHOLE_QUAD_MODE(0),
-			BARRIER(1));
-    /* 1 - dst */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
-				      TYPE(SQ_EXPORT_POS),
-				      RW_GPR(2),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(1),
-					   END_OF_PROGRAM(0),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(1));
-    /* 2 - src */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
-				      TYPE(SQ_EXPORT_PARAM),
-				      RW_GPR(1),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(1),
-					   END_OF_PROGRAM(0),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(0));
-    /* 3 - mask */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
-				      TYPE(SQ_EXPORT_PARAM),
-				      RW_GPR(0),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(1),
-					   END_OF_PROGRAM(1),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(0));
-    /* 4/5 - dst */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(24));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(2),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(0),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(1));
-    vs[i++] = VTX_DWORD_PAD;
-    /* 6/7 - src */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(8));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(8),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(0));
-    vs[i++] = VTX_DWORD_PAD;
-    /* 8/9 - mask */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(8));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(16),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(0));
-    vs[i++] = VTX_DWORD_PAD;
-
-    // comp mask ps --------------------------------------- */
-    /* not yet */
-
-    /* comp vs --------------------------------------- */
-    i = accel_state->comp_vs_offset / 4;
-    /* 0 */
-    vs[i++] = CF_DWORD0(ADDR(4));
-    vs[i++] = CF_DWORD1(POP_COUNT(0),
-			CF_CONST(0),
-			COND(SQ_CF_COND_ACTIVE),
-			I_COUNT(2),
-			CALL_COUNT(0),
-			END_OF_PROGRAM(0),
-			VALID_PIXEL_MODE(0),
-			CF_INST(SQ_CF_INST_VTX),
-			WHOLE_QUAD_MODE(0),
-			BARRIER(1));
-    /* 1 - dst */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
-				      TYPE(SQ_EXPORT_POS),
-				      RW_GPR(1),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(0),
-					   END_OF_PROGRAM(0),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(1));
-    /* 2 - src */
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
-				      TYPE(SQ_EXPORT_PARAM),
-				      RW_GPR(0),
-				      RW_REL(ABSOLUTE),
-				      INDEX_GPR(0),
-				      ELEM_SIZE(0));
-    vs[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					   SRC_SEL_Y(SQ_SEL_Y),
-					   SRC_SEL_Z(SQ_SEL_Z),
-					   SRC_SEL_W(SQ_SEL_W),
-					   R6xx_ELEM_LOOP(0),
-					   BURST_COUNT(0),
-					   END_OF_PROGRAM(1),
-					   VALID_PIXEL_MODE(0),
-					   CF_INST(SQ_CF_INST_EXPORT_DONE),
-					   WHOLE_QUAD_MODE(0),
-					   BARRIER(0));
-    /* 3 */
-    vs[i++] = 0x00000000;
-    vs[i++] = 0x00000000;
-    /* 4/5 - dst */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(16));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(1),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(0),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(1));
-    vs[i++] = VTX_DWORD_PAD;
-    /* 6/7 - src */
-    vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			 FETCH_WHOLE_QUAD(0),
-			 BUFFER_ID(0),
-			 SRC_GPR(0),
-			 SRC_REL(ABSOLUTE),
-			 SRC_SEL_X(SQ_SEL_X),
-			 MEGA_FETCH_COUNT(8));
-    vs[i++] = VTX_DWORD1_GPR(DST_GPR(0),
-			     DST_REL(0),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_0),
-			     DST_SEL_W(SQ_SEL_1),
-			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT),
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    vs[i++] = VTX_DWORD2(OFFSET(8),
-			 ENDIAN_SWAP(ENDIAN_NONE),
-			 CONST_BUF_NO_STRIDE(0),
-			 MEGA_FETCH(0));
-    vs[i++] = VTX_DWORD_PAD;
-
-    /* comp ps --------------------------------------- */
-    /* not yet */
+    /*  xv ps packed --------------------------------------- */
+    accel_state->xv_ps_offset_packed = 4608;
+    R600_xv_ps_packet(ChipSet, shader + accel_state->xv_ps_offset_packed / 4);
 
+    /*  xv ps planar ---------------------------------- */
+    accel_state->xv_ps_offset_planar = 5120;
+    R600_xv_ps_planar(ChipSet, shader + accel_state->xv_ps_offset_planar / 4);
 
     return TRUE;
 }
diff --git a/src/r600_shader.c b/src/r600_shader.c
new file mode 100644
index 0000000..ba716da
--- /dev/null
+++ b/src/r600_shader.c
@@ -0,0 +1,2226 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher at amd.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "xf86.h"
+
+#include "radeon.h"
+#include "r600_shader.h"
+#include "r600_reg.h"
+
+/* solid vs: R600_solid_vs() writes the 12 dwords of the "solid" vertex shader to shader[0..11] and returns that count; ChipSet is not referenced in this body */
+int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 - CF: vertex fetch clause with one instruction; ADDR(4) points at the fetch labelled 4/5 below */
+    shader[i++] = CF_DWORD0(ADDR(4));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(1),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_VTX),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* 1 - CF: export GPR1 (filled by the fetch) as the position, CF_POS0 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(1),
+					       END_OF_PROGRAM(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(1));
+    /* 2 - CF: always export a param (GPR0 here) whether it's used or not; this word also sets END_OF_PROGRAM */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(0),
+					       END_OF_PROGRAM(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(0));
+    /* 3 - padding: two zero dwords so the fetch below sits at the slot named by ADDR(4) */
+    shader[i++] = 0x00000000;
+    shader[i++] = 0x00000000;
+    /* 4/5 - VTX: fetch FMT_32_32_FLOAT at offset 0 into GPR1.xy; z is selected as 0 and w as 1 */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(1));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+/* solid ps: R600_solid_ps() writes the 12 dwords of the "solid" pixel shader to shader[0..11] and returns that count; ChipSet is forwarded to ALU_DWORD1_OP2 */
+int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 - CF: ALU clause of four instructions; ADDR(2) points at the MOVs labelled 2-5 below */
+    shader[i++] = CF_ALU_DWORD0(ADDR(2),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				USES_WATERFALL(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+    /* 1 - CF: export GPR0 to CF_PIXEL_MRT0; this word also sets END_OF_PROGRAM */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(1),
+					       END_OF_PROGRAM(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(1));
+
+    /* 2 - ALU: MOV GPR0.x <- source select 256, element X, clamped (256 is presumably constant 0 - confirm in r600_reg.h) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(1));
+    /* 3 - ALU: MOV GPR0.y <- source select 256, element Y, clamped */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Y),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(1));
+    /* 4 - ALU: MOV GPR0.z <- source select 256, element Z, clamped */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Z),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Z),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(1));
+    /* 5 - ALU: MOV GPR0.w <- source select 256, element W, clamped; LAST(1) marks the final instruction of the group */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(256),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_W),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_W),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(1));
+
+    return i;
+}
+
+/* copy vs: R600_copy_vs() writes the 16 dwords of the "copy" vertex shader to shader[0..15] and returns that count; ChipSet is not referenced in this body */
+int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 - CF: vertex fetch clause with two instructions; ADDR(4) points at the fetches labelled 4/5 and 6/7 below */
+    shader[i++] = CF_DWORD0(ADDR(4));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(2),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_VTX),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* 1 - CF: export GPR1 (first fetch) as the position, CF_POS0 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(0),
+					       END_OF_PROGRAM(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(1));
+    /* 2 - CF: export GPR0 (second fetch) as parameter 0; this word also sets END_OF_PROGRAM */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(0),
+					       END_OF_PROGRAM(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(0));
+    /* 3 - padding: two zero dwords so the first fetch sits at the slot named by ADDR(4) */
+    shader[i++] = 0x00000000;
+    shader[i++] = 0x00000000;
+    /* 4/5 - VTX: fetch FMT_32_32_FLOAT at offset 0 into GPR1.xy (z selected as 0, w as 1); MEGA_FETCH_COUNT(16) */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(16));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(1));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 6/7 - VTX: fetch FMT_32_32_FLOAT at offset 8 into GPR0.xy (z selected as 0, w as 1); MEGA_FETCH(0) */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(0));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+/* copy ps: R600_copy_ps() writes the 8 dwords of the "copy" pixel shader to shader[0..7] and returns that count; ChipSet is not referenced in this body */
+int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i=0;
+
+    /* CF INST 0 - texture clause with one instruction; ADDR(2) points at TEX INST 0 below */
+    shader[i++] = CF_DWORD0(ADDR(2));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(1),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_TEX),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* CF INST 1 - export GPR0 (the sample result) to CF_PIXEL_MRT0; this word also sets END_OF_PROGRAM */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(1),
+					       END_OF_PROGRAM(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(1));
+    /* TEX INST 0 - SAMPLE resource 0 / sampler 0 using unnormalized coordinates GPR0.xy (z selected as 0, w as 1); result goes to GPR0.xyzw */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     BC_FRAC_MODE(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     R7xx_ALT_CONST(0));
+    shader[i++] = TEX_DWORD1(DST_GPR(0),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(SQ_SEL_X), /* R */
+			     DST_SEL_Y(SQ_SEL_Y), /* G */
+			     DST_SEL_Z(SQ_SEL_Z), /* B */
+			     DST_SEL_W(SQ_SEL_W), /* A */
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_UNNORMALIZED),
+			     COORD_TYPE_Y(TEX_UNNORMALIZED),
+			     COORD_TYPE_Z(TEX_UNNORMALIZED),
+			     COORD_TYPE_W(TEX_UNNORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(0),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    return i;
+}
+
+/*
+ * ; xv vertex shader - R600_xv_vs() writes these 16 dwords to shader[0..15] and returns that count; ChipSet is not referenced
+ * 00 VTX: ADDR(4) CNT(2) 
+ *       0  VFETCH R1.xy01, R0.x, fc0  MEGA(16) FORMAT(32_32_FLOAT) 
+ *          FORMAT_COMP(SIGNED) 
+ *       1  VFETCH R0.xy01, R0.x, fc0  MINI(8) OFFSET(8) FORMAT(32_32_FLOAT) 
+ *          FORMAT_COMP(SIGNED) 
+ * 01 EXP_DONE: POS0, R1
+ * 02 EXP_DONE: PARAM0, R0  NO_BARRIER 
+ * END_OF_PROGRAM
+ */
+int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 - CF: vertex fetch clause with two instructions; ADDR(4) points at the fetches labelled 4/5 and 6/7 below */
+    shader[i++] = CF_DWORD0(ADDR(4));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(2),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_VTX),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+    /* 1 - CF: export GPR1 (first fetch) as the position, CF_POS0 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+                                          TYPE(SQ_EXPORT_POS),
+                                          RW_GPR(1),
+                                          RW_REL(ABSOLUTE),
+                                          INDEX_GPR(0),
+                                          ELEM_SIZE(3));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                               SRC_SEL_Y(SQ_SEL_Y),
+                                               SRC_SEL_Z(SQ_SEL_Z),
+                                               SRC_SEL_W(SQ_SEL_W),
+                                               R6xx_ELEM_LOOP(0),
+                                               BURST_COUNT(1),
+                                               END_OF_PROGRAM(0),
+                                               VALID_PIXEL_MODE(0),
+                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                               WHOLE_QUAD_MODE(0),
+                                               BARRIER(1));
+    /* 2 - CF: export GPR0 (second fetch) as parameter 0 with BARRIER(0); this word also sets END_OF_PROGRAM */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+                                          TYPE(SQ_EXPORT_PARAM),
+                                          RW_GPR(0),
+                                          RW_REL(ABSOLUTE),
+                                          INDEX_GPR(0),
+                                          ELEM_SIZE(3));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                               SRC_SEL_Y(SQ_SEL_Y),
+                                               SRC_SEL_Z(SQ_SEL_Z),
+                                               SRC_SEL_W(SQ_SEL_W),
+                                               R6xx_ELEM_LOOP(0),
+                                               BURST_COUNT(1),
+                                               END_OF_PROGRAM(1),
+                                               VALID_PIXEL_MODE(0),
+                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                               WHOLE_QUAD_MODE(0),
+                                               BARRIER(0));
+    shader[i++] = 0x00000000; /* 3 - padding: two zero dwords so the first fetch sits at the slot named by ADDR(4) */
+    shader[i++] = 0x00000000;
+    /* 4/5 - VTX: fetch FMT_32_32_FLOAT at offset 0 into GPR1.xy (z selected as 0, w as 1); MEGA_FETCH_COUNT(16) */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+                             FETCH_WHOLE_QUAD(0),
+                             BUFFER_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             SRC_SEL_X(SQ_SEL_X),
+                             MEGA_FETCH_COUNT(16));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_SEL_X(SQ_SEL_X),
+                                 DST_SEL_Y(SQ_SEL_Y),
+                                 DST_SEL_Z(SQ_SEL_0),
+                                 DST_SEL_W(SQ_SEL_1),
+                                 USE_CONST_FIELDS(0),
+                                 DATA_FORMAT(FMT_32_32_FLOAT),
+                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+                             ENDIAN_SWAP(ENDIAN_NONE),
+                             CONST_BUF_NO_STRIDE(0),
+                             MEGA_FETCH(1));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 6/7 - VTX: fetch FMT_32_32_FLOAT at offset 8 into GPR0.xy (z selected as 0, w as 1); MEGA_FETCH(0) */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+                             FETCH_WHOLE_QUAD(0),
+                             BUFFER_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             SRC_SEL_X(SQ_SEL_X),
+                             MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_SEL_X(SQ_SEL_X),
+                                 DST_SEL_Y(SQ_SEL_Y),
+                                 DST_SEL_Z(SQ_SEL_0),
+                                 DST_SEL_W(SQ_SEL_1),
+                                 USE_CONST_FIELDS(0),
+                                 DATA_FORMAT(FMT_32_32_FLOAT),
+                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+                             ENDIAN_SWAP(ENDIAN_NONE),
+                             CONST_BUF_NO_STRIDE(0),
+                             MEGA_FETCH(0));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+/*
+ * ; xv ps packed
+ * 00 TEX: ADDR(20) CNT(2) NO_BARRIER 
+ *       0  SAMPLE R1.x__1, R0.xy01, t0, s0
+ *       1  SAMPLE R1._xy_, R0.xy01, t1, s1
+ * 01 ALU: ADDR(3) CNT(16) 
+ *       2  x: MULADD      R1.x,  R1.x,  C3.x,  C3.y      CLAMP 
+ *          y: MULADD      R1.y,  R1.y,  C3.z,  C3.w      
+ *          z: MULADD      R1.z,  R1.z,  C3.z,  C3.w      
+ *          w: MOV         R1.w,  0.0f
+ *       3  x: DOT4        R2.x,  R1.x,  C0.x      CLAMP VEC_102 
+ *          y: DOT4        ____,  R1.y,  C0.y      CLAMP VEC_102 
+ *          z: DOT4        ____,  R1.z,  C0.z      CLAMP VEC_102 
+ *          w: DOT4        ____,  R1.w,  C0.w      CLAMP VEC_021 
+ *       4  x: DOT4        ____,  R1.x,  C1.x      CLAMP VEC_102 
+ *          y: DOT4        R2.y,  R1.y,  C1.y      CLAMP VEC_102 
+ *          z: DOT4        ____,  R1.z,  C1.z      CLAMP VEC_102 
+ *          w: DOT4        ____,  R1.w,  C1.w      CLAMP VEC_021 
+ *       5  x: DOT4        ____,  R1.x,  C2.x      CLAMP VEC_102 
+ *          y: DOT4        ____,  R1.y,  C2.y      CLAMP VEC_102 
+ *          z: DOT4        R2.z,  R1.z,  C2.z      CLAMP VEC_102 
+ *          w: DOT4        ____,  R1.w,  C2.w      CLAMP VEC_021 
+ * 02 EXP_DONE: PIX0, R2
+ * END_OF_PROGRAM
+ */
+int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 */
+    shader[i++] = CF_DWORD0(ADDR(20));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(2),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_TEX),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(0));
+    /* 1 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(3),
+                                KCACHE_BANK0(0),
+                                KCACHE_BANK1(0),
+                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+                                KCACHE_ADDR0(0),
+                                KCACHE_ADDR1(0),
+                                I_COUNT(16),
+                                USES_WATERFALL(0),
+                                CF_INST(SQ_CF_INST_ALU),
+                                WHOLE_QUAD_MODE(0),
+                                BARRIER(1));
+    /* 2 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+                                          TYPE(SQ_EXPORT_PIXEL),
+                                          RW_GPR(2),
+                                          RW_REL(ABSOLUTE),
+                                          INDEX_GPR(0),
+                                          ELEM_SIZE(3));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                               SRC_SEL_Y(SQ_SEL_Y),
+                                               SRC_SEL_Z(SQ_SEL_Z),
+                                               SRC_SEL_W(SQ_SEL_W),
+                                               R6xx_ELEM_LOOP(0),
+                                               BURST_COUNT(1),
+                                               END_OF_PROGRAM(1),
+                                               VALID_PIXEL_MODE(0),
+                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                               WHOLE_QUAD_MODE(0),
+                                               BARRIER(1));
+    /* 3 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(259),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Y),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(1));
+    /* 4 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(259),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_W),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+    /* 5 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(259),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_W),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+    /* 6 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(SQ_ALU_SRC_0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MOV),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+    /* 7 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(256),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(1));
+    /* 8 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(256),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(1));
+    /* 9 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(256),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(1));
+    /* 10 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(256),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(1));
+    /* 11 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(257),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(1));
+    /* 12 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(257),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(1));
+    /* 13 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(257),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(1));
+    /* 14 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(257),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(1));
+    /* 15 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(258),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(1));
+    /* 16 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(258),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(1));
+    /* 17 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(258),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(1));
+    /* 18 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(258),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(1));
+    shader[i++] = 0x00000000;
+    shader[i++] = 0x00000000;
+    /* 20/21 */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             BC_FRAC_MODE(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             R7xx_ALT_CONST(0));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_X),
+                             DST_SEL_Y(SQ_SEL_MASK),
+                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_W(SQ_SEL_1),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(0),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* 22/23 */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             BC_FRAC_MODE(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(1),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             R7xx_ALT_CONST(0));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_X),
+                             DST_SEL_Z(SQ_SEL_Y),
+                             DST_SEL_W(SQ_SEL_MASK),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(1),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    return i;
+}
+
+/*
+ * ; xv ps planar
+ * 00 TEX: ADDR(20) CNT(3) NO_BARRIER
+ *       0  SAMPLE R1.x__1, R0.xy01, t0, s0
+ *       1  SAMPLE R1.__x_, R0.xy01, t1, s1
+ *       2  SAMPLE R1._x__, R0.xy01, t2, s2
+ * 01 ALU: ADDR(3) CNT(16)
+ *       3  x: MULADD      R1.x,  R1.x,  C3.x,  C3.y      CLAMP
+ *          y: MULADD      R1.y,  R1.y,  C3.z,  C3.w
+ *          z: MULADD      R1.z,  R1.z,  C3.z,  C3.w
+ *          w: MOV         R1.w,  0.0f
+ *       4  x: DOT4        R2.x,  R1.x,  C0.x      CLAMP VEC_102
+ *          y: DOT4        ____,  R1.y,  C0.y      CLAMP VEC_102
+ *          z: DOT4        ____,  R1.z,  C0.z      CLAMP VEC_102
+ *          w: DOT4        ____,  R1.w,  C0.w      CLAMP VEC_021
+ *       5  x: DOT4        ____,  R1.x,  C1.x      CLAMP VEC_102
+ *          y: DOT4        R2.y,  R1.y,  C1.y      CLAMP VEC_102
+ *          z: DOT4        ____,  R1.z,  C1.z      CLAMP VEC_102
+ *          w: DOT4        ____,  R1.w,  C1.w      CLAMP VEC_021
+ *       6  x: DOT4        ____,  R1.x,  C2.x      CLAMP VEC_102
+ *          y: DOT4        ____,  R1.y,  C2.y      CLAMP VEC_102
+ *          z: DOT4        R2.z,  R1.z,  C2.z      CLAMP VEC_102
+ *          w: DOT4        ____,  R1.w,  C2.w      CLAMP VEC_021
+ * 02 EXP_DONE: PIX0, R2
+ * END_OF_PROGRAM
+ */
+int R600_xv_ps_planar(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i=0;
+
+    /* 0 */
+    shader[i++] = CF_DWORD0(ADDR(20));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(3),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_TEX),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(0));
+    /* 1 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(3),
+                                KCACHE_BANK0(0),
+                                KCACHE_BANK1(0),
+                                KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+                                KCACHE_ADDR0(0),
+                                KCACHE_ADDR1(0),
+                                I_COUNT(16),
+                                USES_WATERFALL(0),
+                                CF_INST(SQ_CF_INST_ALU),
+                                WHOLE_QUAD_MODE(0),
+                                BARRIER(1));
+    /* 2 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+                                          TYPE(SQ_EXPORT_PIXEL),
+                                          RW_GPR(2),
+                                          RW_REL(ABSOLUTE),
+                                          INDEX_GPR(0),
+                                          ELEM_SIZE(3));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                               SRC_SEL_Y(SQ_SEL_Y),
+                                               SRC_SEL_Z(SQ_SEL_Z),
+                                               SRC_SEL_W(SQ_SEL_W),
+                                               R6xx_ELEM_LOOP(0),
+                                               BURST_COUNT(1),
+                                               END_OF_PROGRAM(1),
+                                               VALID_PIXEL_MODE(0),
+                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                               WHOLE_QUAD_MODE(0),
+                                               BARRIER(1));
+    /* 3 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(259),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Y),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(1));
+    /* 4 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(259),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_W),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+    /* 5 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(259),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(259),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_W),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+    /* 6 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(SQ_ALU_SRC_0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MOV),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+    /* 7 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(256),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(1));
+    /* 8 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(256),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(1));
+    /* 9 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(256),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(1));
+    /* 10 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(256),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(1));
+    /* 11 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(257),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(1));
+    /* 12 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(257),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(1));
+    /* 13 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(257),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(1));
+    /* 14 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(257),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(1));
+    /* 15 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(258),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(1));
+    /* 16 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(258),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(1));
+    /* 17 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(258),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_102),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(1));
+    /* 18 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(258),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+                                 SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 FOG_MERGE(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_021),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(1));
+    shader[i++] = 0x00000000;
+    shader[i++] = 0x00000000;
+    /* 20/21 */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             BC_FRAC_MODE(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             R7xx_ALT_CONST(0));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_X),
+                             DST_SEL_Y(SQ_SEL_MASK),
+                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_W(SQ_SEL_1),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(0),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* 22/23 */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             BC_FRAC_MODE(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(1),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             R7xx_ALT_CONST(0));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_MASK),
+                             DST_SEL_Z(SQ_SEL_X),
+                             DST_SEL_W(SQ_SEL_MASK),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(1),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* 24/25 */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             BC_FRAC_MODE(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(2),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             R7xx_ALT_CONST(0));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_X),
+                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_W(SQ_SEL_MASK),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(2),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    return i;
+}
+
+/* comp mask vs: writes the vertex program for composite with a mask into shader[]; returns the number of dwords written */
+int R600_comp_mask_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 - CF_INST_VTX clause at ADDR(4) with I_COUNT(3): the dst, src and mask fetches emitted at 4/5, 6/7 and 8/9 */
+    shader[i++] = CF_DWORD0(ADDR(4));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(3),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_VTX),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* 1 - dst: export GPR 2 as the position (SQ_EXPORT_POS, CF_POS0) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(2),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(1),
+					       END_OF_PROGRAM(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(1));
+    /* 2 - src: export GPR 1 as parameter 0 (SQ_EXPORT_PARAM, ARRAY_BASE 0) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(1),
+					       END_OF_PROGRAM(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(0));
+    /* 3 - mask: export GPR 0 as parameter 1; this is the word that sets END_OF_PROGRAM(1) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(1),
+					       END_OF_PROGRAM(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(0));
+    /* 4/5 - dst: fetch FMT_32_32_FLOAT at OFFSET(0) from buffer 0 into GPR 2.xy, with z = 0 and w = 1 */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(24));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(1));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 6/7 - src: fetch FMT_32_32_FLOAT at OFFSET(8) from buffer 0 into GPR 1.xy, with z = 0 and w = 1 */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(0));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 8/9 - mask: fetch FMT_32_32_FLOAT at OFFSET(16) from buffer 0 into GPR 0.xy, with z = 0 and w = 1 */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(16),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(0));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+/* comp mask ps: writes the pixel program that samples src and mask and multiplies them per channel into shader[]; returns the number of dwords written */
+int R600_comp_mask_ps(RADEONChipFamily ChipSet,
+		      uint32_t* shader,
+		      int src_a, int src_r, int src_g, int src_b,
+		      int mask_a, int mask_r, int mask_g, int mask_b)
+{
+    int i = 0;
+
+    /* 0 - CF_INST_TEX clause at ADDR(8) with I_COUNT(2): the src and mask fetches emitted at 8/9 and 10/11 */
+    shader[i++] = CF_DWORD0(ADDR(8));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(2),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_TEX),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* 1 - CF_INST_ALU clause at ADDR(3) with I_COUNT(4): the four MULs emitted at 3..6 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(3),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				USES_WATERFALL(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 2 - export GPR 2 (the MUL results) to CF_PIXEL_MRT0; this is the word that sets END_OF_PROGRAM(1) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(2),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(1),
+					       END_OF_PROGRAM(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(1));
+
+    /* 3 - alu 0 */
+    /* MUL gpr[2].x gpr[1].x gpr[0].x  (result clamped: CLAMP(1)) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(1));
+    /* 4 - alu 1 */
+    /* MUL gpr[2].y gpr[1].y gpr[0].y  (result clamped: CLAMP(1)) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Y),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(1));
+    /* 5 - alu 2 */
+    /* MUL gpr[2].z gpr[1].z gpr[0].z  (result clamped: CLAMP(1)) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Z),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Z),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(1));
+    /* 6 - alu 3 */
+    /* MUL gpr[2].w gpr[1].w gpr[0].w  (result clamped: CLAMP(1); LAST(1) marks the final op of this group) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_W),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_W),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(1));
+    /* 7 - two zero dwords, so that the texture fetches start at position 8 as ADDR(8) above expects */
+    shader[i++] = 0x00000000;
+    shader[i++] = 0x00000000;
+
+    /* 8/9 - src: sample resource 0 / sampler 0 with coords from GPR 0 into GPR 0; src_r/g/b/a are the DST_SEL_X/Y/Z/W values */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     BC_FRAC_MODE(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     R7xx_ALT_CONST(0));
+    shader[i++] = TEX_DWORD1(DST_GPR(0),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(src_r),
+			     DST_SEL_Y(src_g),
+			     DST_SEL_Z(src_b),
+			     DST_SEL_W(src_a),
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_NORMALIZED),
+			     COORD_TYPE_Y(TEX_NORMALIZED),
+			     COORD_TYPE_Z(TEX_NORMALIZED),
+			     COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(0),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* 10/11 - mask: sample resource 1 / sampler 1 with coords from GPR 1 into GPR 1; mask_r/g/b/a are the DST_SEL_X/Y/Z/W values */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     BC_FRAC_MODE(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(1),
+			     SRC_GPR(1),
+			     SRC_REL(ABSOLUTE),
+			     R7xx_ALT_CONST(0));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(mask_r),
+			     DST_SEL_Y(mask_g),
+			     DST_SEL_Z(mask_b),
+			     DST_SEL_W(mask_a),
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_NORMALIZED),
+			     COORD_TYPE_Y(TEX_NORMALIZED),
+			     COORD_TYPE_Z(TEX_NORMALIZED),
+			     COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(1),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    return i;
+}
+
+/* comp vs: writes the vertex program for composite without a mask into shader[]; returns the number of dwords written */
+int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 - CF_INST_VTX clause at ADDR(4) with I_COUNT(2): the dst and src fetches emitted at 4/5 and 6/7 */
+    shader[i++] = CF_DWORD0(ADDR(4));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(2),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_VTX),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* 1 - dst: export GPR 1 as the position (SQ_EXPORT_POS, CF_POS0) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(0),
+					       END_OF_PROGRAM(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(1));
+    /* 2 - src: export GPR 0 as parameter 0; this is the word that sets END_OF_PROGRAM(1) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(0),
+					       END_OF_PROGRAM(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(0));
+    /* 3 - two zero dwords, so that the vertex fetches start at position 4 as ADDR(4) above expects */
+    shader[i++] = 0x00000000;
+    shader[i++] = 0x00000000;
+    /* 4/5 - dst: fetch FMT_32_32_FLOAT at OFFSET(0) from buffer 0 into GPR 1.xy, with z = 0 and w = 1 */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(16));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(1));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 6/7 - src: fetch FMT_32_32_FLOAT at OFFSET(8) from buffer 0 into GPR 0.xy, with z = 0 and w = 1 */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT), /* xxx */
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), /* xxx */
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), /* xxx */
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(0));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+/* comp ps: writes the pixel program that samples src and exports it unmodified into shader[]; returns the number of dwords written */
+int R600_comp_ps(RADEONChipFamily ChipSet,
+		 uint32_t* shader,
+		 int src_a, int src_r, int src_g, int src_b
+)
+{
+    int i = 0;
+
+    /* 0 - CF_INST_TEX clause at ADDR(2) with I_COUNT(1): the single src fetch emitted at 2/3 */
+    shader[i++] = CF_DWORD0(ADDR(2));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(1),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_TEX),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+    /* 1 - export GPR 0 (the fetched texel) to CF_PIXEL_MRT0; this is the word that sets END_OF_PROGRAM(1) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(1),
+					       END_OF_PROGRAM(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(1));
+
+
+    /* 2/3 - src: sample resource 0 / sampler 0 with coords from GPR 0 into GPR 0; src_r/g/b/a are the DST_SEL_X/Y/Z/W values */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     BC_FRAC_MODE(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     R7xx_ALT_CONST(0));
+    shader[i++] = TEX_DWORD1(DST_GPR(0),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(src_r),
+			     DST_SEL_Y(src_g),
+			     DST_SEL_Z(src_b),
+			     DST_SEL_W(src_a),
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_NORMALIZED),
+			     COORD_TYPE_Y(TEX_NORMALIZED),
+			     COORD_TYPE_Z(TEX_NORMALIZED),
+			     COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(0),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    return i;
+}
diff --git a/src/r600_shader.h b/src/r600_shader.h
index 58f5a52..7333d0b 100644
--- a/src/r600_shader.h
+++ b/src/r600_shader.h
@@ -29,6 +29,7 @@
 #ifndef __SHADER_H__
 #define __SHADER_H__
 
+#include "radeon.h"
 
 /* Restrictions of ALU instructions
  * order of scalar ops is always x,y,z,w,t(rans), last to be indicated by last==1.
@@ -342,5 +343,25 @@
 	 ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))
 #define TEX_DWORD_PAD 0x00000000
 
+extern int R600_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int R600_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int R600_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int R600_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
+extern int R600_xv_ps_packet(RADEONChipFamily ChipSet, uint32_t* shader);
+extern int R600_xv_ps_planar(RADEONChipFamily ChipSet, uint32_t* shader);
+
+extern int R600_comp_mask_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int R600_comp_mask_ps(RADEONChipFamily ChipSet,
+			     uint32_t* ps,
+			     int src_a, int src_r, int src_g, int src_b,
+			     int mask_a, int mask_r, int mask_g, int mask_b);
+
+extern int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int R600_comp_ps(RADEONChipFamily ChipSet,
+			uint32_t* ps,
+			int src_a, int src_r, int src_g, int src_b);
 
 #endif
commit a8e631c1b1c9b46602aeca66f8e7e68154d0bfc8
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Sun Mar 1 22:30:31 2009 -0500

    R6xx/R7xx: code cleanups

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 02152d0..633663c 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -42,7 +42,7 @@
 extern PixmapPtr
 RADEONGetDrawablePixmap(DrawablePtr pDrawable);
 
-//#define SHOW_VERTEXES
+/* #define SHOW_VERTEXES */
 
 #       define RADEON_ROP3_ZERO             0x00000000
 #       define RADEON_ROP3_DSa              0x00880000
@@ -103,11 +103,11 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height;
     accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
 
-    // bad pitch
+    /* bad pitch */
     if (accel_state->dst_pitch & 7)
 	return FALSE;
 
-    // bad offset
+    /* bad offset */
     if (accel_state->dst_mc_addr & 0xff)
 	return FALSE;
 
@@ -118,7 +118,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     CLEAR (vs_conf);
     CLEAR (ps_conf);
 
-    //return FALSE;
+    /* return FALSE; */
 
 #ifdef SHOW_VERTEXES
     ErrorF("%dx%d @ %dbpp, 0x%08x\n", pPix->drawable.width, pPix->drawable.height,
@@ -130,8 +130,6 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     /* Init */
     start_3d(pScrn, accel_state->ib);
 
-    //cp_set_surface_sync(pScrn, accel_state->ib);
-
     set_default_state(pScrn, accel_state->ib);
 
     /* Scissor / viewport */
@@ -170,13 +168,13 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 
     /* Render setup */
     if (pm & 0x000000ff)
-	pmask |= 4; //B
+	pmask |= 4; /* B */
     if (pm & 0x0000ff00)
-	pmask |= 2; //G
+	pmask |= 2; /* G */
     if (pm & 0x00ff0000)
-	pmask |= 1; //R
+	pmask |= 1; /* R */
     if (pm & 0xff000000)
-	pmask |= 8; //A
+	pmask |= 8; /* A */
     EREG(accel_state->ib, CB_SHADER_MASK,                      (pmask << OUTPUT0_ENABLE_shift));
     EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
     EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[alu]);
@@ -188,13 +186,13 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 
     if (pPix->drawable.bitsPerPixel == 8) {
 	cb_conf.format = COLOR_8;
-	cb_conf.comp_swap = 3; //A
+	cb_conf.comp_swap = 3; /* A */
     } else if (pPix->drawable.bitsPerPixel == 16) {
 	cb_conf.format = COLOR_5_6_5;
-	cb_conf.comp_swap = 2; //RGB
+	cb_conf.comp_swap = 2; /* RGB */
     } else {
 	cb_conf.format = COLOR_8_8_8_8;
-	cb_conf.comp_swap = 1; //ARGB
+	cb_conf.comp_swap = 1; /* ARGB */
     }
     cb_conf.source_format = 1;
     cb_conf.blend_clamp = 1;
@@ -207,46 +205,46 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 								DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
 
     /* Interpolator setup */
-    // one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one)
+    /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */
     EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift));
     EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
 
     /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
      * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
-    // no VS exports as PS input (NUM_INTERP is not zero based, no minus one)
+    /* no VS exports as PS input (NUM_INTERP is not zero based, no minus one) */
     EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 (0 << NUM_INTERP_shift));
     EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
-    // color semantic id 0 -> GPR[0]
+    /* color semantic id 0 -> GPR[0] */
     EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
 								  (0x03 << DEFAULT_VAL_shift)	|
 								  FLAT_SHADE_bit		|
 								  SEL_CENTROID_bit));
     EREG(accel_state->ib, SPI_INTERP_CONTROL_0,                FLAT_SHADE_ENA_bit | 0);
 
-    // PS alu constants
+    /* PS alu constants */
     if (pPix->drawable.bitsPerPixel == 16) {
 	r = (fg >> 11) & 0x1f;
 	g = (fg >> 5) & 0x3f;
 	b = (fg >> 0) & 0x1f;
-	ps_alu_consts[0] = (float)r / 31; //R
-	ps_alu_consts[1] = (float)g / 63; //G
-	ps_alu_consts[2] = (float)b / 31; //B
-	ps_alu_consts[3] = 1.0; //A
+	ps_alu_consts[0] = (float)r / 31; /* R */
+	ps_alu_consts[1] = (float)g / 63; /* G */
+	ps_alu_consts[2] = (float)b / 31; /* B */
+	ps_alu_consts[3] = 1.0; /* A */
     } else if (pPix->drawable.bitsPerPixel == 8) {
 	a = (fg >> 0) & 0xff;
-	ps_alu_consts[0] = 0.0; //R
-	ps_alu_consts[1] = 0.0; //G
-	ps_alu_consts[2] = 0.0; //B
-	ps_alu_consts[3] = (float)a / 255; //A
+	ps_alu_consts[0] = 0.0; /* R */
+	ps_alu_consts[1] = 0.0; /* G */
+	ps_alu_consts[2] = 0.0; /* B */
+	ps_alu_consts[3] = (float)a / 255; /* A */
     } else {
 	a = (fg >> 24) & 0xff;
 	r = (fg >> 16) & 0xff;
 	g = (fg >> 8) & 0xff;
 	b = (fg >> 0) & 0xff;
-	ps_alu_consts[0] = (float)r / 255; //R
-	ps_alu_consts[1] = (float)g / 255; //G
-	ps_alu_consts[2] = (float)b / 255; //B
-	ps_alu_consts[3] = (float)a / 255; //A
+	ps_alu_consts[0] = (float)r / 255; /* R */
+	ps_alu_consts[1] = (float)g / 255; /* G */
+	ps_alu_consts[2] = (float)b / 255; /* B */
+	ps_alu_consts[3] = (float)a / 255; /* A */
     }
     set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
 
@@ -374,8 +372,6 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn,
     /* Init */
     start_3d(pScrn, accel_state->ib);
 
-    //cp_set_surface_sync(pScrn, accel_state->ib);
-
     set_default_state(pScrn, accel_state->ib);
 
     /* Scissor / viewport */
@@ -434,22 +430,22 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn,
     tex_res.mip_base            = accel_state->src_mc_addr[0];
     if (src_bpp == 8) {
 	tex_res.format              = FMT_8;
-	tex_res.dst_sel_x           = SQ_SEL_1; //R
-	tex_res.dst_sel_y           = SQ_SEL_1; //G
-	tex_res.dst_sel_z           = SQ_SEL_1; //B
-	tex_res.dst_sel_w           = SQ_SEL_X; //A
+	tex_res.dst_sel_x           = SQ_SEL_1; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_1; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_1; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
     } else if (src_bpp == 16) {
 	tex_res.format              = FMT_5_6_5;
-	tex_res.dst_sel_x           = SQ_SEL_Z; //R
-	tex_res.dst_sel_y           = SQ_SEL_Y; //G
-	tex_res.dst_sel_z           = SQ_SEL_X; //B
-	tex_res.dst_sel_w           = SQ_SEL_1; //A
+	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
     } else {
 	tex_res.format              = FMT_8_8_8_8;
-	tex_res.dst_sel_x           = SQ_SEL_Z; //R
-	tex_res.dst_sel_y           = SQ_SEL_Y; //G
-	tex_res.dst_sel_z           = SQ_SEL_X; //B
-	tex_res.dst_sel_w           = SQ_SEL_W; //A
+	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
     }
 
     tex_res.request_size        = 1;
@@ -471,13 +467,13 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn,
 
     /* Render setup */
     if (planemask & 0x000000ff)
-	pmask |= 4; //B
+	pmask |= 4; /* B */
     if (planemask & 0x0000ff00)
-	pmask |= 2; //G
+	pmask |= 2; /* G */
     if (planemask & 0x00ff0000)
-	pmask |= 1; //R
+	pmask |= 1; /* R */
     if (planemask & 0xff000000)
-	pmask |= 8; //A
+	pmask |= 8; /* A */
     EREG(accel_state->ib, CB_SHADER_MASK,                      (pmask << OUTPUT0_ENABLE_shift));
     EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL,              (RT0_ENABLE_bit));
     EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[rop]);
@@ -494,13 +490,13 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn,
     cb_conf.base = accel_state->dst_mc_addr;
     if (dst_bpp == 8) {
 	cb_conf.format = COLOR_8;
-	cb_conf.comp_swap = 3; // A
+	cb_conf.comp_swap = 3; /* A */
     } else if (dst_bpp == 16) {
 	cb_conf.format = COLOR_5_6_5;
-	cb_conf.comp_swap = 2; // RGB
+	cb_conf.comp_swap = 2; /* RGB */
     } else {
 	cb_conf.format = COLOR_8_8_8_8;
-	cb_conf.comp_swap = 1; // ARGB
+	cb_conf.comp_swap = 1; /* ARGB */
     }
     cb_conf.source_format = 1;
     cb_conf.blend_clamp = 1;
@@ -513,16 +509,16 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn,
 								DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
 
     /* Interpolator setup */
-    // export tex coord from VS
+    /* export tex coord from VS */
     EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
     EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
 
     /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x
      * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */
-    // input tex coord from VS
+    /* input tex coord from VS */
     EREG(accel_state->ib, SPI_PS_IN_CONTROL_0,                 ((1 << NUM_INTERP_shift)));
     EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
-    // color semantic id 0 -> GPR[0]
+    /* color semantic id 0 -> GPR[0] */
     EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
 								(0x01 << DEFAULT_VAL_shift)	|
 								SEL_CENTROID_bit));
@@ -648,13 +644,13 @@ R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
     accel_state->dst_height = pDst->drawable.height;
     accel_state->dst_bpp = pDst->drawable.bitsPerPixel;
 
-    // bad pitch
+    /* bad pitch */
     if (accel_state->src_pitch[0] & 7)
 	return FALSE;
     if (accel_state->dst_pitch & 7)
 	return FALSE;
 
-    // bad offset
+    /* bad offset */
     if (accel_state->src_mc_addr[0] & 0xff)
 	return FALSE;
     if (accel_state->dst_mc_addr & 0xff)
@@ -665,7 +661,7 @@ R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
     if (pDst->drawable.bitsPerPixel == 24)
 	return FALSE;
 
-    //return FALSE;
+    /* return FALSE; */
 
 #ifdef SHOW_VERTEXES
     ErrorF("src: %dx%d @ %dbpp, 0x%08x\n", pSrc->drawable.width, pSrc->drawable.height,
@@ -704,10 +700,10 @@ R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
 static Bool
 is_overlap(int sx1, int sx2, int sy1, int sy2, int dx1, int dx2, int dy1, int dy2)
 {
-    if (((sx1 >= dx1) && (sx1 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || // TL x1, y1
-	((sx2 >= dx1) && (sx2 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || // TR x2, y1
-	((sx1 >= dx1) && (sx1 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)) || // BL x1, y2
-	((sx2 >= dx1) && (sx2 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)))   // BR x2, y2
+    if (((sx1 >= dx1) && (sx1 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TL x1, y1 */
+	((sx2 >= dx1) && (sx2 <= dx2) && (sy1 >= dy1) && (sy1 <= dy2)) || /* TR x2, y1 */
+	((sx1 >= dx1) && (sx1 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)) || /* BL x1, y2 */
+	((sx2 >= dx1) && (sx2 <= dx2) && (sy2 >= dy1) && (sy2 <= dy2)))   /* BR x2, y2 */
 	return TRUE;
     else
 	return FALSE;
@@ -736,9 +732,9 @@ R600OverlapCopy(PixmapPtr pDst,
          * by copying a part of the  non-overlapping portion, then adjusting coordinates
          * Choose horizontal vs vertical to minimize the total number of copy operations
          */
-        if (vchunk != 0 && hchunk != 0) { //diagonal
-            if ((w / hchunk) <= (h / vchunk)) { // reduce to horizontal
-                if (srcY > dstY ) { // diagonal up
+        if (vchunk != 0 && hchunk != 0) { /* diagonal */
+            if ((w / hchunk) <= (h / vchunk)) { /* reduce to horizontal  */
+                if (srcY > dstY ) { /* diagonal up */
                     R600DoPrepareCopy(pScrn,
                                       dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
                                       dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
@@ -748,7 +744,7 @@ R600OverlapCopy(PixmapPtr pDst,
 
                     srcY = srcY + vchunk;
                     dstY = dstY + vchunk;
-                } else { // diagonal down
+                } else { /* diagonal down */
                     R600DoPrepareCopy(pScrn,
                                       dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
                                       dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
@@ -758,8 +754,8 @@ R600OverlapCopy(PixmapPtr pDst,
                 }
                 h = h - vchunk;
                 vchunk = 0;
-            } else { //reduce to vertical
-                if (srcX > dstX ) { // diagonal left
+            } else { /* reduce to vertical */
+                if (srcX > dstX ) { /* diagonal left */
                     R600DoPrepareCopy(pScrn,
                                       dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
                                       dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
@@ -769,7 +765,7 @@ R600OverlapCopy(PixmapPtr pDst,
 
                     srcX = srcX + hchunk;
                     dstX = dstX + hchunk;
-                } else { // diagonal right
+                } else { /* diagonal right */
                     R600DoPrepareCopy(pScrn,
                                       dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
                                       dst_pitch, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
@@ -782,9 +778,9 @@ R600OverlapCopy(PixmapPtr pDst,
             }
         }
 
-	if (vchunk == 0) { // left/right
-	    if (srcX < dstX) { // right
-		// copy right to left
+	if (vchunk == 0) { /* left/right */
+	    if (srcX < dstX) { /* right */
+		/* copy right to left */
 		for (i = w; i > 0; i -= hchunk) {
 		    R600DoPrepareCopy(pScrn,
 				      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
@@ -793,8 +789,8 @@ R600OverlapCopy(PixmapPtr pDst,
 		    R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h);
 		    R600DoCopy(pScrn);
 		}
-	    } else { //left
-		// copy left to right
+	    } else { /* left */
+		/* copy left to right */
 		for (i = 0; i < w; i += hchunk) {
 		    R600DoPrepareCopy(pScrn,
 				      dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
@@ -805,9 +801,9 @@ R600OverlapCopy(PixmapPtr pDst,
 		    R600DoCopy(pScrn);
 		}
 	    }
-	} else { //up/down
-	    if (srcY > dstY) { // up
-		// copy top to bottom
+	} else { /* up/down */
+	    if (srcY > dstY) { /* up */
+		/* copy top to bottom */
                 for (i = 0; i < h; i += vchunk) {
                     R600DoPrepareCopy(pScrn,
                                       dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
@@ -818,8 +814,8 @@ R600OverlapCopy(PixmapPtr pDst,
                     R600AppendCopyVertex(pScrn, srcX, srcY + i, dstX, dstY + i, w, vchunk);
                     R600DoCopy(pScrn);
                 }
-	    } else { // down
-		// copy bottom to top
+	    } else { /* down */
+		/* copy bottom to top */
                 for (i = h; i > 0; i -= vchunk) {
                     R600DoPrepareCopy(pScrn,
                                       dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel,
@@ -1081,7 +1077,7 @@ static Bool R600CheckCompositeTexture(PicturePtr pPict,
      * matter. I have not, however, verified that the X server always does such
      * clipping.
      */
-    //FIXME R6xx
+    /* FIXME R6xx */
     if (pPict->transform != 0 && !pPict->repeat && PICT_FORMAT_A(pPict->format) == 0) {
 	if (!(((op == PictOpSrc) || (op == PictOpClear)) && (PICT_FORMAT_A(pDstPict->format) == 0)))
 	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
@@ -1113,7 +1109,7 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
     accel_state->texW[unit] = w;
     accel_state->texH[unit] = h;
 
-    //ErrorF("Tex %d setup %dx%d\n", unit, w, h);
+    /* ErrorF("Tex %d setup %dx%d\n", unit, w, h);  */
 
     accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
     accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * h;
@@ -1138,41 +1134,41 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix,
     switch (pPict->format) {
     case PICT_a1r5g5b5:
     case PICT_a8r8g8b8:
-	tex_res.dst_sel_x           = SQ_SEL_Z; //R
-	tex_res.dst_sel_y           = SQ_SEL_Y; //G
-	tex_res.dst_sel_z           = SQ_SEL_X; //B
-	tex_res.dst_sel_w           = SQ_SEL_W; //A
+	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
 	break;
     case PICT_a8b8g8r8:
-	tex_res.dst_sel_x           = SQ_SEL_X; //R
-	tex_res.dst_sel_y           = SQ_SEL_Y; //G
-	tex_res.dst_sel_z           = SQ_SEL_Z; //B
-	tex_res.dst_sel_w           = SQ_SEL_W; //A
+	tex_res.dst_sel_x           = SQ_SEL_X; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_Z; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_W; /* A */
 	break;
     case PICT_x8b8g8r8:
-	tex_res.dst_sel_x           = SQ_SEL_X; //R
-	tex_res.dst_sel_y           = SQ_SEL_Y; //G
-	tex_res.dst_sel_z           = SQ_SEL_Z; //B
-	tex_res.dst_sel_w           = SQ_SEL_1; //A
+	tex_res.dst_sel_x           = SQ_SEL_X; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_Z; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
 	break;
     case PICT_x1r5g5b5:
     case PICT_x8r8g8b8:
-	tex_res.dst_sel_x           = SQ_SEL_Z; //R
-	tex_res.dst_sel_y           = SQ_SEL_Y; //G
-	tex_res.dst_sel_z           = SQ_SEL_X; //B
-	tex_res.dst_sel_w           = SQ_SEL_1; //A
+	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
 	break;
     case PICT_r5g6b5:
-	tex_res.dst_sel_x           = SQ_SEL_Z; //R
-	tex_res.dst_sel_y           = SQ_SEL_Y; //G
-	tex_res.dst_sel_z           = SQ_SEL_X; //B
-	tex_res.dst_sel_w           = SQ_SEL_1; //A
+	tex_res.dst_sel_x           = SQ_SEL_Z; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_Y; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_X; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_1; /* A */
 	break;
     case PICT_a8:
-	tex_res.dst_sel_x           = SQ_SEL_0; //R
-	tex_res.dst_sel_y           = SQ_SEL_0; //G
-	tex_res.dst_sel_z           = SQ_SEL_0; //B
-	tex_res.dst_sel_w           = SQ_SEL_X; //A
+	tex_res.dst_sel_x           = SQ_SEL_0; /* R */
+	tex_res.dst_sel_y           = SQ_SEL_0; /* G */
+	tex_res.dst_sel_z           = SQ_SEL_0; /* B */
+	tex_res.dst_sel_w           = SQ_SEL_X; /* A */
 	break;
     default:
 	RADEON_FALLBACK(("Bad format 0x%x\n", pPict->format));
@@ -1243,10 +1239,7 @@ static Bool R600CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskP
 			       PicturePtr pDstPicture)
 {
     uint32_t tmp1;
-//    ScreenPtr pScreen = pDstPicture->pDrawable->pScreen;
     PixmapPtr pSrcPixmap, pDstPixmap;
-//    ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
-//    RADEONInfoPtr info = RADEONPTR(pScrn);
     int max_tex_w, max_tex_h, max_dst_w, max_dst_h;
 
     /* Check for unsupported compositing operations. */
@@ -1326,7 +1319,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     int i = 0;
     uint32_t ps[24];
 
-    //return FALSE;
+    /* return FALSE; */
 
     if (pMask)
 	accel_state->has_mask = TRUE;
@@ -1372,104 +1365,83 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 
 	/* setup pixel shader */
 	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) {
-	    //src_color = R300_ALU_RGB_0_0;
 	    src_r = SQ_SEL_0;
 	    src_g = SQ_SEL_0;
 	    src_b = SQ_SEL_0;
 	} else {
-	    //src_color = R300_ALU_RGB_SRC0_RGB;
 	    src_r = SQ_SEL_X;
 	    src_g = SQ_SEL_Y;
 	    src_b = SQ_SEL_Z;
 	}
 
 	if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
-	    //src_alpha = R300_ALU_ALPHA_1_0;
 	    src_a = SQ_SEL_1;
 	} else {
-	    //src_alpha = R300_ALU_ALPHA_SRC0_A;
 	    src_a = SQ_SEL_W;
 	}
 
 	if (pMaskPicture->componentAlpha) {
 	    if (R600BlendOp[op].src_alpha) {
 		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
-		    //src_color = R300_ALU_RGB_1_0;
-		    //src_alpha = R300_ALU_ALPHA_1_0;
 		    src_r = SQ_SEL_1;
 		    src_g = SQ_SEL_1;
 		    src_b = SQ_SEL_1;
 		    src_a = SQ_SEL_1;
 		} else {
-		    //src_color = R300_ALU_RGB_SRC0_AAA;
-		    //src_alpha = R300_ALU_ALPHA_SRC0_A;
 		    src_r = SQ_SEL_W;
 		    src_g = SQ_SEL_W;
 		    src_b = SQ_SEL_W;
 		    src_a = SQ_SEL_W;
 		}
 
-		//mask_color = R300_ALU_RGB_SRC1_RGB;
 		mask_r = SQ_SEL_X;
 		mask_g = SQ_SEL_Y;
 		mask_b = SQ_SEL_Z;
 
 		if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
-		    //mask_alpha = R300_ALU_ALPHA_1_0;
 		    mask_a = SQ_SEL_1;
 		} else {
-		    //mask_alpha = R300_ALU_ALPHA_SRC1_A;
 		    mask_a = SQ_SEL_W;
 		}
 	    } else {
-		//src_color = R300_ALU_RGB_SRC0_RGB;
 		src_r = SQ_SEL_X;
 		src_g = SQ_SEL_Y;
 		src_b = SQ_SEL_Z;
 
 		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
-		    //src_alpha = R300_ALU_ALPHA_1_0;
 		    src_a = SQ_SEL_1;
 		} else {
-		    //src_alpha = R300_ALU_ALPHA_SRC0_A;
 		    src_a = SQ_SEL_W;
 		}
 
-		//mask_color = R300_ALU_RGB_SRC1_RGB;
 		mask_r = SQ_SEL_X;
 		mask_g = SQ_SEL_Y;
 		mask_b = SQ_SEL_Z;
 
 		if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
-		    //mask_alpha = R300_ALU_ALPHA_1_0;
 		    mask_a = SQ_SEL_1;
 		} else {
-		    //mask_alpha = R300_ALU_ALPHA_SRC1_A;
 		    mask_a = SQ_SEL_W;
 		}
 	    }
 	} else {
 	    if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
-		//mask_color = R300_ALU_RGB_1_0;
 		mask_r = SQ_SEL_1;
 		mask_g = SQ_SEL_1;
 		mask_b = SQ_SEL_1;
 	    } else {
-		//mask_color = R300_ALU_RGB_SRC1_AAA;
 		mask_r = SQ_SEL_W;
 		mask_g = SQ_SEL_W;
 		mask_b = SQ_SEL_W;
 	    }
 	    if (PICT_FORMAT_A(pMaskPicture->format) == 0) {
-		//mask_alpha = R300_ALU_ALPHA_1_0;
 		mask_a = SQ_SEL_1;
 	    } else {
-		//mask_alpha = R300_ALU_ALPHA_SRC1_A;
 		mask_a = SQ_SEL_W;
 	    }
 	}
 
-	//0
+	/* 0 */
 	ps[i++] = CF_DWORD0(ADDR(8));
 	ps[i++] = CF_DWORD1(POP_COUNT(0),
 			    CF_CONST(0),
@@ -1482,7 +1454,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 			    WHOLE_QUAD_MODE(0),
 			    BARRIER(1));
 
-	// 1
+	/* 1 */
 	ps[i++] = CF_ALU_DWORD0(ADDR(3),
 				KCACHE_BANK0(0),
 				KCACHE_BANK1(0),
@@ -1496,7 +1468,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 				WHOLE_QUAD_MODE(0),
 				BARRIER(1));
 
-	//2
+	/* 2 */
 	ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
 					  TYPE(SQ_EXPORT_PIXEL),
 					  RW_GPR(2),
@@ -1516,8 +1488,8 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 					       WHOLE_QUAD_MODE(0),
 					       BARRIER(1));
 
-	// 3 - alu 0
-	// MUL gpr[2].x gpr[1].x gpr[0].x
+	/* 3 - alu 0 */
+	/* MUL gpr[2].x gpr[1].x gpr[0].x */
 	ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			     SRC0_REL(ABSOLUTE),
 			     SRC0_ELEM(ELEM_X),
@@ -1543,8 +1515,8 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 				 DST_REL(ABSOLUTE),
 				 DST_ELEM(ELEM_X),
 				 CLAMP(1));
-	// 4 - alu 1
-	// MUL gpr[2].y gpr[1].y gpr[0].y
+	/* 4 - alu 1 */
+	/* MUL gpr[2].y gpr[1].y gpr[0].y */
 	ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			     SRC0_REL(ABSOLUTE),
 			     SRC0_ELEM(ELEM_Y),
@@ -1570,8 +1542,8 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 				 DST_REL(ABSOLUTE),
 				 DST_ELEM(ELEM_Y),
 				 CLAMP(1));
-	// 5 - alu 2
-	// MUL gpr[2].z gpr[1].z gpr[0].z
+	/* 5 - alu 2 */
+	/* MUL gpr[2].z gpr[1].z gpr[0].z */
 	ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			     SRC0_REL(ABSOLUTE),
 			     SRC0_ELEM(ELEM_Z),
@@ -1597,8 +1569,8 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 				 DST_REL(ABSOLUTE),
 				 DST_ELEM(ELEM_Z),
 				 CLAMP(1));
-	// 6 - alu 3
-	// MUL gpr[2].w gpr[1].w gpr[0].w
+	/* 6 - alu 3 */
+	/* MUL gpr[2].w gpr[1].w gpr[0].w */
 	ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			     SRC0_REL(ABSOLUTE),
 			     SRC0_ELEM(ELEM_W),
@@ -1624,11 +1596,11 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 				 DST_REL(ABSOLUTE),
 				 DST_ELEM(ELEM_W),
 				 CLAMP(1));
-	// 7
+	/* 7 */
 	ps[i++] = 0x00000000;
 	ps[i++] = 0x00000000;
 
-	//8/9 - src
+	/* 8/9 - src */
 	ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			     BC_FRAC_MODE(0),
 			     FETCH_WHOLE_QUAD(0),
@@ -1656,7 +1628,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 			     SRC_SEL_Z(SQ_SEL_0),
 			     SRC_SEL_W(SQ_SEL_1));
 	ps[i++] = TEX_DWORD_PAD;
-	//10/11 - mask
+	/* 10/11 - mask */
 	ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			     BC_FRAC_MODE(0),
 			     FETCH_WHOLE_QUAD(0),
@@ -1688,26 +1660,22 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 	int src_a, src_r, src_g, src_b;
 	/* setup pixel shader */
 	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0) {
-	    //src_color = R300_ALU_RGB_0_0;
 	    src_r = SQ_SEL_0;
 	    src_g = SQ_SEL_0;
 	    src_b = SQ_SEL_0;
 	} else {
-	    //src_color = R300_ALU_RGB_SRC0_RGB;
 	    src_r = SQ_SEL_X;
 	    src_g = SQ_SEL_Y;
 	    src_b = SQ_SEL_Z;
 	}
 
 	if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
-	    //src_alpha = R300_ALU_ALPHA_1_0;
 	    src_a = SQ_SEL_1;
 	} else {
-	    //src_alpha = R300_ALU_ALPHA_SRC0_A;
 	    src_a = SQ_SEL_W;
 	}
 
-	//0
+	/* 0 */
 	ps[i++] = CF_DWORD0(ADDR(2));
 	ps[i++] = CF_DWORD1(POP_COUNT(0),
 			    CF_CONST(0),
@@ -1719,7 +1687,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 			    CF_INST(SQ_CF_INST_TEX),
 			    WHOLE_QUAD_MODE(0),
 			    BARRIER(1));
-	//1
+	/* 1 */
 	ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
 					  TYPE(SQ_EXPORT_PIXEL),
 					  RW_GPR(0),
@@ -1740,7 +1708,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 					       BARRIER(1));
 
 
-	//2/3 - src
+	/* 2/3 - src */
 	ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			     BC_FRAC_MODE(0),
 			     FETCH_WHOLE_QUAD(0),
@@ -1779,20 +1747,18 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     /* Init */
     start_3d(pScrn, accel_state->ib);
 
-    //cp_set_surface_sync(pScrn, accel_state->ib);
-
     set_default_state(pScrn, accel_state->ib);
 
     /* Scissor / viewport */
     EREG(accel_state->ib, PA_CL_VTE_CNTL,                      VTX_XY_FMT_bit);
     EREG(accel_state->ib, PA_CL_CLIP_CNTL,                     CLIP_DISABLE_bit);
 
-    // fix me if false discard buffer!
+    /* fix me if false discard buffer! */
     if (!R600TextureSetup(pSrcPicture, pSrc, 0))
 	return FALSE;
 
     if (pMask != NULL) {
-	// fix me if false discard buffer!
+	/* fix me if false discard buffer! */
 	if (!R600TextureSetup(pMaskPicture, pMask, 1))
 	    return FALSE;
     } else {
@@ -1842,7 +1808,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format);
 
     if (info->ChipFamily == CHIP_FAMILY_R600) {
-	// no per-MRT blend on R600
+	/* no per-MRT blend on R600 */
 	EREG(accel_state->ib, CB_COLOR_CONTROL,                    RADEON_ROP[3] | (1 << TARGET_BLEND_ENABLE_shift));
 	EREG(accel_state->ib, CB_BLEND_CONTROL,                    blendcntl);
     } else {
@@ -1860,31 +1826,17 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 
     switch (pDstPicture->format) {
     case PICT_a8r8g8b8:
-	//ErrorF("dst: PICT_a8r8g8b8\n");
-	cb_conf.comp_swap = 1; //ARGB
-	break;
     case PICT_x8r8g8b8:
-	//ErrorF("dst: PICT_x8r8g8b8\n");
-	cb_conf.comp_swap = 1; //ARGB
-	break;
-    case PICT_r5g6b5:
-	//ErrorF("dst: PICT_r5g6b5\n");
-	cb_conf.comp_swap = 2; //RGB
-	break;
     case PICT_a1r5g5b5:
-	//ErrorF("dst: PICT_a1r5g5b5\n");
-	cb_conf.comp_swap = 1; //ARGB
-	break;
     case PICT_x1r5g5b5:
-	//ErrorF("dst: PICT_x1r5g5b5\n");
-	cb_conf.comp_swap = 1; //ARGB
+    default:
+	cb_conf.comp_swap = 1; /* ARGB */
 	break;
-    case PICT_a8:
-	//ErrorF("dst: PICT_a8\n");
-	cb_conf.comp_swap = 3; //A
+    case PICT_r5g6b5:
+	cb_conf.comp_swap = 2; /* RGB */
 	break;
-    default:
-	cb_conf.comp_swap = 1;
+    case PICT_a8:
+	cb_conf.comp_swap = 3; /* A */
 	break;
     }
     cb_conf.source_format = 1;
@@ -1899,27 +1851,27 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 
     /* Interpolator setup */
     if (pMask) {
-	// export 2 tex coords from VS
+	/* export 2 tex coords from VS */
 	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift));
-	// src = semantic id 0; mask = semantic id 1
+	/* src = semantic id 0; mask = semantic id 1 */
 	EREG(accel_state->ib, SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
 						  (1 << SEMANTIC_1_shift)));
-	// input 2 tex coords from VS
+	/* input 2 tex coords from VS */
 	EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (2 << NUM_INTERP_shift));
     } else {
-	// export 1 tex coords from VS
+	/* export 1 tex coords from VS */
 	EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
-	// src = semantic id 0
+	/* src = semantic id 0 */
 	EREG(accel_state->ib, SPI_VS_OUT_ID_0,   (0 << SEMANTIC_0_shift));
-	// input 1 tex coords from VS
+	/* input 1 tex coords from VS */
 	EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, (1 << NUM_INTERP_shift));
     }
     EREG(accel_state->ib, SPI_PS_IN_CONTROL_1,                 0);
-    // SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0
+    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
     EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2),       ((0    << SEMANTIC_shift)	|
 								(0x01 << DEFAULT_VAL_shift)	|
 								SEL_CENTROID_bit));
-    // SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1
+    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
     EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (1 <<2),       ((1    << SEMANTIC_shift)	|
 								(0x01 << DEFAULT_VAL_shift)	|
 								SEL_CENTROID_bit));
@@ -1954,7 +1906,7 @@ static void R600Composite(PixmapPtr pDst,
     srcBottomRight.x = IntToxFixed(srcX + w);
     srcBottomRight.y = IntToxFixed(srcY + h);
 
-    //XXX do transform in vertex shader
+    /* XXX do transform in vertex shader */
     if (accel_state->is_transform[0]) {
 	transformPoint(accel_state->transform[0], &srcTopLeft);
 	transformPoint(accel_state->transform[0], &srcTopRight);
@@ -2137,7 +2089,7 @@ R600CopyToVRAM(ScrnInfoPtr pScrn,
     temph = hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
     dst = (char *)scratch->address;
 
-    //memcopy from sys to scratch
+    /* memcopy from sys to scratch */
     while (temph--) {
 	memcpy (dst, src, wpass);
 	src += src_pitch;
@@ -2153,7 +2105,7 @@ R600CopyToVRAM(ScrnInfoPtr pScrn,
 	if (hpass) {
 	    scratch_offset = scratch->total/2 - scratch_offset;
 	    dst = (char *)scratch->address + scratch_offset;
-	    // wait for the engine to be idle
+	    /* wait for the engine to be idle */
 	    RADEONWaitForIdleCP(pScrn);
 	    //memcopy from sys to scratch
 	    while (temph--) {
@@ -2162,7 +2114,7 @@ R600CopyToVRAM(ScrnInfoPtr pScrn,
 		dst += scratch_pitch_bytes;
 	    }
 	}
-	//blit from scratch to vram
+	/* blit from scratch to vram */
 	R600DoPrepareCopy(pScrn,
 			  scratch_pitch, w, oldhpass, offset, bpp,
 			  dst_pitch, dst_height, dst_mc_addr, bpp,
@@ -2222,7 +2174,7 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
     scratch_mc_addr = info->gartLocation + info->dri->bufStart + (scratch->idx * scratch->total);
     hpass = min(h, scratch->total/2 / scratch_pitch_bytes);
 
-    //blit from vram to scratch
+    /* blit from vram to scratch */
     R600DoPrepareCopy(pScrn,
 		      src_pitch, src_width, src_height, src_mc_addr, bpp,
 		      scratch_pitch, hpass, scratch_mc_addr, bpp,
@@ -2239,7 +2191,7 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
 
 	if (hpass) {
 	    scratch_offset = scratch->total/2 - scratch_offset;
-	    //blit from vram to scratch
+	    /* blit from vram to scratch */
 	    R600DoPrepareCopy(pScrn,
 			      src_pitch, src_width, src_height, src_mc_addr, bpp,
 			      scratch_pitch, hpass, scratch_mc_addr + scratch_offset, bpp,
@@ -2248,9 +2200,9 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h,
 	    R600DoCopy(pScrn);
 	}
 
-	// wait for the engine to be idle
+	/* wait for the engine to be idle */
 	RADEONWaitForIdleCP(pScrn);
-	//memcopy from scratch to sys
+	/* memcopy from scratch to sys */
 	while (oldhpass--) {
 	    memcpy (dst, src, wpass);
 	    dst += dst_pitch;
@@ -2296,7 +2248,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
     struct radeon_accel_state *accel_state = info->accel_state;
     uint32_t *vs;
     uint32_t *ps;
-    // 512 bytes per shader for now
+    /* 512 bytes per shader for now */
     int size = 512 * 11;
     int i;
 
@@ -2322,9 +2274,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
     accel_state->xv_ps_offset_packed = 4608;
     accel_state->xv_ps_offset_planar = 5120;
 
-    // solid vs ---------------------------------------
+    /* solid vs --------------------------------------- */
     i = accel_state->solid_vs_offset / 4;
-    //0
+    /* 0 */
     vs[i++] = CF_DWORD0(ADDR(4));
     vs[i++] = CF_DWORD1(POP_COUNT(0),
 			CF_CONST(0),
@@ -2336,7 +2288,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			CF_INST(SQ_CF_INST_VTX),
 			WHOLE_QUAD_MODE(0),
 			BARRIER(1));
-    //1
+    /* 1 */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
 				      TYPE(SQ_EXPORT_POS),
 				      RW_GPR(1),
@@ -2354,7 +2306,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(1));
-    //2 - always export a param whether it's used or not
+    /* 2 - always export a param whether it's used or not */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
 				      TYPE(SQ_EXPORT_PARAM),
 				      RW_GPR(0),
@@ -2371,11 +2323,11 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   VALID_PIXEL_MODE(0),
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
-					   BARRIER(0));
-    //3 - padding
+		 			   BARRIER(0));
+    /* 3 - padding */
     vs[i++] = 0x00000000;
     vs[i++] = 0x00000000;
-    //4/5
+    /* 4/5 */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -2391,9 +2343,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(0),
 			 ENDIAN_SWAP(ENDIAN_NONE),
@@ -2401,9 +2353,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 MEGA_FETCH(1));
     vs[i++] = VTX_DWORD_PAD;
 
-    // solid ps ---------------------------------------
+    /* solid ps --------------------------------------- */
     i = accel_state->solid_ps_offset / 4;
-    // 0
+    /* 0 */
     ps[i++] = CF_ALU_DWORD0(ADDR(2),
 			    KCACHE_BANK0(0),
 			    KCACHE_BANK1(0),
@@ -2416,7 +2368,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			    CF_INST(SQ_CF_INST_ALU),
 			    WHOLE_QUAD_MODE(0),
 			    BARRIER(1));
-    // 1
+    /* 1 */
     ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
 				      TYPE(SQ_EXPORT_PIXEL),
 				      RW_GPR(0),
@@ -2435,7 +2387,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(1));
 
-    // 2
+    /* 2 */
     ps[i++] = ALU_DWORD0(SRC0_SEL(256),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
@@ -2461,7 +2413,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
-    // 3
+    /* 3 */
     ps[i++] = ALU_DWORD0(SRC0_SEL(256),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
@@ -2487,7 +2439,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(1));
-    // 4
+    /* 4 */
     ps[i++] = ALU_DWORD0(SRC0_SEL(256),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
@@ -2513,7 +2465,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(1));
-    // 5
+    /* 5 */
     ps[i++] = ALU_DWORD0(SRC0_SEL(256),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
@@ -2540,9 +2492,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_ELEM(ELEM_W),
 			     CLAMP(1));
 
-    // copy vs ---------------------------------------
+    /* copy vs --------------------------------------- */
     i = accel_state->copy_vs_offset / 4;
-    //0
+    /* 0 */
     vs[i++] = CF_DWORD0(ADDR(4));
     vs[i++] = CF_DWORD1(POP_COUNT(0),
 			CF_CONST(0),
@@ -2554,7 +2506,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			CF_INST(SQ_CF_INST_VTX),
 			WHOLE_QUAD_MODE(0),
 			BARRIER(1));
-    //1
+    /* 1 */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
 				      TYPE(SQ_EXPORT_POS),
 				      RW_GPR(1),
@@ -2572,7 +2524,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(1));
-    //2
+    /* 2 */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
 				      TYPE(SQ_EXPORT_PARAM),
 				      RW_GPR(0),
@@ -2590,10 +2542,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(0));
-    //3
+    /* 3 */
     vs[i++] = 0x00000000;
     vs[i++] = 0x00000000;
-    //4/5
+    /* 4/5 */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -2609,16 +2561,16 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(0),
 			 ENDIAN_SWAP(ENDIAN_NONE),
 			 CONST_BUF_NO_STRIDE(0),
 			 MEGA_FETCH(1));
     vs[i++] = VTX_DWORD_PAD;
-    //6/7
+    /* 6/7 */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -2634,9 +2586,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(8),
 			 ENDIAN_SWAP(ENDIAN_NONE),
@@ -2644,9 +2596,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 MEGA_FETCH(0));
     vs[i++] = VTX_DWORD_PAD;
 
-    // copy ps ---------------------------------------
+    /* copy ps --------------------------------------- */
     i = accel_state->copy_ps_offset / 4;
-    // CF INST 0
+    /* CF INST 0 */
     ps[i++] = CF_DWORD0(ADDR(2));
     ps[i++] = CF_DWORD1(POP_COUNT(0),
 			CF_CONST(0),
@@ -2658,7 +2610,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			CF_INST(SQ_CF_INST_TEX),
 			WHOLE_QUAD_MODE(0),
 			BARRIER(1));
-    // CF INST 1
+    /* CF INST 1 */
     ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
 				      TYPE(SQ_EXPORT_PIXEL),
 				      RW_GPR(0),
@@ -2676,7 +2628,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(1));
-    // TEX INST 0
+    /* TEX INST 0 */
     ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			 BC_FRAC_MODE(0),
 			 FETCH_WHOLE_QUAD(0),
@@ -2686,10 +2638,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 R7xx_ALT_CONST(0));
     ps[i++] = TEX_DWORD1(DST_GPR(0),
 			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_X), //R
-			 DST_SEL_Y(SQ_SEL_Y), //G
-			 DST_SEL_Z(SQ_SEL_Z), //B
-			 DST_SEL_W(SQ_SEL_W), //A
+			 DST_SEL_X(SQ_SEL_X), /* R */
+			 DST_SEL_Y(SQ_SEL_Y), /* G */
+			 DST_SEL_Z(SQ_SEL_Z), /* B */
+			 DST_SEL_W(SQ_SEL_W), /* A */
 			 LOD_BIAS(0),
 			 COORD_TYPE_X(TEX_UNNORMALIZED),
 			 COORD_TYPE_Y(TEX_UNNORMALIZED),
@@ -2705,9 +2657,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 SRC_SEL_W(SQ_SEL_1));
     ps[i++] = TEX_DWORD_PAD;
 
-    // xv vs ---------------------------------------
+    /* xv vs --------------------------------------- */
     i = accel_state->xv_vs_offset / 4;
-    //0
+    /* 0 */
     vs[i++] = CF_DWORD0(ADDR(4));
     vs[i++] = CF_DWORD1(POP_COUNT(0),
 			CF_CONST(0),
@@ -2719,7 +2671,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			CF_INST(SQ_CF_INST_VTX),
 			WHOLE_QUAD_MODE(0),
 			BARRIER(1));
-    //1
+    /* 1 */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
 				      TYPE(SQ_EXPORT_POS),
 				      RW_GPR(1),
@@ -2737,7 +2689,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(1));
-    //2
+    /* 2 */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
 				      TYPE(SQ_EXPORT_PARAM),
 				      RW_GPR(0),
@@ -2755,10 +2707,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(0));
-    //3
+    /* 3 */
     vs[i++] = 0x00000000;
     vs[i++] = 0x00000000;
-    //4/5
+    /* 4/5 */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -2774,16 +2726,16 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(0),
 			 ENDIAN_SWAP(ENDIAN_NONE),
 			 CONST_BUF_NO_STRIDE(0),
 			 MEGA_FETCH(1));
     vs[i++] = VTX_DWORD_PAD;
-    //6/7
+    /* 6/7 */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -2799,9 +2751,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(8),
 			 ENDIAN_SWAP(ENDIAN_NONE),
@@ -2809,9 +2761,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 MEGA_FETCH(0));
     vs[i++] = VTX_DWORD_PAD;
 
-    // xv ps packed ----------------------------------
+    /* xv ps packed ---------------------------------- */
     i = accel_state->xv_ps_offset_packed / 4;
-    // 0
+    /* 0 */
     ps[i++] = CF_DWORD0(ADDR(20));
     ps[i++] = CF_DWORD1(POP_COUNT(0),
 			CF_CONST(0),
@@ -2823,7 +2775,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			CF_INST(SQ_CF_INST_TEX),
 			WHOLE_QUAD_MODE(0),
 			BARRIER(0));
-    // 1
+    /* 1 */
     ps[i++] = CF_ALU_DWORD0(ADDR(3),
 			    KCACHE_BANK0(0),
 			    KCACHE_BANK1(0),
@@ -2836,7 +2788,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			    CF_INST(SQ_CF_INST_ALU),
 			    WHOLE_QUAD_MODE(0),
 			    BARRIER(1));
-    // 2
+    /* 2 */
     ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
 				      TYPE(SQ_EXPORT_PIXEL),
 				      RW_GPR(2),
@@ -2858,8 +2810,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
      *  Y' is scaled from 16:235
      *  Cb/Cr are scaled from 16:240
      */
-    // 3 - alu 0
-    // MULADD gpr[1].x gpr[1].x c[3].x c[3].y
+    /* 3 - alu 0 */
+    /* MULADD gpr[1].x gpr[1].x c[3].x c[3].y */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
@@ -2881,8 +2833,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
-    // 4 - alu 1
-    // MULADD gpr[1].y gpr[1].y c[3].z c[3].w
+    /* 4 - alu 1 */
+    /* MULADD gpr[1].y gpr[1].y c[3].z c[3].w */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
@@ -2904,8 +2856,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(0));
-    // 5 - alu 2
-    // MULADD gpr[1].z gpr[1].z c[3].z c[3].w
+    /* 5 - alu 2 */
+    /* MULADD gpr[1].z gpr[1].z c[3].z c[3].w */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
@@ -2927,8 +2879,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(0));
-    // 6 - alu 3
-    // MOV gpr[1].w 0.0
+    /* 6 - alu 3 */
+    /* MOV gpr[1].w 0.0 */
     ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
@@ -2954,8 +2906,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
 			     CLAMP(0));
-    // 7 - alu 4
-    // DP4 gpr[2].x gpr[1].x c[0].x
+    /* 7 - alu 4 */
+    /* DP4 gpr[2].x gpr[1].x c[0].x */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
@@ -2981,8 +2933,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
-    // 8 - alu 5
-    // DP4 gpr[2].y gpr[1].y c[0].y
+    /* 8 - alu 5 */
+    /* DP4 gpr[2].y gpr[1].y c[0].y */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
@@ -3008,8 +2960,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(1));
-    // 9 - alu 6
-    // DP4 gpr[2].z gpr[1].z c[0].z
+    /* 9 - alu 6 */
+    /* DP4 gpr[2].z gpr[1].z c[0].z */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
@@ -3035,8 +2987,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(1));
-    // 10 - alu 7
-    // DP4 gpr[2].w gpr[1].w c[0].w
+    /* 10 - alu 7 */
+    /* DP4 gpr[2].w gpr[1].w c[0].w */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
@@ -3062,8 +3014,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
 			     CLAMP(1));
-    // 11 - alu 8
-    // DP4 gpr[2].x gpr[1].x c[1].x
+    /* 11 - alu 8 */
+    /* DP4 gpr[2].x gpr[1].x c[1].x */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
@@ -3089,8 +3041,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
-    // 12 - alu 9
-    // DP4 gpr[2].y gpr[1].y c[1].y
+    /* 12 - alu 9 */
+    /* DP4 gpr[2].y gpr[1].y c[1].y */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
@@ -3116,8 +3068,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(1));
-    // 13 - alu 10
-    // DP4 gpr[2].z gpr[1].z c[1].z
+    /* 13 - alu 10 */
+    /* DP4 gpr[2].z gpr[1].z c[1].z */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
@@ -3143,8 +3095,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(1));
-    // 14 - alu 11
-    // DP4 gpr[2].w gpr[1].w c[1].w
+    /* 14 - alu 11 */
+    /* DP4 gpr[2].w gpr[1].w c[1].w */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
@@ -3170,8 +3122,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
 			     CLAMP(1));
-    // 15 - alu 12
-    // DP4 gpr[2].x gpr[1].x c[2].x
+    /* 15 - alu 12 */
+    /* DP4 gpr[2].x gpr[1].x c[2].x */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
@@ -3197,8 +3149,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
-    // 16 - alu 13
-    // DP4 gpr[2].y gpr[1].y c[2].y
+    /* 16 - alu 13 */
+    /* DP4 gpr[2].y gpr[1].y c[2].y */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
@@ -3224,8 +3176,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(1));
-    // 17 - alu 14
-    // DP4 gpr[2].z gpr[1].z c[2].z
+    /* 17 - alu 14 */
+    /* DP4 gpr[2].z gpr[1].z c[2].z */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
@@ -3251,8 +3203,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(1));
-    // 18 - alu 15
-    // DP4 gpr[2].w gpr[1].w c[2].w
+    /* 18 - alu 15 */
+    /* DP4 gpr[2].w gpr[1].w c[2].w */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
@@ -3278,10 +3230,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
 			     CLAMP(1));
-    // 19 - alignment
+    /* 19 - alignment */
     ps[i++] = 0x00000000;
     ps[i++] = 0x00000000;
-    // 20/21 - tex 0
+    /* 20/21 - tex 0 */
     ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			 BC_FRAC_MODE(0),
 			 FETCH_WHOLE_QUAD(0),
@@ -3291,10 +3243,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 R7xx_ALT_CONST(0));
     ps[i++] = TEX_DWORD1(DST_GPR(1),
 			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_X),    //R
-			 DST_SEL_Y(SQ_SEL_MASK), //G
-			 DST_SEL_Z(SQ_SEL_MASK), //B
-			 DST_SEL_W(SQ_SEL_1),    //A
+			 DST_SEL_X(SQ_SEL_X),    /* R */
+			 DST_SEL_Y(SQ_SEL_MASK), /* G */
+			 DST_SEL_Z(SQ_SEL_MASK), /* B */
+			 DST_SEL_W(SQ_SEL_1),    /* A */
 			 LOD_BIAS(0),
 			 COORD_TYPE_X(TEX_NORMALIZED),
 			 COORD_TYPE_Y(TEX_NORMALIZED),
@@ -3309,7 +3261,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 SRC_SEL_Z(SQ_SEL_0),
 			 SRC_SEL_W(SQ_SEL_1));
     ps[i++] = TEX_DWORD_PAD;
-    // 22/23 - tex 1
+    /* 22/23 - tex 1 */
     ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			 BC_FRAC_MODE(0),
 			 FETCH_WHOLE_QUAD(0),
@@ -3319,10 +3271,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 R7xx_ALT_CONST(0));
     ps[i++] = TEX_DWORD1(DST_GPR(1),
 			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_MASK), //R
-			 DST_SEL_Y(SQ_SEL_X),    //G
-			 DST_SEL_Z(SQ_SEL_Y),    //B
-			 DST_SEL_W(SQ_SEL_MASK), //A
+			 DST_SEL_X(SQ_SEL_MASK), /* R */
+			 DST_SEL_Y(SQ_SEL_X),    /* G */
+			 DST_SEL_Z(SQ_SEL_Y),    /* B */
+			 DST_SEL_W(SQ_SEL_MASK), /* A */
 			 LOD_BIAS(0),
 			 COORD_TYPE_X(TEX_NORMALIZED),
 			 COORD_TYPE_Y(TEX_NORMALIZED),
@@ -3338,9 +3290,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 SRC_SEL_W(SQ_SEL_1));
     ps[i++] = TEX_DWORD_PAD;
 
-    // xv ps planar ----------------------------------
-     i = accel_state->xv_ps_offset_planar / 4;
-    // 0
+    /* xv ps planar ---------------------------------- */
+    i = accel_state->xv_ps_offset_planar / 4;
+    /* 0 */
     ps[i++] = CF_DWORD0(ADDR(20));
     ps[i++] = CF_DWORD1(POP_COUNT(0),
 			CF_CONST(0),
@@ -3352,7 +3304,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			CF_INST(SQ_CF_INST_TEX),
 			WHOLE_QUAD_MODE(0),
 			BARRIER(0));
-    // 1
+    /* 1 */
     ps[i++] = CF_ALU_DWORD0(ADDR(3),
 			    KCACHE_BANK0(0),
 			    KCACHE_BANK1(0),
@@ -3365,7 +3317,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			    CF_INST(SQ_CF_INST_ALU),
 			    WHOLE_QUAD_MODE(0),
 			    BARRIER(1));
-    // 2
+    /* 2 */
     ps[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
 				      TYPE(SQ_EXPORT_PIXEL),
 				      RW_GPR(2),
@@ -3387,8 +3339,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
      *  Y' is scaled from 16:235
      *  Cb/Cr are scaled from 16:240
      */
-    // 3 - alu 0
-    // MULADD gpr[1].x gpr[1].x c[3].x c[3].y
+    /* 3 - alu 0 */
+    /* MULADD gpr[1].x gpr[1].x c[3].x c[3].y */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
@@ -3410,8 +3362,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
-    // 4 - alu 1
-    // MULADD gpr[1].y gpr[1].y c[3].z c[3].w
+    /* 4 - alu 1 */
+    /* MULADD gpr[1].y gpr[1].y c[3].z c[3].w */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
@@ -3433,8 +3385,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(0));
-    // 5 - alu 2
-    // MULADD gpr[1].z gpr[1].z c[3].z c[3].w
+    /* 5 - alu 2 */
+    /* MULADD gpr[1].z gpr[1].z c[3].z c[3].w */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
@@ -3456,8 +3408,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(0));
-    // 6 - alu 3
-    // MOV gpr[1].w 0.0
+    /* 6 - alu 3 */
+    /* MOV gpr[1].w 0.0 */
     ps[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
@@ -3483,8 +3435,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
 			     CLAMP(0));
-    // 7 - alu 4
-    // DP4 gpr[2].x gpr[1].x c[0].x
+    /* 7 - alu 4 */
+    /* DP4 gpr[2].x gpr[1].x c[0].x */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
@@ -3510,8 +3462,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
-    // 8 - alu 5
-    // DP4 gpr[2].y gpr[1].y c[0].y
+    /* 8 - alu 5 */
+    /* DP4 gpr[2].y gpr[1].y c[0].y */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
@@ -3537,8 +3489,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(1));
-    // 9 - alu 6
-    // DP4 gpr[2].z gpr[1].z c[0].z
+    /* 9 - alu 6 */
+    /* DP4 gpr[2].z gpr[1].z c[0].z */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
@@ -3564,8 +3516,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(1));
-    // 10 - alu 7
-    // DP4 gpr[2].w gpr[1].w c[0].w
+    /* 10 - alu 7 */
+    /* DP4 gpr[2].w gpr[1].w c[0].w */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
@@ -3591,8 +3543,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
 			     CLAMP(1));
-    // 11 - alu 8
-    // DP4 gpr[2].x gpr[1].x c[1].x
+    /* 11 - alu 8 */
+    /* DP4 gpr[2].x gpr[1].x c[1].x */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
@@ -3618,8 +3570,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
-    // 12 - alu 9
-    // DP4 gpr[2].y gpr[1].y c[1].y
+    /* 12 - alu 9 */
+    /* DP4 gpr[2].y gpr[1].y c[1].y */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
@@ -3645,8 +3597,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(1));
-    // 13 - alu 10
-    // DP4 gpr[2].z gpr[1].z c[1].z
+    /* 13 - alu 10 */
+    /* DP4 gpr[2].z gpr[1].z c[1].z */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
@@ -3672,8 +3624,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(1));
-    // 14 - alu 11
-    // DP4 gpr[2].w gpr[1].w c[1].w
+    /* 14 - alu 11 */
+    /* DP4 gpr[2].w gpr[1].w c[1].w */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
@@ -3699,8 +3651,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
 			     CLAMP(1));
-    // 15 - alu 12
-    // DP4 gpr[2].x gpr[1].x c[2].x
+    /* 15 - alu 12 */
+    /* DP4 gpr[2].x gpr[1].x c[2].x */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_X),
@@ -3726,8 +3678,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_X),
 			     CLAMP(1));
-    // 16 - alu 13
-    // DP4 gpr[2].y gpr[1].y c[2].y
+    /* 16 - alu 13 */
+    /* DP4 gpr[2].y gpr[1].y c[2].y */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Y),
@@ -3753,8 +3705,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Y),
 			     CLAMP(1));
-    // 17 - alu 14
-    // DP4 gpr[2].z gpr[1].z c[2].z
+    /* 17 - alu 14 */
+    /* DP4 gpr[2].z gpr[1].z c[2].z */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_Z),
@@ -3780,8 +3732,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_Z),
 			     CLAMP(1));
-    // 18 - alu 15
-    // DP4 gpr[2].w gpr[1].w c[2].w
+    /* 18 - alu 15 */
+    /* DP4 gpr[2].w gpr[1].w c[2].w */
     ps[i++] = ALU_DWORD0(SRC0_SEL(1),
 			 SRC0_REL(ABSOLUTE),
 			 SRC0_ELEM(ELEM_W),
@@ -3807,10 +3759,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_REL(ABSOLUTE),
 			     DST_ELEM(ELEM_W),
 			     CLAMP(1));
-    // 19 - alignment
+    /* 19 - alignment */
     ps[i++] = 0x00000000;
     ps[i++] = 0x00000000;
-    // 20/21 - tex 0
+    /* 20/21 - tex 0 */
     ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			 BC_FRAC_MODE(0),
 			 FETCH_WHOLE_QUAD(0),
@@ -3820,10 +3772,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 R7xx_ALT_CONST(0));
     ps[i++] = TEX_DWORD1(DST_GPR(1),
 			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_X),    //R
-			 DST_SEL_Y(SQ_SEL_MASK), //G
-			 DST_SEL_Z(SQ_SEL_MASK), //B
-			 DST_SEL_W(SQ_SEL_1),    //A
+			 DST_SEL_X(SQ_SEL_X),    /* R */
+			 DST_SEL_Y(SQ_SEL_MASK), /* G */
+			 DST_SEL_Z(SQ_SEL_MASK), /* B */
+			 DST_SEL_W(SQ_SEL_1),    /* A */
 			 LOD_BIAS(0),
 			 COORD_TYPE_X(TEX_NORMALIZED),
 			 COORD_TYPE_Y(TEX_NORMALIZED),
@@ -3838,7 +3790,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 SRC_SEL_Z(SQ_SEL_0),
 			 SRC_SEL_W(SQ_SEL_1));
     ps[i++] = TEX_DWORD_PAD;
-    // 22/23 - tex 1
+    /* 22/23 - tex 1 */
     ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			 BC_FRAC_MODE(0),
 			 FETCH_WHOLE_QUAD(0),
@@ -3848,10 +3800,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 R7xx_ALT_CONST(0));
     ps[i++] = TEX_DWORD1(DST_GPR(1),
 			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_MASK), //R
-			 DST_SEL_Y(SQ_SEL_MASK), //G
-			 DST_SEL_Z(SQ_SEL_X),    //B
-			 DST_SEL_W(SQ_SEL_MASK), //A
+			 DST_SEL_X(SQ_SEL_MASK), /* R */
+			 DST_SEL_Y(SQ_SEL_MASK), /* G */
+			 DST_SEL_Z(SQ_SEL_X),    /* B */
+			 DST_SEL_W(SQ_SEL_MASK), /* A */
 			 LOD_BIAS(0),
 			 COORD_TYPE_X(TEX_NORMALIZED),
 			 COORD_TYPE_Y(TEX_NORMALIZED),
@@ -3866,7 +3818,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 SRC_SEL_Z(SQ_SEL_0),
 			 SRC_SEL_W(SQ_SEL_1));
     ps[i++] = TEX_DWORD_PAD;
-    // 24/25 - tex 2
+    /* 24/25 - tex 2 */
     ps[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			 BC_FRAC_MODE(0),
 			 FETCH_WHOLE_QUAD(0),
@@ -3876,10 +3828,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 R7xx_ALT_CONST(0));
     ps[i++] = TEX_DWORD1(DST_GPR(1),
 			 DST_REL(ABSOLUTE),
-			 DST_SEL_X(SQ_SEL_MASK), //R
-			 DST_SEL_Y(SQ_SEL_X),    //G
-			 DST_SEL_Z(SQ_SEL_MASK), //B
-			 DST_SEL_W(SQ_SEL_MASK), //A
+			 DST_SEL_X(SQ_SEL_MASK), /* R */
+			 DST_SEL_Y(SQ_SEL_X),    /* G */
+			 DST_SEL_Z(SQ_SEL_MASK), /* B */
+			 DST_SEL_W(SQ_SEL_MASK), /* A */
 			 LOD_BIAS(0),
 			 COORD_TYPE_X(TEX_NORMALIZED),
 			 COORD_TYPE_Y(TEX_NORMALIZED),
@@ -3895,9 +3847,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 SRC_SEL_W(SQ_SEL_1));
     ps[i++] = TEX_DWORD_PAD;
 
-    // comp mask vs ---------------------------------------
+    /* comp mask vs --------------------------------------- */
     i = accel_state->comp_mask_vs_offset / 4;
-    //0
+    /* 0 */
     vs[i++] = CF_DWORD0(ADDR(4));
     vs[i++] = CF_DWORD1(POP_COUNT(0),
 			CF_CONST(0),
@@ -3909,7 +3861,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			CF_INST(SQ_CF_INST_VTX),
 			WHOLE_QUAD_MODE(0),
 			BARRIER(1));
-    //1 - dst
+    /* 1 - dst */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
 				      TYPE(SQ_EXPORT_POS),
 				      RW_GPR(2),
@@ -3927,7 +3879,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(1));
-    //2 - src
+    /* 2 - src */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
 				      TYPE(SQ_EXPORT_PARAM),
 				      RW_GPR(1),
@@ -3945,7 +3897,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(0));
-    //3 - mask
+    /* 3 - mask */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
 				      TYPE(SQ_EXPORT_PARAM),
 				      RW_GPR(0),
@@ -3963,7 +3915,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(0));
-    //4/5 - dst
+    /* 4/5 - dst */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -3979,16 +3931,16 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(0),
 			 ENDIAN_SWAP(ENDIAN_NONE),
 			 CONST_BUF_NO_STRIDE(0),
 			 MEGA_FETCH(1));
     vs[i++] = VTX_DWORD_PAD;
-    //6/7 - src
+    /* 6/7 - src */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -4004,16 +3956,16 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(8),
 			 ENDIAN_SWAP(ENDIAN_NONE),
 			 CONST_BUF_NO_STRIDE(0),
 			 MEGA_FETCH(0));
     vs[i++] = VTX_DWORD_PAD;
-    //8/9 - mask
+    /* 8/9 - mask */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -4029,9 +3981,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(16),
 			 ENDIAN_SWAP(ENDIAN_NONE),
@@ -4039,9 +3991,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 MEGA_FETCH(0));
     vs[i++] = VTX_DWORD_PAD;
 
-    // comp mask vs ---------------------------------------
+    /* comp mask vs --------------------------------------- */
     i = accel_state->comp_mask_vs_offset / 4;
-    //0
+    /* 0 */
     vs[i++] = CF_DWORD0(ADDR(4));
     vs[i++] = CF_DWORD1(POP_COUNT(0),
 			CF_CONST(0),
@@ -4053,7 +4005,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			CF_INST(SQ_CF_INST_VTX),
 			WHOLE_QUAD_MODE(0),
 			BARRIER(1));
-    //1 - dst
+    /* 1 - dst */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
 				      TYPE(SQ_EXPORT_POS),
 				      RW_GPR(2),
@@ -4071,7 +4023,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(1));
-    //2 - src
+    /* 2 - src */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
 				      TYPE(SQ_EXPORT_PARAM),
 				      RW_GPR(1),
@@ -4089,7 +4041,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(0));
-    //3 - mask
+    /* 3 - mask */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
 				      TYPE(SQ_EXPORT_PARAM),
 				      RW_GPR(0),
@@ -4107,7 +4059,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(0));
-    //4/5 - dst
+    /* 4/5 - dst */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -4123,16 +4075,16 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(0),
 			 ENDIAN_SWAP(ENDIAN_NONE),
 			 CONST_BUF_NO_STRIDE(0),
 			 MEGA_FETCH(1));
     vs[i++] = VTX_DWORD_PAD;
-    //6/7 - src
+    /* 6/7 - src */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -4148,16 +4100,16 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(8),
 			 ENDIAN_SWAP(ENDIAN_NONE),
 			 CONST_BUF_NO_STRIDE(0),
 			 MEGA_FETCH(0));
     vs[i++] = VTX_DWORD_PAD;
-    //8/9 - mask
+    /* 8/9 - mask */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -4173,9 +4125,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(16),
 			 ENDIAN_SWAP(ENDIAN_NONE),
@@ -4183,12 +4135,12 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 MEGA_FETCH(0));
     vs[i++] = VTX_DWORD_PAD;
 
-    // comp mask ps ---------------------------------------
-    // not yet
+    /* comp mask ps --------------------------------------- */
+    /* not yet */
 
-    // comp vs ---------------------------------------
+    /* comp vs --------------------------------------- */
     i = accel_state->comp_vs_offset / 4;
-    //0
+    /* 0 */
     vs[i++] = CF_DWORD0(ADDR(4));
     vs[i++] = CF_DWORD1(POP_COUNT(0),
 			CF_CONST(0),
@@ -4200,7 +4152,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			CF_INST(SQ_CF_INST_VTX),
 			WHOLE_QUAD_MODE(0),
 			BARRIER(1));
-    //1 - dst
+    /* 1 - dst */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
 				      TYPE(SQ_EXPORT_POS),
 				      RW_GPR(1),
@@ -4218,7 +4170,7 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(1));
-    //2 - src
+    /* 2 - src */
     vs[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
 				      TYPE(SQ_EXPORT_PARAM),
 				      RW_GPR(0),
@@ -4236,10 +4188,10 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 					   CF_INST(SQ_CF_INST_EXPORT_DONE),
 					   WHOLE_QUAD_MODE(0),
 					   BARRIER(0));
-    //3
+    /* 3 */
     vs[i++] = 0x00000000;
     vs[i++] = 0x00000000;
-    //4/5 - dst
+    /* 4/5 - dst */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -4255,16 +4207,16 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(0),
 			 ENDIAN_SWAP(ENDIAN_NONE),
 			 CONST_BUF_NO_STRIDE(0),
 			 MEGA_FETCH(1));
     vs[i++] = VTX_DWORD_PAD;
-    //6/7 - src
+    /* 6/7 - src */
     vs[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			 FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			 FETCH_WHOLE_QUAD(0),
@@ -4280,9 +4232,9 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			     DST_SEL_Z(SQ_SEL_0),
 			     DST_SEL_W(SQ_SEL_1),
 			     USE_CONST_FIELDS(0),
-			     DATA_FORMAT(FMT_32_32_FLOAT), //xxx
-			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM), //xxx
-			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED), //xxx
+			     DATA_FORMAT(FMT_32_32_FLOAT),
+			     NUM_FORMAT_ALL(SQ_NUM_FORMAT_NORM),
+			     FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
 			     SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
     vs[i++] = VTX_DWORD2(OFFSET(8),
 			 ENDIAN_SWAP(ENDIAN_NONE),
@@ -4290,8 +4242,8 @@ R600LoadShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
 			 MEGA_FETCH(0));
     vs[i++] = VTX_DWORD_PAD;
 
-    // comp ps ---------------------------------------
-    // not yet
+    /* comp ps --------------------------------------- */
+    /* not yet */
 
 
     return TRUE;
@@ -4304,7 +4256,7 @@ R600PrepareAccess(PixmapPtr pPix, int index)
     RADEONInfoPtr info = RADEONPTR(pScrn);
     unsigned char *RADEONMMIO = info->MMIO;
 
-    //flush HDP read/write caches
+    /* flush HDP read/write caches */
     OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
 
     return TRUE;
@@ -4317,7 +4269,7 @@ R600FinishAccess(PixmapPtr pPix, int index)
     RADEONInfoPtr info = RADEONPTR(pScrn);
     unsigned char *RADEONMMIO = info->MMIO;
 
-    //flush HDP read/write caches
+    /* flush HDP read/write caches */
     OUTREG(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
 
 }
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 0932bc8..bf98ec7 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -117,9 +117,9 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     int uv_offset;
 
     static float ps_alu_consts[] = {
-        1.0,  0.0,      1.4020,    0,  // r - c[0]
-        1.0, -0.34414, -0.71414,  0,  // g - c[1]
-        1.0,  1.7720,   0.0,        0,  // b - c[2]
+        1.0,  0.0,      1.4020,   0,  /* r - c[0] */
+        1.0, -0.34414, -0.71414,  0,  /* g - c[1] */
+        1.0,  1.7720,   0.0,      0,  /* b - c[2] */
 	/* Constants for undoing Y'CbCr scaling
 	 *  - Y' is scaled from 16:235
 	 *  - Cb/Cr are scaled from 16:240
@@ -138,7 +138,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
     accel_state->src_pitch[0] = pPriv->src_pitch;
 
-    // bad pitch
+    /* bad pitch */
     if (accel_state->src_pitch[0] & 7)
 	return;
     if (accel_state->dst_pitch & 7)
@@ -157,8 +157,6 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     /* Init */
     start_3d(pScrn, accel_state->ib);
 
-    //cp_set_surface_sync(pScrn, accel_state->ib);
-
     set_default_state(pScrn, accel_state->ib);
 
     /* Scissor / viewport */
@@ -208,7 +206,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     ps_conf.export_mode         = 2;
     ps_setup                    (pScrn, accel_state->ib, &ps_conf);
 
-    // PS alu constants
+    /* PS alu constants */
     set_alu_consts(pScrn, accel_state->ib, 0, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts);
 
     /* Texture */
@@ -222,7 +220,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0],
 			    accel_state->src_mc_addr[0]);
 
-	// Y texture
+	/* Y texture */
 	tex_res.id                  = 0;
 	tex_res.w                   = pPriv->w;
 	tex_res.h                   = pPriv->h;
@@ -233,7 +231,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.mip_base            = accel_state->src_mc_addr[0];
 
 	tex_res.format              = FMT_8;
-	tex_res.dst_sel_x           = SQ_SEL_X; //Y
+	tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
 	tex_res.dst_sel_y           = SQ_SEL_1;
 	tex_res.dst_sel_z           = SQ_SEL_1;
 	tex_res.dst_sel_w           = SQ_SEL_1;
@@ -245,13 +243,13 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.interlaced          = 0;
 	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
 
-	// Y sampler
+	/* Y sampler */
 	tex_samp.id                 = 0;
 	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
 	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
 	tex_samp.clamp_z            = SQ_TEX_WRAP;
 
-	// xxx: switch to bicubic
+	/* xxx: switch to bicubic */
 	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
 	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
 
@@ -259,7 +257,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_samp.mip_filter         = 0;			/* no mipmap */
 	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
 
-	// U or V texture
+	/* U or V texture */
 	uv_offset = accel_state->src_pitch[0] * pPriv->h;
 	uv_offset = (uv_offset + 255) & ~255;
 
@@ -272,21 +270,21 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.w                   = pPriv->w >> 1;
 	tex_res.h                   = pPriv->h >> 1;
 	tex_res.pitch               = accel_state->src_pitch[0] >> 1;
-	tex_res.dst_sel_x           = SQ_SEL_X; //V or U
+	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
 	tex_res.dst_sel_y           = SQ_SEL_1;
 	tex_res.dst_sel_z           = SQ_SEL_1;
 	tex_res.dst_sel_w           = SQ_SEL_1;
 	tex_res.interlaced          = 0;
-	// XXX tex bases need to be 256B aligned
+
 	tex_res.base                = accel_state->src_mc_addr[0] + uv_offset;
 	tex_res.mip_base            = accel_state->src_mc_addr[0] + uv_offset;
 	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
 
-	// U or V sampler
+	/* U or V sampler */
 	tex_samp.id                 = 1;
 	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
 
-	// U or V texture
+	/* U or V texture */
 	uv_offset += ((accel_state->src_pitch[0] >> 1) * (pPriv->h >> 1));
 	uv_offset = (uv_offset + 255) & ~255;
 
@@ -299,17 +297,17 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.w                   = pPriv->w >> 1;
 	tex_res.h                   = pPriv->h >> 1;
 	tex_res.pitch               = accel_state->src_pitch[0] >> 1;
-	tex_res.dst_sel_x           = SQ_SEL_X; //V or U
+	tex_res.dst_sel_x           = SQ_SEL_X; /* V or U */
 	tex_res.dst_sel_y           = SQ_SEL_1;
 	tex_res.dst_sel_z           = SQ_SEL_1;
 	tex_res.dst_sel_w           = SQ_SEL_1;
 	tex_res.interlaced          = 0;
-	// XXX tex bases need to be 256B aligned
+
 	tex_res.base                = accel_state->src_mc_addr[0] + uv_offset;
 	tex_res.mip_base            = accel_state->src_mc_addr[0] + uv_offset;
 	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
 
-	// UV sampler
+	/* UV sampler */
 	tex_samp.id                 = 2;
 	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
 	break;
@@ -323,7 +321,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0],
 			    accel_state->src_mc_addr[0]);
 
-	// Y texture
+	/* Y texture */
 	tex_res.id                  = 0;
 	tex_res.w                   = pPriv->w;
 	tex_res.h                   = pPriv->h;
@@ -335,9 +333,9 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 
 	tex_res.format              = FMT_8_8;
 	if (pPriv->id == FOURCC_UYVY)
-	    tex_res.dst_sel_x           = SQ_SEL_Y; //Y
+	    tex_res.dst_sel_x           = SQ_SEL_Y; /* Y */
 	else
-	    tex_res.dst_sel_x           = SQ_SEL_X; //Y
+	    tex_res.dst_sel_x           = SQ_SEL_X; /* Y */
 	tex_res.dst_sel_y           = SQ_SEL_1;
 	tex_res.dst_sel_z           = SQ_SEL_1;
 	tex_res.dst_sel_w           = SQ_SEL_1;
@@ -349,13 +347,13 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_res.interlaced          = 0;
 	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
 
-	// Y sampler
+	/* Y sampler */
 	tex_samp.id                 = 0;
 	tex_samp.clamp_x            = SQ_TEX_CLAMP_LAST_TEXEL;
 	tex_samp.clamp_y            = SQ_TEX_CLAMP_LAST_TEXEL;
 	tex_samp.clamp_z            = SQ_TEX_WRAP;
 
-	// xxx: switch to bicubic
+	/* xxx: switch to bicubic */
 	tex_samp.xy_mag_filter      = SQ_TEX_XY_FILTER_BILINEAR;
 	tex_samp.xy_min_filter      = SQ_TEX_XY_FILTER_BILINEAR;
 
@@ -363,28 +361,28 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	tex_samp.mip_filter         = 0;			/* no mipmap */
 	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
 
-	// UV texture
+	/* UV texture */
 	tex_res.id                  = 1;
 	tex_res.format              = FMT_8_8_8_8;
 	tex_res.w                   = pPriv->w >> 1;
 	tex_res.h                   = pPriv->h;
 	tex_res.pitch               = accel_state->src_pitch[0] >> 2;
 	if (pPriv->id == FOURCC_UYVY) {
-	    tex_res.dst_sel_x           = SQ_SEL_X; //V
-	    tex_res.dst_sel_y           = SQ_SEL_Z; //U
+	    tex_res.dst_sel_x           = SQ_SEL_X; /* V */
+	    tex_res.dst_sel_y           = SQ_SEL_Z; /* U */
 	} else {
-	    tex_res.dst_sel_x           = SQ_SEL_Y; //V
-	    tex_res.dst_sel_y           = SQ_SEL_W; //U
+	    tex_res.dst_sel_x           = SQ_SEL_Y; /* V */
+	    tex_res.dst_sel_y           = SQ_SEL_W; /* U */
 	}
 	tex_res.dst_sel_z           = SQ_SEL_1;
 	tex_res.dst_sel_w           = SQ_SEL_1;
 	tex_res.interlaced          = 0;
-	// XXX tex bases need to be 256B aligned
+
 	tex_res.base                = accel_state->src_mc_addr[0];
 	tex_res.mip_base            = accel_state->src_mc_addr[0];
 	set_tex_resource            (pScrn, accel_state->ib, &tex_res);
 
-	// UV sampler
+	/* UV sampler */
 	tex_samp.id                 = 1;
 	set_tex_sampler             (pScrn, accel_state->ib, &tex_samp);
 	break;
@@ -407,15 +405,15 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     case 16:
 	if (pPixmap->drawable.depth == 15) {
 	    cb_conf.format = COLOR_1_5_5_5;
-	    cb_conf.comp_swap = 1; //ARGB
+	    cb_conf.comp_swap = 1; /* ARGB */
 	} else {
 	    cb_conf.format = COLOR_5_6_5;
-	    cb_conf.comp_swap = 2; //RGB
+	    cb_conf.comp_swap = 2; /* RGB */
 	}
 	break;
     case 32:
 	cb_conf.format = COLOR_8_8_8_8;
-	cb_conf.comp_swap = 1; //ARGB
+	cb_conf.comp_swap = 1; /* ARGB */
 	break;
     default:
 	return;
@@ -432,7 +430,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 								DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
 
     /* Interpolator setup */
-    // export tex coords from VS
+    /* export tex coords from VS */
     EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift));
     EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift));
 


More information about the xorg-commit mailing list