xf86-video-ati: Branch 'master' - 2 commits

Alex Deucher agd5f at kemper.freedesktop.org
Fri Jul 30 14:25:22 PDT 2010


 src/r600_exa.c    |   19 -
 src/r600_shader.c |  860 ++++++++++++++++++++++++++++--------------------------
 src/r600_shader.h |    1 
 src/radeon.h      |    1 
 4 files changed, 458 insertions(+), 423 deletions(-)

New commits:
commit 82254b59268140c4102ae3cd713743ae2be15c00
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Fri Jul 30 17:15:05 2010 -0400

    r6xx/r7xx: unify composite mask and non-mask pixel shader

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 89d5877..72c4ff8 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1427,7 +1427,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 			       &src_obj,
 			       &mask_obj,
 			       &dst_obj,
-			       accel_state->comp_vs_offset, accel_state->comp_mask_ps_offset,
+			       accel_state->comp_vs_offset, accel_state->comp_ps_offset,
 			       3, 0xffffffff))
 	    return FALSE;
 
@@ -1491,10 +1491,13 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
     } else
         accel_state->is_transform[1] = FALSE;
 
-    if (pMask)
+    if (pMask) {
 	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0));
-    else
+	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0));
+    } else {
 	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0));
+	set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0));
+    }
 
     /* Shader */
 
@@ -1516,7 +1519,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 
     ps_conf.shader_addr         = accel_state->ps_mc_addr;
     ps_conf.num_gprs            = 3;
-    ps_conf.stack_size          = 0;
+    ps_conf.stack_size          = 1;
     ps_conf.uncached_first_inst = 1;
     ps_conf.clamp_consts        = 0;
     ps_conf.export_mode         = 2;
@@ -2224,16 +2227,12 @@ R600LoadShaders(ScrnInfoPtr pScrn)
     accel_state->comp_ps_offset = 2560;
     R600_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
 
-    /*  comp mask ps --------------------------------------- */
-    accel_state->comp_mask_ps_offset = 3072;
-    R600_comp_mask_ps(ChipSet, shader + accel_state->comp_mask_ps_offset / 4);
-
     /*  xv vs --------------------------------------- */
-    accel_state->xv_vs_offset = 3584;
+    accel_state->xv_vs_offset = 3072;
     R600_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
 
     /*  xv ps --------------------------------------- */
-    accel_state->xv_ps_offset = 4096;
+    accel_state->xv_ps_offset = 3584;
     R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
 
 #ifdef XF86DRM_MODE
diff --git a/src/r600_shader.c b/src/r600_shader.c
index 47bc007..e2a4163 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1149,230 +1149,6 @@ int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     return i;
 }
 
-/* comp mask ps --------------------------------------- */
-int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* shader)
-{
-    int i = 0;
-
-    /* 0 */
-    shader[i++] = CF_DWORD0(ADDR(8));
-    shader[i++] = CF_DWORD1(POP_COUNT(0),
-			    CF_CONST(0),
-			    COND(SQ_CF_COND_ACTIVE),
-			    I_COUNT(2),
-			    CALL_COUNT(0),
-			    END_OF_PROGRAM(0),
-			    VALID_PIXEL_MODE(0),
-			    CF_INST(SQ_CF_INST_TEX),
-			    WHOLE_QUAD_MODE(0),
-			    BARRIER(1));
-
-    /* 1 */
-    shader[i++] = CF_ALU_DWORD0(ADDR(3),
-				KCACHE_BANK0(0),
-				KCACHE_BANK1(0),
-				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
-    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
-				KCACHE_ADDR0(0),
-				KCACHE_ADDR1(0),
-				I_COUNT(4),
-				USES_WATERFALL(0),
-				CF_INST(SQ_CF_INST_ALU),
-				WHOLE_QUAD_MODE(0),
-				BARRIER(1));
-
-    /* 2 */
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
-					  TYPE(SQ_EXPORT_PIXEL),
-					  RW_GPR(2),
-					  RW_REL(ABSOLUTE),
-					  INDEX_GPR(0),
-					  ELEM_SIZE(1));
-
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					       SRC_SEL_Y(SQ_SEL_Y),
-					       SRC_SEL_Z(SQ_SEL_Z),
-					       SRC_SEL_W(SQ_SEL_W),
-					       R6xx_ELEM_LOOP(0),
-					       BURST_COUNT(1),
-					       END_OF_PROGRAM(1),
-					       VALID_PIXEL_MODE(0),
-					       CF_INST(SQ_CF_INST_EXPORT_DONE),
-					       WHOLE_QUAD_MODE(0),
-					       BARRIER(1));
-
-    /* 3 - alu 0 */
-    /* MUL gpr[2].x gpr[1].x gpr[0].x */
-    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
-			     SRC0_REL(ABSOLUTE),
-			     SRC0_ELEM(ELEM_X),
-			     SRC0_NEG(0),
-			     SRC1_SEL(0),
-			     SRC1_REL(ABSOLUTE),
-			     SRC1_ELEM(ELEM_X),
-			     SRC1_NEG(0),
-			     INDEX_MODE(SQ_INDEX_LOOP),
-			     PRED_SEL(SQ_PRED_SEL_OFF),
-			     LAST(0));
-    shader[i++] = ALU_DWORD1_OP2(ChipSet,
-				 SRC0_ABS(0),
-				 SRC1_ABS(0),
-				 UPDATE_EXECUTE_MASK(0),
-				 UPDATE_PRED(0),
-				 WRITE_MASK(1),
-				 FOG_MERGE(0),
-				 OMOD(SQ_ALU_OMOD_OFF),
-				 ALU_INST(SQ_OP2_INST_MUL),
-				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
-				 DST_REL(ABSOLUTE),
-				 DST_ELEM(ELEM_X),
-				 CLAMP(1));
-    /* 4 - alu 1 */
-    /* MUL gpr[2].y gpr[1].y gpr[0].y */
-    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
-			     SRC0_REL(ABSOLUTE),
-			     SRC0_ELEM(ELEM_Y),
-			     SRC0_NEG(0),
-			     SRC1_SEL(0),
-			     SRC1_REL(ABSOLUTE),
-			     SRC1_ELEM(ELEM_Y),
-			     SRC1_NEG(0),
-			     INDEX_MODE(SQ_INDEX_LOOP),
-			     PRED_SEL(SQ_PRED_SEL_OFF),
-			     LAST(0));
-    shader[i++] = ALU_DWORD1_OP2(ChipSet,
-				 SRC0_ABS(0),
-				 SRC1_ABS(0),
-				 UPDATE_EXECUTE_MASK(0),
-				 UPDATE_PRED(0),
-				 WRITE_MASK(1),
-				 FOG_MERGE(0),
-				 OMOD(SQ_ALU_OMOD_OFF),
-				 ALU_INST(SQ_OP2_INST_MUL),
-				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
-				 DST_REL(ABSOLUTE),
-				 DST_ELEM(ELEM_Y),
-				 CLAMP(1));
-    /* 5 - alu 2 */
-    /* MUL gpr[2].z gpr[1].z gpr[0].z */
-    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
-			     SRC0_REL(ABSOLUTE),
-			     SRC0_ELEM(ELEM_Z),
-			     SRC0_NEG(0),
-			     SRC1_SEL(0),
-			     SRC1_REL(ABSOLUTE),
-			     SRC1_ELEM(ELEM_Z),
-			     SRC1_NEG(0),
-			     INDEX_MODE(SQ_INDEX_LOOP),
-			     PRED_SEL(SQ_PRED_SEL_OFF),
-			     LAST(0));
-    shader[i++] = ALU_DWORD1_OP2(ChipSet,
-				 SRC0_ABS(0),
-				 SRC1_ABS(0),
-				 UPDATE_EXECUTE_MASK(0),
-				 UPDATE_PRED(0),
-				 WRITE_MASK(1),
-				 FOG_MERGE(0),
-				 OMOD(SQ_ALU_OMOD_OFF),
-				 ALU_INST(SQ_OP2_INST_MUL),
-				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
-				 DST_REL(ABSOLUTE),
-				 DST_ELEM(ELEM_Z),
-				 CLAMP(1));
-    /* 6 - alu 3 */
-    /* MUL gpr[2].w gpr[1].w gpr[0].w */
-    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
-			     SRC0_REL(ABSOLUTE),
-			     SRC0_ELEM(ELEM_W),
-			     SRC0_NEG(0),
-			     SRC1_SEL(0),
-			     SRC1_REL(ABSOLUTE),
-			     SRC1_ELEM(ELEM_W),
-			     SRC1_NEG(0),
-			     INDEX_MODE(SQ_INDEX_LOOP),
-			     PRED_SEL(SQ_PRED_SEL_OFF),
-			     LAST(1));
-    shader[i++] = ALU_DWORD1_OP2(ChipSet,
-				 SRC0_ABS(0),
-				 SRC1_ABS(0),
-				 UPDATE_EXECUTE_MASK(0),
-				 UPDATE_PRED(0),
-				 WRITE_MASK(1),
-				 FOG_MERGE(0),
-				 OMOD(SQ_ALU_OMOD_OFF),
-				 ALU_INST(SQ_OP2_INST_MUL),
-				 BANK_SWIZZLE(SQ_ALU_VEC_012),
-				 DST_GPR(2),
-				 DST_REL(ABSOLUTE),
-				 DST_ELEM(ELEM_W),
-				 CLAMP(1));
-    /* 7 */
-    shader[i++] = 0x00000000;
-    shader[i++] = 0x00000000;
-
-    /* 8/9 - src */
-    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			     BC_FRAC_MODE(0),
-			     FETCH_WHOLE_QUAD(0),
-			     RESOURCE_ID(0),
-			     SRC_GPR(0),
-			     SRC_REL(ABSOLUTE),
-			     R7xx_ALT_CONST(0));
-    shader[i++] = TEX_DWORD1(DST_GPR(0),
-			     DST_REL(ABSOLUTE),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_Z),
-			     DST_SEL_W(SQ_SEL_W),
-			     LOD_BIAS(0),
-			     COORD_TYPE_X(TEX_NORMALIZED),
-			     COORD_TYPE_Y(TEX_NORMALIZED),
-			     COORD_TYPE_Z(TEX_NORMALIZED),
-			     COORD_TYPE_W(TEX_NORMALIZED));
-    shader[i++] = TEX_DWORD2(OFFSET_X(0),
-			     OFFSET_Y(0),
-			     OFFSET_Z(0),
-			     SAMPLER_ID(0),
-			     SRC_SEL_X(SQ_SEL_X),
-			     SRC_SEL_Y(SQ_SEL_Y),
-			     SRC_SEL_Z(SQ_SEL_0),
-			     SRC_SEL_W(SQ_SEL_1));
-    shader[i++] = TEX_DWORD_PAD;
-    /* 10/11 - mask */
-    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
-			     BC_FRAC_MODE(0),
-			     FETCH_WHOLE_QUAD(0),
-			     RESOURCE_ID(1),
-			     SRC_GPR(1),
-			     SRC_REL(ABSOLUTE),
-			     R7xx_ALT_CONST(0));
-    shader[i++] = TEX_DWORD1(DST_GPR(1),
-			     DST_REL(ABSOLUTE),
-			     DST_SEL_X(SQ_SEL_X),
-			     DST_SEL_Y(SQ_SEL_Y),
-			     DST_SEL_Z(SQ_SEL_Z),
-			     DST_SEL_W(SQ_SEL_W),
-			     LOD_BIAS(0),
-			     COORD_TYPE_X(TEX_NORMALIZED),
-			     COORD_TYPE_Y(TEX_NORMALIZED),
-			     COORD_TYPE_Z(TEX_NORMALIZED),
-			     COORD_TYPE_W(TEX_NORMALIZED));
-    shader[i++] = TEX_DWORD2(OFFSET_X(0),
-			     OFFSET_Y(0),
-			     OFFSET_Z(0),
-			     SAMPLER_ID(1),
-			     SRC_SEL_X(SQ_SEL_X),
-			     SRC_SEL_Y(SQ_SEL_Y),
-			     SRC_SEL_Z(SQ_SEL_0),
-			     SRC_SEL_W(SQ_SEL_1));
-    shader[i++] = TEX_DWORD_PAD;
-
-    return i;
-}
-
 /* comp vs --------------------------------------- */
 int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 {
@@ -2152,7 +1928,102 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
     int i = 0;
 
     /* 0 */
-    shader[i++] = CF_DWORD0(ADDR(2));
+    shader[i++] = CF_DWORD0(ADDR(3));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_BOOL),
+                            I_COUNT(0),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(0));
+    /* 1 */
+    shader[i++] = CF_DWORD0(ADDR(7));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_NOT_BOOL),
+                            I_COUNT(0),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(0));
+    /* 2 */
+    shader[i++] = CF_DWORD0(ADDR(0));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(0),
+                            CALL_COUNT(0),
+                            END_OF_PROGRAM(1),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_NOP),
+                            WHOLE_QUAD_MODE(0),
+                            BARRIER(1));
+
+    /* 3 - mask sub */
+    shader[i++] = CF_DWORD0(ADDR(14));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(2),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_TEX),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* 4 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(10),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				USES_WATERFALL(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 5 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(2),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(1),
+					       END_OF_PROGRAM(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(1));
+    /* 6 */
+    shader[i++] = CF_DWORD0(ADDR(0));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* 7 non-mask sub */
+    shader[i++] = CF_DWORD0(ADDR(18));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
 			    CF_CONST(0),
 			    COND(SQ_CF_COND_ACTIVE),
@@ -2163,28 +2034,204 @@ int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
 			    CF_INST(SQ_CF_INST_TEX),
 			    WHOLE_QUAD_MODE(0),
 			    BARRIER(1));
-    /* 1 */
+    /* 8 */
     shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
 					  TYPE(SQ_EXPORT_PIXEL),
 					  RW_GPR(0),
 					  RW_REL(ABSOLUTE),
 					  INDEX_GPR(0),
 					  ELEM_SIZE(1));
-
     shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
 					       SRC_SEL_Y(SQ_SEL_Y),
 					       SRC_SEL_Z(SQ_SEL_Z),
 					       SRC_SEL_W(SQ_SEL_W),
 					       R6xx_ELEM_LOOP(0),
 					       BURST_COUNT(1),
-					       END_OF_PROGRAM(1),
+					       END_OF_PROGRAM(0),
 					       VALID_PIXEL_MODE(0),
 					       CF_INST(SQ_CF_INST_EXPORT_DONE),
 					       WHOLE_QUAD_MODE(0),
 					       BARRIER(1));
+    /* 9 */
+    shader[i++] = CF_DWORD0(ADDR(0));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* 10 - alu 0 */
+    /* MUL gpr[2].x gpr[1].x gpr[0].x */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(1));
+    /* 11 - alu 1 */
+    /* MUL gpr[2].y gpr[1].y gpr[0].y */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Y),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(1));
+    /* 12 - alu 2 */
+    /* MUL gpr[2].z gpr[1].z gpr[0].z */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Z),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Z),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(1));
+    /* 13 - alu 3 */
+    /* MUL gpr[2].w gpr[1].w gpr[0].w */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(1),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_W),
+			     SRC0_NEG(0),
+			     SRC1_SEL(0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_W),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(ChipSet,
+				 SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 FOG_MERGE(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(1));
 
+    /* 14/15 - src - mask */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     BC_FRAC_MODE(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     R7xx_ALT_CONST(0));
+    shader[i++] = TEX_DWORD1(DST_GPR(0),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_Z),
+			     DST_SEL_W(SQ_SEL_W),
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_NORMALIZED),
+			     COORD_TYPE_Y(TEX_NORMALIZED),
+			     COORD_TYPE_Z(TEX_NORMALIZED),
+			     COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(0),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* 16/17 - mask */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     BC_FRAC_MODE(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(1),
+			     SRC_GPR(1),
+			     SRC_REL(ABSOLUTE),
+			     R7xx_ALT_CONST(0));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_Z),
+			     DST_SEL_W(SQ_SEL_W),
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_NORMALIZED),
+			     COORD_TYPE_Y(TEX_NORMALIZED),
+			     COORD_TYPE_Z(TEX_NORMALIZED),
+			     COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(1),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
 
-    /* 2/3 - src */
+    /* 18/19 - src - non-mask */
     shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
 			     BC_FRAC_MODE(0),
 			     FETCH_WHOLE_QUAD(0),
diff --git a/src/r600_shader.h b/src/r600_shader.h
index 6c12614..a68d6c2 100644
--- a/src/r600_shader.h
+++ b/src/r600_shader.h
@@ -353,7 +353,6 @@ extern int R600_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
 extern int R600_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);
 
 extern int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
-extern int R600_comp_mask_ps(RADEONChipFamily ChipSet, uint32_t* ps);
 extern int R600_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
 
 #endif
diff --git a/src/radeon.h b/src/radeon.h
index 56bc076..61f07ba 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -751,7 +751,6 @@ struct radeon_accel_state {
     uint32_t          copy_ps_offset;
     uint32_t          comp_vs_offset;
     uint32_t          comp_ps_offset;
-    uint32_t          comp_mask_ps_offset;
     uint32_t          xv_vs_offset;
     uint32_t          xv_ps_offset;
 
commit 1c17f3a192f644e8e38b5cfb1470f49434bfba27
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Fri Jul 30 16:34:54 2010 -0400

    r6xx/r7xx: clean up composite vertex shader
    
    keep CF, ALU, Fetch instructions in separate groups

diff --git a/src/r600_shader.c b/src/r600_shader.c
index 7e25f6d..47bc007 100644
--- a/src/r600_shader.c
+++ b/src/r600_shader.c
@@ -1391,7 +1391,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                             WHOLE_QUAD_MODE(0),
                             BARRIER(0));
     /* 1 */
-    shader[i++] = CF_DWORD0(ADDR(28));
+    shader[i++] = CF_DWORD0(ADDR(9));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
                             CF_CONST(0),
                             COND(SQ_CF_COND_NOT_BOOL),
@@ -1415,7 +1415,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                             WHOLE_QUAD_MODE(0),
                             BARRIER(1));
     /* 3 - mask sub */
-    shader[i++] = CF_DWORD0(ADDR(22));
+    shader[i++] = CF_DWORD0(ADDR(32));
     shader[i++] = CF_DWORD1(POP_COUNT(0),
 			    CF_CONST(0),
 			    COND(SQ_CF_COND_ACTIVE),
@@ -1428,7 +1428,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 			    BARRIER(1));
 
     /* 4 - ALU */
-    shader[i++] = CF_ALU_DWORD0(ADDR(9),
+    shader[i++] = CF_ALU_DWORD0(ADDR(14),
 				KCACHE_BANK0(0),
 				KCACHE_BANK1(0),
 				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
@@ -1507,9 +1507,84 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 			    CF_INST(SQ_CF_INST_RETURN),
 			    WHOLE_QUAD_MODE(0),
 			    BARRIER(1));
+    /* 9 - non-mask sub */
+    shader[i++] = CF_DWORD0(ADDR(38));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(2),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_VTX),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
+
+    /* 10 - ALU */
+    shader[i++] = CF_ALU_DWORD0(ADDR(26),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(6),
+				USES_WATERFALL(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 11 - dst */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(0),
+					       END_OF_PROGRAM(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(1));
+    /* 12 - src */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       R6xx_ELEM_LOOP(0),
+					       BURST_COUNT(0),
+					       END_OF_PROGRAM(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       WHOLE_QUAD_MODE(0),
+					       BARRIER(0));
+    /* 13 */
+    shader[i++] = CF_DWORD0(ADDR(0));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    CALL_COUNT(0),
+			    END_OF_PROGRAM(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    WHOLE_QUAD_MODE(0),
+			    BARRIER(1));
 
 
-    /* 9 srcX MAD */
+    /* 14 srcX MAD - mask */
     shader[i++] = ALU_DWORD0(SRC0_SEL(256),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_Y),
@@ -1531,7 +1606,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_REL(ABSOLUTE),
                                  DST_ELEM(ELEM_Z),
                                  CLAMP(0));
-    /* 10 srcY MAD */
+    /* 15 srcY MAD */
     shader[i++] = ALU_DWORD0(SRC0_SEL(257),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_Y),
@@ -1554,7 +1629,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_ELEM(ELEM_W),
                                  CLAMP(0));
 
-    /* 11 srcX MAD */
+    /* 16 srcX MAD */
     shader[i++] = ALU_DWORD0(SRC0_SEL(256),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_X),
@@ -1576,7 +1651,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_REL(ABSOLUTE),
                                  DST_ELEM(ELEM_X),
                                  CLAMP(0));
-    /* 12 srcY MAD */
+    /* 17 srcY MAD */
     shader[i++] = ALU_DWORD0(SRC0_SEL(257),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_X),
@@ -1599,7 +1674,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_ELEM(ELEM_Y),
                                  CLAMP(0));
 
-    /* 13 maskX MAD */
+    /* 18 maskX MAD */
     shader[i++] = ALU_DWORD0(SRC0_SEL(258),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_Y),
@@ -1622,7 +1697,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_ELEM(ELEM_Z),
                                  CLAMP(0));
 
-    /* 14 maskY MAD */
+    /* 19 maskY MAD */
     shader[i++] = ALU_DWORD0(SRC0_SEL(259),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_Y),
@@ -1645,7 +1720,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_ELEM(ELEM_W),
                                  CLAMP(0));
 
-    /* 15 srcX MAD */
+    /* 20 srcX MAD */
     shader[i++] = ALU_DWORD0(SRC0_SEL(258),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_X),
@@ -1667,7 +1742,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_REL(ABSOLUTE),
                                  DST_ELEM(ELEM_X),
                                  CLAMP(0));
-    /* 16 srcY MAD */
+    /* 21 srcY MAD */
     shader[i++] = ALU_DWORD0(SRC0_SEL(259),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_X),
@@ -1690,7 +1765,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_ELEM(ELEM_Y),
                                  CLAMP(0));
 
-    /* 17 srcX / w */
+    /* 22 srcX / w */
     shader[i++] = ALU_DWORD0(SRC0_SEL(1),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_X),
@@ -1717,7 +1792,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_ELEM(ELEM_X),
                                  CLAMP(0));
 
-    /* 18 srcY / h */
+    /* 23 srcY / h */
     shader[i++] = ALU_DWORD0(SRC0_SEL(1),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_Y),
@@ -1744,7 +1819,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_ELEM(ELEM_Y),
                                  CLAMP(0));
 
-    /* 19 maskX / w */
+    /* 24 maskX / w */
     shader[i++] = ALU_DWORD0(SRC0_SEL(0),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_X),
@@ -1771,7 +1846,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_ELEM(ELEM_X),
                                  CLAMP(0));
 
-    /* 20 maskY / h */
+    /* 25 maskY / h */
     shader[i++] = ALU_DWORD0(SRC0_SEL(0),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_Y),
@@ -1797,164 +1872,8 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_REL(ABSOLUTE),
                                  DST_ELEM(ELEM_Y),
                                  CLAMP(0));
-    /* 21 */
-    shader[i++] = 0x00000000;
-    shader[i++] = 0x00000000;
-
-    /* 22/23 - dst */
-    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			     FETCH_WHOLE_QUAD(0),
-			     BUFFER_ID(0),
-			     SRC_GPR(0),
-			     SRC_REL(ABSOLUTE),
-			     SRC_SEL_X(SQ_SEL_X),
-			     MEGA_FETCH_COUNT(24));
-    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
-				 DST_REL(0),
-				 DST_SEL_X(SQ_SEL_X),
-				 DST_SEL_Y(SQ_SEL_Y),
-				 DST_SEL_Z(SQ_SEL_0),
-				 DST_SEL_W(SQ_SEL_1),
-				 USE_CONST_FIELDS(0),
-				 DATA_FORMAT(FMT_32_32_FLOAT),
-				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
-				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    shader[i++] = VTX_DWORD2(OFFSET(0),
-			     ENDIAN_SWAP(ENDIAN_NONE),
-			     CONST_BUF_NO_STRIDE(0),
-			     MEGA_FETCH(1));
-    shader[i++] = VTX_DWORD_PAD;
-    /* 24/25 - src */
-    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			     FETCH_WHOLE_QUAD(0),
-			     BUFFER_ID(0),
-			     SRC_GPR(0),
-			     SRC_REL(ABSOLUTE),
-			     SRC_SEL_X(SQ_SEL_X),
-			     MEGA_FETCH_COUNT(8));
-    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
-				 DST_REL(0),
-				 DST_SEL_X(SQ_SEL_X),
-				 DST_SEL_Y(SQ_SEL_Y),
-				 DST_SEL_Z(SQ_SEL_1),
-				 DST_SEL_W(SQ_SEL_0),
-				 USE_CONST_FIELDS(0),
-				 DATA_FORMAT(FMT_32_32_FLOAT),
-				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
-				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    shader[i++] = VTX_DWORD2(OFFSET(8),
-			     ENDIAN_SWAP(ENDIAN_NONE),
-			     CONST_BUF_NO_STRIDE(0),
-			     MEGA_FETCH(0));
-    shader[i++] = VTX_DWORD_PAD;
-    /* 26/27 - mask */
-    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
-			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
-			     FETCH_WHOLE_QUAD(0),
-			     BUFFER_ID(0),
-			     SRC_GPR(0),
-			     SRC_REL(ABSOLUTE),
-			     SRC_SEL_X(SQ_SEL_X),
-			     MEGA_FETCH_COUNT(8));
-    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
-				 DST_REL(0),
-				 DST_SEL_X(SQ_SEL_X),
-				 DST_SEL_Y(SQ_SEL_Y),
-				 DST_SEL_Z(SQ_SEL_1),
-				 DST_SEL_W(SQ_SEL_0),
-				 USE_CONST_FIELDS(0),
-				 DATA_FORMAT(FMT_32_32_FLOAT),
-				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
-				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
-				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
-    shader[i++] = VTX_DWORD2(OFFSET(16),
-			     ENDIAN_SWAP(ENDIAN_NONE),
-			     CONST_BUF_NO_STRIDE(0),
-			     MEGA_FETCH(0));
-    shader[i++] = VTX_DWORD_PAD;
 
-    /* 28 - non-mask sub */
-    shader[i++] = CF_DWORD0(ADDR(40));
-    shader[i++] = CF_DWORD1(POP_COUNT(0),
-			    CF_CONST(0),
-			    COND(SQ_CF_COND_ACTIVE),
-			    I_COUNT(2),
-			    CALL_COUNT(0),
-			    END_OF_PROGRAM(0),
-			    VALID_PIXEL_MODE(0),
-			    CF_INST(SQ_CF_INST_VTX),
-			    WHOLE_QUAD_MODE(0),
-			    BARRIER(1));
-
-    /* 29 - ALU */
-    shader[i++] = CF_ALU_DWORD0(ADDR(33),
-				KCACHE_BANK0(0),
-				KCACHE_BANK1(0),
-				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
-    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
-				KCACHE_ADDR0(0),
-				KCACHE_ADDR1(0),
-				I_COUNT(6),
-				USES_WATERFALL(0),
-				CF_INST(SQ_CF_INST_ALU),
-				WHOLE_QUAD_MODE(0),
-				BARRIER(1));
-
-    /* 30 - dst */
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
-					  TYPE(SQ_EXPORT_POS),
-					  RW_GPR(1),
-					  RW_REL(ABSOLUTE),
-					  INDEX_GPR(0),
-					  ELEM_SIZE(0));
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					       SRC_SEL_Y(SQ_SEL_Y),
-					       SRC_SEL_Z(SQ_SEL_0),
-					       SRC_SEL_W(SQ_SEL_1),
-					       R6xx_ELEM_LOOP(0),
-					       BURST_COUNT(0),
-					       END_OF_PROGRAM(0),
-					       VALID_PIXEL_MODE(0),
-					       CF_INST(SQ_CF_INST_EXPORT_DONE),
-					       WHOLE_QUAD_MODE(0),
-					       BARRIER(1));
-    /* 31 - src */
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
-					  TYPE(SQ_EXPORT_PARAM),
-					  RW_GPR(0),
-					  RW_REL(ABSOLUTE),
-					  INDEX_GPR(0),
-					  ELEM_SIZE(0));
-    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
-					       SRC_SEL_Y(SQ_SEL_Y),
-					       SRC_SEL_Z(SQ_SEL_0),
-					       SRC_SEL_W(SQ_SEL_1),
-					       R6xx_ELEM_LOOP(0),
-					       BURST_COUNT(0),
-					       END_OF_PROGRAM(0),
-					       VALID_PIXEL_MODE(0),
-					       CF_INST(SQ_CF_INST_EXPORT_DONE),
-					       WHOLE_QUAD_MODE(0),
-					       BARRIER(0));
-    /* 32 */
-    shader[i++] = CF_DWORD0(ADDR(0));
-    shader[i++] = CF_DWORD1(POP_COUNT(0),
-			    CF_CONST(0),
-			    COND(SQ_CF_COND_ACTIVE),
-			    I_COUNT(0),
-			    CALL_COUNT(0),
-			    END_OF_PROGRAM(0),
-			    VALID_PIXEL_MODE(0),
-			    CF_INST(SQ_CF_INST_RETURN),
-			    WHOLE_QUAD_MODE(0),
-			    BARRIER(1));
-
-
-    /* 33 srcX MAD */
+    /* 26 srcX MAD - non-mask */
     shader[i++] = ALU_DWORD0(SRC0_SEL(256),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_Y),
@@ -1976,7 +1895,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_REL(ABSOLUTE),
                                  DST_ELEM(ELEM_Z),
                                  CLAMP(0));
-    /* 34 srcY MAD */
+    /* 27 srcY MAD */
     shader[i++] = ALU_DWORD0(SRC0_SEL(257),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_Y),
@@ -1999,7 +1918,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_ELEM(ELEM_W),
                                  CLAMP(0));
 
-    /* 35 srcX MAD */
+    /* 28 srcX MAD */
     shader[i++] = ALU_DWORD0(SRC0_SEL(256),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_X),
@@ -2021,7 +1940,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_REL(ABSOLUTE),
                                  DST_ELEM(ELEM_X),
                                  CLAMP(0));
-    /* 36 srcY MAD */
+    /* 29 srcY MAD */
     shader[i++] = ALU_DWORD0(SRC0_SEL(257),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_X),
@@ -2043,7 +1962,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_REL(ABSOLUTE),
                                  DST_ELEM(ELEM_Y),
                                  CLAMP(0));
-    /* 37 srcX / w */
+    /* 30 srcX / w */
     shader[i++] = ALU_DWORD0(SRC0_SEL(0),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_X),
@@ -2070,7 +1989,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_ELEM(ELEM_X),
                                  CLAMP(0));
 
-    /* 38 srcY / h */
+    /* 31 srcY / h */
     shader[i++] = ALU_DWORD0(SRC0_SEL(0),
                              SRC0_REL(ABSOLUTE),
                              SRC0_ELEM(ELEM_Y),
@@ -2097,11 +2016,83 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
                                  DST_ELEM(ELEM_Y),
                                  CLAMP(0));
 
-    /* 39 */
-    shader[i++] = 0x00000000;
-    shader[i++] = 0x00000000;
+    /* 32/33 - dst - mask */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(24));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(1));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 34/35 - src */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_1),
+				 DST_SEL_W(SQ_SEL_0),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(0));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 36/37 - mask */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     MEGA_FETCH_COUNT(8));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_1),
+				 DST_SEL_W(SQ_SEL_0),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(16),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     MEGA_FETCH(0));
+    shader[i++] = VTX_DWORD_PAD;
 
-    /* 40/41 - dst */
+    /* 38/39 - dst - non-mask */
     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			     FETCH_WHOLE_QUAD(0),
@@ -2126,7 +2117,7 @@ int R600_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
 			     CONST_BUF_NO_STRIDE(0),
 			     MEGA_FETCH(1));
     shader[i++] = VTX_DWORD_PAD;
-    /* 42/43 - src */
+    /* 40/41 - src */
     shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
 			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
 			     FETCH_WHOLE_QUAD(0),


More information about the xorg-commit mailing list