xf86-video-ati: Branch 'master' - 12 commits

Alex Deucher agd5f at kemper.freedesktop.org
Fri Apr 11 08:02:39 PDT 2008


 src/radeon.h                     |   23 ++-
 src/radeon_accel.c               |   64 +++++++-
 src/radeon_commonfuncs.c         |  287 ++++++++++++++++++++++++++++++++++-----
 src/radeon_driver.c              |   17 ++
 src/radeon_exa_render.c          |  177 +++++++++++++++---------
 src/radeon_reg.h                 |   15 ++
 src/radeon_textured_videofuncs.c |  131 ++++++++++++-----
 7 files changed, 564 insertions(+), 150 deletions(-)

New commits:
commit f3e68d4b7afd2e23675bf6361c496814c9cb4b94
Author: Alex Deucher <alex at botch2.com>
Date:   Fri Apr 11 10:59:07 2008 -0400

    Fix exa glyph corruption on newer chips

diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 2bc2c65..c1ea4d2 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -1888,8 +1888,10 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
 		 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
 		 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
     } else {
-	if (IS_R300_3D || IS_R500_3D)
+	if (IS_R300_3D)
 	    BEGIN_RING(4 * vtx_count + 4);
+	else if (IS_R500_3D)
+	    BEGIN_RING(4 * vtx_count + 6);
 	else
 	    BEGIN_RING(4 * vtx_count + 2);
 
@@ -1901,8 +1903,10 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
     }
 
 #else /* ACCEL_CP */
-    if (IS_R300_3D || IS_R500_3D)
+    if (IS_R300_3D)
 	BEGIN_ACCEL(2 + vtx_count * 4);
+    else if (IS_R500_3D)
+	BEGIN_ACCEL(3 + vtx_count * 4);
     else
 	BEGIN_ACCEL(1 + vtx_count * 4);
 
@@ -1931,9 +1935,14 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
 	    xFixedToFloat(srcTopRight.x) / info->texW[0],     xFixedToFloat(srcTopRight.y) / info->texH[0],
 	    xFixedToFloat(maskTopRight.x) / info->texW[1],    xFixedToFloat(maskTopRight.y) / info->texH[1]);
 
-    if (IS_R300_3D || IS_R500_3D)
+    if (IS_R300_3D)
 	/* flushing is pipelined, free/finish is not */
 	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+    else if (IS_R500_3D) {
+	/* r500 shows corruption on small things like glyphs without a 3D idle */
+	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+	OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
+    }
 
 #ifdef ACCEL_CP
     ADVANCE_RING();
commit b59686d6427cbf8b35e36b020cbbc6a0c5149b22
Author: Alex Deucher <alex at botch2.com>
Date:   Fri Apr 11 10:15:25 2008 -0400

    R300+: pre-load vertex programs in init3D()

diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c
index 5bc502c..025fb56 100644
--- a/src/radeon_commonfuncs.c
+++ b/src/radeon_commonfuncs.c
@@ -144,8 +144,7 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	OUT_ACCEL_REG(R300_SU_DEPTH_OFFSET, 0);
 	FINISH_ACCEL();
 
-	/* setup the VAP */
-	
+	/* setup the VAP */	
 	if (info->has_tcl) {
 	    BEGIN_ACCEL(11);
 	    OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, 0);
@@ -171,6 +170,224 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	}
 	FINISH_ACCEL();
 
+	/* pre-load the vertex shaders */
+	if (info->has_tcl) {
+	    /* exa mask shader program */
+	    BEGIN_ACCEL(13);
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
+	    /* PVS inst 0 */
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+			   R300_PVS_DST_OFFSET(0) |
+			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+	    /* PVS inst 1 */
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+			   R300_PVS_DST_OFFSET(1) |
+			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(1) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(1) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(1) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+	    /* PVS inst 2 */
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+			   R300_PVS_DST_OFFSET(2) |
+			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(2) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(2) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(2) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    FINISH_ACCEL();
+
+	    BEGIN_ACCEL(9);
+	    /* exa no mask instruction */
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 3);
+	    /* PVS inst 0 */
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+			   R300_PVS_DST_OFFSET(0) |
+			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+	    /* PVS inst 1 */
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+			   R300_PVS_DST_OFFSET(1) |
+			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(1) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(1) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(1) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    FINISH_ACCEL();
+
+	    /* Xv shader program */
+	    BEGIN_ACCEL(9);
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 5);
+
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+			   R300_PVS_DST_OFFSET(0) |
+			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+			   R300_PVS_DST_OFFSET(1) |
+			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(10) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(10) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(10) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    FINISH_ACCEL();
+	}
+
+
 	BEGIN_ACCEL(4);
 	OUT_ACCEL_REG(R300_US_W_FMT, 0);
 	OUT_ACCEL_REG(R300_US_OUT_FMT_1, (R300_OUT_FMT_UNUSED |
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index b806399..2bc2c65 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -26,6 +26,7 @@
  *    Eric Anholt <anholt at FreeBSD.org>
  *    Zack Rusin <zrusin at trolltech.com>
  *    Benjamin Herrenschmidt <benh at kernel.crashing.org>
+ *    Alex Deucher <alexander.deucher at amd.com>
  *
  */
 
@@ -1168,10 +1169,10 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     }
     FINISH_ACCEL();
 
-    /* setup the vertex shader */
+    /* load the vertex shader */
     if (info->has_tcl) {
+	BEGIN_ACCEL(2);
 	if (pMask) {
-	    BEGIN_ACCEL(15);
 	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
 			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
 			   (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
@@ -1179,104 +1180,13 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
 			  (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
 	} else {
-	    BEGIN_ACCEL(11);
 	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
-			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
-			   (1 << R300_PVS_XYZW_VALID_INST_SHIFT) |
-			   (1 << R300_PVS_LAST_INST_SHIFT)));
+			  ((3 << R300_PVS_FIRST_INST_SHIFT) |
+			   (4 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+			   (4 << R300_PVS_LAST_INST_SHIFT)));
 	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
-			  (1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
-	}
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
-	/* PVS inst 0 */
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-		      (R300_PVS_DST_OPCODE(R300_VE_ADD) |
-		       R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
-		       R300_PVS_DST_OFFSET(0) |
-		       R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
-		       R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-		       R300_PVS_SRC_OFFSET(0) |
-		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
-		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
-		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
-		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-		       R300_PVS_SRC_OFFSET(0) |
-		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-		       R300_PVS_SRC_OFFSET(0) |
-		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
-
-	/* PVS inst 1 */
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-		      (R300_PVS_DST_OPCODE(R300_VE_ADD) |
-		       R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
-		       R300_PVS_DST_OFFSET(1) |
-		       R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
-		       R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-		       R300_PVS_SRC_OFFSET(1) |
-		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
-		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
-		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
-		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-		       R300_PVS_SRC_OFFSET(1) |
-		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
-	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-		       R300_PVS_SRC_OFFSET(1) |
-		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
-		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
-
-	if (pMask) {
-	    /* PVS inst 2 */
-	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
-			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
-			   R300_PVS_DST_OFFSET(2) |
-			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
-			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
-	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-			   R300_PVS_SRC_OFFSET(2) |
-			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
-			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
-			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
-			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
-	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-			   R300_PVS_SRC_OFFSET(2) |
-			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
-	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-			   R300_PVS_SRC_OFFSET(2) |
-			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+			  (4 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
 	}
-
 	FINISH_ACCEL();
     }
 
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 363490a..564f66c 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -193,7 +193,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
 	/* setup the VAP */
 	if (info->has_tcl) {
-	    BEGIN_VIDEO(16);
+	    BEGIN_VIDEO(7);
 	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
 			  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
 			   (0 << R300_SKIP_DWORDS_0_SHIFT) |
@@ -244,72 +244,14 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 			    << R300_WRITE_ENA_1_SHIFT)));
 	}
 
-	/* setup vertex shader */
+	/* load the vertex shader */
 	if (info->has_tcl) {
 	    OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_0,
-			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
-			   (1 << R300_PVS_XYZW_VALID_INST_SHIFT) |
-			   (1 << R300_PVS_LAST_INST_SHIFT)));
+			  ((5 << R300_PVS_FIRST_INST_SHIFT) |
+			   (6 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+			   (6 << R300_PVS_LAST_INST_SHIFT)));
 	    OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_1,
-			  (1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
-	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
-
-	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
-			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
-			   R300_PVS_DST_OFFSET(0) |
-			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
-			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
-	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-			   R300_PVS_SRC_OFFSET(0) |
-			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
-			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
-			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
-			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
-	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-			   R300_PVS_SRC_OFFSET(0) |
-			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
-	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-			   R300_PVS_SRC_OFFSET(0) |
-			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
-
-	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
-			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
-			   R300_PVS_DST_OFFSET(1) |
-			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
-			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
-	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-			   R300_PVS_SRC_OFFSET(10) |
-			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
-			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
-			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
-			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
-	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-			   R300_PVS_SRC_OFFSET(10) |
-			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
-	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
-			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
-			   R300_PVS_SRC_OFFSET(10) |
-			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
-			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
-
+			  (6 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
 	}
 
 	OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
commit acc5833a35ad6c29a57f659607afb27eebdc2ea5
Author: Alex Deucher <alex at botch2.com>
Date:   Thu Apr 10 17:52:52 2008 -0400

    R3xx+: consolidate more tcl code

diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c
index 69b3536..5bc502c 100644
--- a/src/radeon_commonfuncs.c
+++ b/src/radeon_commonfuncs.c
@@ -145,12 +145,14 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	FINISH_ACCEL();
 
 	/* setup the VAP */
-	BEGIN_ACCEL(5);
-	if (info->has_tcl)
+	
+	if (info->has_tcl) {
+	    BEGIN_ACCEL(11);
 	    OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, 0);
-	else
+	} else {
+	    BEGIN_ACCEL(5);
 	    OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
-
+	}
 	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
 	OUT_ACCEL_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
 				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
@@ -158,6 +160,15 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 				      (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
 	OUT_ACCEL_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
 	OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
+
+	if (info->has_tcl) {
+	    OUT_ACCEL_REG(R300_VAP_PVS_FLOW_CNTL_OPC, 0);
+	    OUT_ACCEL_REG(R300_VAP_GB_VERT_CLIP_ADJ, 0x3f800000);
+	    OUT_ACCEL_REG(R300_VAP_GB_VERT_DISC_ADJ, 0x3f800000);
+	    OUT_ACCEL_REG(R300_VAP_GB_HORZ_CLIP_ADJ, 0x3f800000);
+	    OUT_ACCEL_REG(R300_VAP_GB_HORZ_DISC_ADJ, 0x3f800000);
+	    OUT_ACCEL_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+	}
 	FINISH_ACCEL();
 
 	BEGIN_ACCEL(4);
@@ -186,7 +197,8 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	OUT_ACCEL_REG(R300_FG_ALPHA_FUNC, 0);
 	FINISH_ACCEL();
 
-	BEGIN_ACCEL(12);
+	BEGIN_ACCEL(13);
+	OUT_ACCEL_REG(R300_RB3D_ABLENDCNTL, 0);
 	OUT_ACCEL_REG(R300_RB3D_ZSTENCILCNTL, 0);
 	OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE);
 	OUT_ACCEL_REG(R300_RB3D_BW_CNTL, 0);
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index fab8f97..b806399 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -1051,10 +1051,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     CARD32 txenable, colorpitch;
     CARD32 blendcntl;
     int pixel_shift;
-    /*int has_tcl = ((info->ChipFamily != CHIP_FAMILY_RS690) &&
-		   (info->ChipFamily != CHIP_FAMILY_RS740) &&
-		   (info->ChipFamily != CHIP_FAMILY_RS400) &&
-		   (info->ChipFamily != CHIP_FAMILY_RV515));*/
     ACCEL_PREAMBLE();
 
     TRACE;
@@ -1175,10 +1171,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     /* setup the vertex shader */
     if (info->has_tcl) {
 	if (pMask) {
-	    BEGIN_ACCEL(22);
-	    /* flush the PVS before updating??? */
-	    OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-
+	    BEGIN_ACCEL(15);
 	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
 			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
 			   (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
@@ -1186,10 +1179,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
 			  (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
 	} else {
-	    BEGIN_ACCEL(18);
-	    /* flush the PVS before updating??? */
-	    OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-
+	    BEGIN_ACCEL(11);
 	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
 			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
 			   (1 << R300_PVS_XYZW_VALID_INST_SHIFT) |
@@ -1287,13 +1277,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
 	}
 
-	OUT_ACCEL_REG(R300_VAP_PVS_FLOW_CNTL_OPC, 0);
-
-	OUT_ACCEL_REG(R300_VAP_GB_VERT_CLIP_ADJ, 0x3f800000);
-	OUT_ACCEL_REG(R300_VAP_GB_VERT_DISC_ADJ, 0x3f800000);
-	OUT_ACCEL_REG(R300_VAP_GB_HORZ_CLIP_ADJ, 0x3f800000);
-	OUT_ACCEL_REG(R300_VAP_GB_HORZ_DISC_ADJ, 0x3f800000);
-	OUT_ACCEL_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
 	FINISH_ACCEL();
     }
 
@@ -1871,14 +1854,13 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 	FINISH_ACCEL();
     }
 
-    BEGIN_ACCEL(4);
+    BEGIN_ACCEL(3);
 
     OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
     OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
 
     blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
     OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
-    OUT_ACCEL_REG(R300_RB3D_ABLENDCNTL, 0);
 
     FINISH_ACCEL();
 
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 90f01e8..363490a 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -122,10 +122,6 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
     if (IS_R300_3D || IS_R500_3D) {
 	CARD32 output_fmt;
-	/*int has_tcl = ((info->ChipFamily != CHIP_FAMILY_RS690) &&
-		       (info->ChipFamily != CHIP_FAMILY_RS740) &&
-		       (info->ChipFamily != CHIP_FAMILY_RS400) &&
-		       (info->ChipFamily != CHIP_FAMILY_RV515));*/
 
 	switch (pPixmap->drawable.bitsPerPixel) {
 	case 16:
@@ -196,12 +192,8 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	txenable = R300_TEX_0_ENABLE;
 
 	/* setup the VAP */
-	if (info->has_tcl)
-	    BEGIN_VIDEO(22);
-	else
-	    BEGIN_VIDEO(5);
-
 	if (info->has_tcl) {
+	    BEGIN_VIDEO(16);
 	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
 			  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
 			   (0 << R300_SKIP_DWORDS_0_SHIFT) |
@@ -226,6 +218,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 			   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
 			    << R300_WRITE_ENA_1_SHIFT)));
 	} else {
+	    BEGIN_VIDEO(5);
 	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
 			  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
 			   (0 << R300_SKIP_DWORDS_0_SHIFT) |
@@ -317,13 +310,6 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
 			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
 
-	    OUT_VIDEO_REG(R300_VAP_PVS_FLOW_CNTL_OPC, 0);
-
-	    OUT_VIDEO_REG(R300_VAP_GB_VERT_CLIP_ADJ, 0x3f800000);
-	    OUT_VIDEO_REG(R300_VAP_GB_VERT_DISC_ADJ, 0x3f800000);
-	    OUT_VIDEO_REG(R300_VAP_GB_HORZ_CLIP_ADJ, 0x3f800000);
-	    OUT_VIDEO_REG(R300_VAP_GB_HORZ_DISC_ADJ, 0x3f800000);
-	    OUT_VIDEO_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
 	}
 
 	OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
@@ -523,7 +509,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	    FINISH_VIDEO();
 	}
 
-	BEGIN_VIDEO(6);
+	BEGIN_VIDEO(5);
 	OUT_VIDEO_REG(R300_TX_INVALTAGS, 0);
 	OUT_VIDEO_REG(R300_TX_ENABLE, txenable);
 
@@ -531,8 +517,8 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	OUT_VIDEO_REG(R300_RB3D_COLORPITCH0, colorpitch);
 
 	blendcntl = RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO;
+	/* no need to enable blending */
 	OUT_VIDEO_REG(R300_RB3D_BLENDCNTL, blendcntl);
-	OUT_VIDEO_REG(R300_RB3D_ABLENDCNTL, 0);
 	FINISH_VIDEO();
 
 	BEGIN_VIDEO(1);
commit 6f8f75bd19ef1919c0291141675be2d0e29b3251
Author: Alex Deucher <alex at botch2.com>
Date:   Thu Apr 10 17:08:50 2008 -0400

    R3xx+: consolidate some common 3D code

diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c
index deb2c99..69b3536 100644
--- a/src/radeon_commonfuncs.c
+++ b/src/radeon_commonfuncs.c
@@ -144,6 +144,22 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	OUT_ACCEL_REG(R300_SU_DEPTH_OFFSET, 0);
 	FINISH_ACCEL();
 
+	/* setup the VAP */
+	BEGIN_ACCEL(5);
+	if (info->has_tcl)
+	    OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, 0);
+	else
+	    OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
+
+	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+	OUT_ACCEL_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
+				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+				      (4 << R300_PVS_NUM_FPUS_SHIFT) |
+				      (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
+	OUT_ACCEL_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
+	OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
+	FINISH_ACCEL();
+
 	BEGIN_ACCEL(4);
 	OUT_ACCEL_REG(R300_US_W_FMT, 0);
 	OUT_ACCEL_REG(R300_US_OUT_FMT_1, (R300_OUT_FMT_UNUSED |
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 1158f89..fab8f97 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -1096,23 +1096,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     RADEON_SWITCH_TO_3D();
 
     /* setup the VAP */
-    if (info->has_tcl) {
-	BEGIN_ACCEL(9);
-	OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, 0);
-
-    } else {
-	BEGIN_ACCEL(9);
-	OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
-    }
-
-    OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-    OUT_ACCEL_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
-				  (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-				  (4 << R300_PVS_NUM_FPUS_SHIFT) |
-				  (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
-    OUT_ACCEL_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
-    OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
-
+    BEGIN_ACCEL(4);
     if (info->has_tcl) {
 	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
 		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index dbf8562..90f01e8 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -196,21 +196,10 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	txenable = R300_TEX_0_ENABLE;
 
 	/* setup the VAP */
-	if (info->has_tcl) {
-	    BEGIN_VIDEO(27);
-	    OUT_VIDEO_REG(R300_VAP_CNTL_STATUS, 0);
-	} else {
-	    BEGIN_VIDEO(10);
-	    OUT_VIDEO_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
-	}
-
-	OUT_VIDEO_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-	OUT_VIDEO_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
-				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-				      (4 << R300_PVS_NUM_FPUS_SHIFT) |
-				      (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
-	OUT_VIDEO_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
-	OUT_VIDEO_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
+	if (info->has_tcl)
+	    BEGIN_VIDEO(22);
+	else
+	    BEGIN_VIDEO(5);
 
 	if (info->has_tcl) {
 	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
commit 4b9234e1c4f7c7f419cb4245d64f3f9756c98bb6
Author: Alex Deucher <alex at botch2.com>
Date:   Thu Apr 10 16:58:22 2008 -0400

    R3xx+: tcl wip

diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index c7ee62c..1158f89 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -1051,10 +1051,10 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     CARD32 txenable, colorpitch;
     CARD32 blendcntl;
     int pixel_shift;
-    int has_tcl = ((info->ChipFamily != CHIP_FAMILY_RS690) &&
+    /*int has_tcl = ((info->ChipFamily != CHIP_FAMILY_RS690) &&
 		   (info->ChipFamily != CHIP_FAMILY_RS740) &&
 		   (info->ChipFamily != CHIP_FAMILY_RS400) &&
-		   (info->ChipFamily != CHIP_FAMILY_RV515));
+		   (info->ChipFamily != CHIP_FAMILY_RV515));*/
     ACCEL_PREAMBLE();
 
     TRACE;
@@ -1096,27 +1096,24 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     RADEON_SWITCH_TO_3D();
 
     /* setup the VAP */
-    if (has_tcl) {
+    if (info->has_tcl) {
 	BEGIN_ACCEL(9);
 	OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, 0);
-	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-	OUT_ACCEL_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
-				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-				      (4 << R300_PVS_NUM_FPUS_SHIFT) |
-				      (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
+
     } else {
-	BEGIN_ACCEL(8);
+	BEGIN_ACCEL(9);
 	OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
-	OUT_ACCEL_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-				      (4 << R300_PVS_NUM_FPUS_SHIFT) |
-				      (5 << R300_VF_MAX_VTX_NUM_SHIFT)));
     }
 
+    OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+    OUT_ACCEL_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
+				  (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+				  (4 << R300_PVS_NUM_FPUS_SHIFT) |
+				  (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
     OUT_ACCEL_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
     OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
 
-    if (has_tcl) {
+    if (info->has_tcl) {
 	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
 		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
 		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
@@ -1192,7 +1189,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     FINISH_ACCEL();
 
     /* setup the vertex shader */
-    if (has_tcl) {
+    if (info->has_tcl) {
 	if (pMask) {
 	    BEGIN_ACCEL(22);
 	    /* flush the PVS before updating??? */
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 9d7da21..dbf8562 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -199,12 +199,12 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	if (info->has_tcl) {
 	    BEGIN_VIDEO(27);
 	    OUT_VIDEO_REG(R300_VAP_CNTL_STATUS, 0);
-	    OUT_VIDEO_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
 	} else {
-	    BEGIN_VIDEO(9);
+	    BEGIN_VIDEO(10);
 	    OUT_VIDEO_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
 	}
 
+	OUT_VIDEO_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
 	OUT_VIDEO_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
 				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
 				      (4 << R300_PVS_NUM_FPUS_SHIFT) |
commit 865c463e3afb4759758f569132be8bf1386da5cc
Author: Alex Deucher <alex at botch2.com>
Date:   Thu Apr 10 16:51:04 2008 -0400

    R300+: textured video tcl cleanup

diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 6ef8cd0..9d7da21 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -106,15 +106,6 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
     dstyoff = 0;
 #endif
 
-#if 0
-    ErrorF("dst_offset: 0x%x\n", dst_offset);
-    ErrorF("dst_pitch: 0x%x\n", dst_pitch);
-    ErrorF("dstxoff: 0x%x\n", dstxoff);
-    ErrorF("dstyoff: 0x%x\n", dstyoff);
-    ErrorF("src_offset: 0x%x\n", pPriv->src_offset);
-    ErrorF("src_pitch: 0x%x\n", pPriv->src_pitch);
-#endif
-
     if (!info->XInited3D)
 	RADEONInit3DEngine(pScrn);
 
@@ -131,10 +122,10 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
     if (IS_R300_3D || IS_R500_3D) {
 	CARD32 output_fmt;
-	int has_tcl = ((info->ChipFamily != CHIP_FAMILY_RS690) &&
+	/*int has_tcl = ((info->ChipFamily != CHIP_FAMILY_RS690) &&
 		       (info->ChipFamily != CHIP_FAMILY_RS740) &&
 		       (info->ChipFamily != CHIP_FAMILY_RS400) &&
-		       (info->ChipFamily != CHIP_FAMILY_RV515));
+		       (info->ChipFamily != CHIP_FAMILY_RV515));*/
 
 	switch (pPixmap->drawable.bitsPerPixel) {
 	case 16:
@@ -205,27 +196,23 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	txenable = R300_TEX_0_ENABLE;
 
 	/* setup the VAP */
-	if (has_tcl) {
+	if (info->has_tcl) {
 	    BEGIN_VIDEO(27);
 	    OUT_VIDEO_REG(R300_VAP_CNTL_STATUS, 0);
 	    OUT_VIDEO_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-	    OUT_VIDEO_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
-					  (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-					  (4 << R300_PVS_NUM_FPUS_SHIFT) |
-					  (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
 	} else {
 	    BEGIN_VIDEO(9);
 	    OUT_VIDEO_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
-	    OUT_VIDEO_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-					  (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-					  (4 << R300_PVS_NUM_FPUS_SHIFT) |
-					  (5 << R300_VF_MAX_VTX_NUM_SHIFT)));
 	}
 
+	OUT_VIDEO_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
+				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+				      (4 << R300_PVS_NUM_FPUS_SHIFT) |
+				      (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
 	OUT_VIDEO_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
 	OUT_VIDEO_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
 
-	if (has_tcl) {
+	if (info->has_tcl) {
 	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
 			  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
 			   (0 << R300_SKIP_DWORDS_0_SHIFT) |
@@ -276,7 +263,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	}
 
 	/* setup vertex shader */
-	if (has_tcl) {
+	if (info->has_tcl) {
 	    OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_0,
 			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
 			   (1 << R300_PVS_XYZW_VALID_INST_SHIFT) |
commit 79c8d4ca36a1c3e5fe759d4ccc379c36af8f1676
Author: Alex Deucher <alex at botch2.com>
Date:   Thu Apr 10 16:28:18 2008 -0400

    RADEON: cleanup

diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 6317ecb..c7ee62c 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -1573,103 +1573,80 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 	CARD32 mask_color, mask_alpha;
 
 	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
-	    //src_color = R300_ALU_RGB_0_0;
 	    src_color = (R500_ALU_RGB_R_SWIZ_A_0 |
 			 R500_ALU_RGB_G_SWIZ_A_0 |
 			 R500_ALU_RGB_B_SWIZ_A_0);
 	else
-	    //src_color = R300_ALU_RGB_SRC0_RGB;
 	    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
 			 R500_ALU_RGB_G_SWIZ_A_G |
 			 R500_ALU_RGB_B_SWIZ_A_B);
 
 	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
-	    //src_alpha = R300_ALU_ALPHA_1_0;
 	    src_alpha = R500_ALPHA_SWIZ_A_1;
 	else
-	    //src_alpha = R300_ALU_ALPHA_SRC0_A;
 	    src_alpha = R500_ALPHA_SWIZ_A_A;
 
 	if (pMask && pMaskPicture->componentAlpha) {
 	    if (RadeonBlendOp[op].src_alpha) {
 		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
-		    //src_color = R300_ALU_RGB_1_0;
-		    //src_alpha = R300_ALU_ALPHA_1_0;
 		    src_color = (R500_ALU_RGB_R_SWIZ_A_1 |
 				 R500_ALU_RGB_G_SWIZ_A_1 |
 				 R500_ALU_RGB_B_SWIZ_A_1);
 		    src_alpha = R500_ALPHA_SWIZ_A_1;
 		} else {
-		    //src_color = R300_ALU_RGB_SRC0_AAA;
-		    //src_alpha = R300_ALU_ALPHA_SRC0_A;
 		    src_color = (R500_ALU_RGB_R_SWIZ_A_A |
 				 R500_ALU_RGB_G_SWIZ_A_A |
 				 R500_ALU_RGB_B_SWIZ_A_A);
 		    src_alpha = R500_ALPHA_SWIZ_A_A;
 		}
 
-		//mask_color = R300_ALU_RGB_SRC1_RGB;
 		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
 			      R500_ALU_RGB_G_SWIZ_B_G |
 			      R500_ALU_RGB_B_SWIZ_B_B);
 
 		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
-		    //mask_alpha = R300_ALU_ALPHA_1_0;
 		    mask_alpha = R500_ALPHA_SWIZ_B_1;
 		else
-		    //mask_alpha = R300_ALU_ALPHA_SRC1_A;
 		    mask_alpha = R500_ALPHA_SWIZ_B_A;
 
 	    } else {
-		//src_color = R300_ALU_RGB_SRC0_RGB;
 		src_color = (R500_ALU_RGB_R_SWIZ_A_R |
 			     R500_ALU_RGB_G_SWIZ_A_G |
 			     R500_ALU_RGB_B_SWIZ_A_B);
 
 		if (PICT_FORMAT_A(pSrcPicture->format) == 0)
-		    //src_alpha = R300_ALU_ALPHA_1_0;
 		    src_alpha = R500_ALPHA_SWIZ_A_1;
 		else
-		    //src_alpha = R300_ALU_ALPHA_SRC0_A;
 		    src_alpha = R500_ALPHA_SWIZ_A_A;
 
-		//mask_color = R300_ALU_RGB_SRC1_RGB;
 		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
 			      R500_ALU_RGB_G_SWIZ_B_G |
 			      R500_ALU_RGB_B_SWIZ_B_B);
 
 		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
-		    //mask_alpha = R300_ALU_ALPHA_1_0;
 		    mask_alpha = R500_ALPHA_SWIZ_B_1;
 		else
-		    //mask_alpha = R300_ALU_ALPHA_SRC1_A;
 		    mask_alpha = R500_ALPHA_SWIZ_B_A;
 
 	    }
 	} else if (pMask) {
 	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
-		//mask_color = R300_ALU_RGB_1_0;
 		mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
 			      R500_ALU_RGB_G_SWIZ_B_1 |
 			      R500_ALU_RGB_B_SWIZ_B_1);
 	    else
-		//mask_color = R300_ALU_RGB_SRC1_AAA;
 		mask_color = (R500_ALU_RGB_R_SWIZ_B_A |
 			      R500_ALU_RGB_G_SWIZ_B_A |
 			      R500_ALU_RGB_B_SWIZ_B_A);
 
 	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
-		//mask_alpha = R300_ALU_ALPHA_1_0;
 		mask_alpha = R500_ALPHA_SWIZ_B_1;
 	    else
-		//mask_alpha = R300_ALU_ALPHA_SRC1_A;
 		mask_alpha = R500_ALPHA_SWIZ_B_A;
 	} else {
-	    //mask_color = R300_ALU_RGB_1_0;
 	    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
 			  R500_ALU_RGB_G_SWIZ_B_1 |
 			  R500_ALU_RGB_B_SWIZ_B_1);
-	    //mask_alpha = R300_ALU_ALPHA_1_0;
 	    mask_alpha = R500_ALPHA_SWIZ_B_1;
 	}
 
@@ -1795,9 +1772,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 						   R500_DY_S_SWIZ_R |
 						   R500_DY_T_SWIZ_R |
 						   R500_DY_R_SWIZ_R |
-						   R500_DY_Q_SWIZ_R)); // TEX_ADDR_DXDY
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
+						   R500_DY_Q_SWIZ_R));
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
 
 	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
 						   R500_INST_TEX_SEM_WAIT |
@@ -1830,9 +1807,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 						   R500_DY_S_SWIZ_R |
 						   R500_DY_T_SWIZ_R |
 						   R500_DY_R_SWIZ_R |
-						   R500_DY_Q_SWIZ_R)); // TEX_ADDR_DXDY
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
+						   R500_DY_Q_SWIZ_R));
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
 	} else {
 	    BEGIN_ACCEL(13);
 	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, 0);
@@ -1867,9 +1844,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 						   R500_DY_S_SWIZ_R |
 						   R500_DY_T_SWIZ_R |
 						   R500_DY_R_SWIZ_R |
-						   R500_DY_Q_SWIZ_R)); // TEX_ADDR_DXDY
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
+						   R500_DY_Q_SWIZ_R));
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
 	}
 
 	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index f5282cc..6ef8cd0 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -467,7 +467,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 					       R500_US_CODE_RANGE_SIZE(1)));
 	    OUT_VIDEO_REG(R500_US_CODE_OFFSET, 0);
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_INDEX, 0);
-	    // 7807
+
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
 						   R500_INST_TEX_SEM_WAIT |
 						   R500_INST_RGB_WMASK_R |
@@ -499,11 +499,10 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 						   R500_DY_S_SWIZ_R |
 						   R500_DY_T_SWIZ_R |
 						   R500_DY_R_SWIZ_R |
-						   R500_DY_Q_SWIZ_R)); // TEX_ADDR_DXDY
-	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
-	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
+						   R500_DY_Q_SWIZ_R));
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
 
-	    // 0x78105
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
 						   R500_INST_TEX_SEM_WAIT |
 						   R500_INST_LAST |
@@ -519,13 +518,13 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 						   R500_RGB_ADDR1_CONST |
 						   R500_RGB_ADDR2(0) |
 						   R500_RGB_ADDR2_CONST |
-						   R500_RGB_SRCP_OP_1_MINUS_2RGB0)); //0x10040000
+						   R500_RGB_SRCP_OP_1_MINUS_2RGB0));
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
 						   R500_ALPHA_ADDR1(0) |
 						   R500_ALPHA_ADDR1_CONST |
 						   R500_ALPHA_ADDR2(0) |
 						   R500_ALPHA_ADDR2_CONST |
-						   R500_ALPHA_SRCP_OP_1_MINUS_2A0)); //0x10040000
+						   R500_ALPHA_SRCP_OP_1_MINUS_2A0));
 
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
 						   R500_ALU_RGB_R_SWIZ_A_R |
@@ -534,17 +533,17 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 						   R500_ALU_RGB_SEL_B_SRC0 |
 						   R500_ALU_RGB_R_SWIZ_B_1 |
 						   R500_ALU_RGB_B_SWIZ_B_1 |
-						   R500_ALU_RGB_G_SWIZ_B_1));//0x00db0220
+						   R500_ALU_RGB_G_SWIZ_B_1));
 
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
 						   R500_ALPHA_SWIZ_A_A |
-						   R500_ALPHA_SWIZ_B_1));//0x00c0c000)
+						   R500_ALPHA_SWIZ_B_1));
 
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
 						   R500_ALU_RGBA_R_SWIZ_0 |
 						   R500_ALU_RGBA_G_SWIZ_0 |
 						   R500_ALU_RGBA_B_SWIZ_0 |
-						   R500_ALU_RGBA_A_SWIZ_0));//0x20490000
+						   R500_ALU_RGBA_A_SWIZ_0));
 	    FINISH_VIDEO();
 	}
 
commit c4821a287d29a65f3bcb7d60dc72ec13c0384008
Author: Alex Deucher <alex at botch2.com>
Date:   Thu Apr 10 16:20:17 2008 -0400

    Revert "R3xx/R5xx: move more VAP, etc. state setup into common init3d() function"
    
    This reverts commit 305a3310963a5dd07b3495015b06aa8c7c4e6b02.
    
    Conflicts:
    
    	src/radeon_commonfuncs.c
    	src/radeon_exa_render.c
    	src/radeon_textured_videofuncs.c

diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c
index 987dac6..deb2c99 100644
--- a/src/radeon_commonfuncs.c
+++ b/src/radeon_commonfuncs.c
@@ -60,7 +60,9 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 
     info->texW[0] = info->texH[0] = info->texW[1] = info->texH[1] = 1;
 
-    if (IS_R300_3D || IS_R500_3D) {
+    if (IS_R300_VARIANT || IS_AVIVO_VARIANT ||
+	(info->ChipFamily == CHIP_FAMILY_RS690) ||
+	(info->ChipFamily == CHIP_FAMILY_RS740)) {
 
 	BEGIN_ACCEL(3);
 	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D);
@@ -142,27 +144,6 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	OUT_ACCEL_REG(R300_SU_DEPTH_OFFSET, 0);
 	FINISH_ACCEL();
 
-	/* setup the VAP */
-	BEGIN_ACCEL(6);
-	/* disable TCL/PVS */
-	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-	OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
-	if (IS_R300_3D)
-	    OUT_ACCEL_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-					  (6 << R300_PVS_NUM_CNTLRS_SHIFT) |
-					  (6 << R300_PVS_NUM_FPUS_SHIFT) |
-					  (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
-	else
-	    OUT_ACCEL_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-					  (6 << R300_PVS_NUM_CNTLRS_SHIFT) |
-					  (6 << R300_PVS_NUM_FPUS_SHIFT) |
-					  (12 << R300_VF_MAX_VTX_NUM_SHIFT) |
-					  R500_TCL_STATE_OPTIMIZATION));
-	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
-	OUT_ACCEL_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
-	OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
-	FINISH_ACCEL();
-
 	BEGIN_ACCEL(4);
 	OUT_ACCEL_REG(R300_US_W_FMT, 0);
 	OUT_ACCEL_REG(R300_US_OUT_FMT_1, (R300_OUT_FMT_UNUSED |
@@ -189,8 +170,7 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	OUT_ACCEL_REG(R300_FG_ALPHA_FUNC, 0);
 	FINISH_ACCEL();
 
-	BEGIN_ACCEL(13);
-	OUT_ACCEL_REG(R300_RB3D_ABLENDCNTL, 0);
+	BEGIN_ACCEL(12);
 	OUT_ACCEL_REG(R300_RB3D_ZSTENCILCNTL, 0);
 	OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE);
 	OUT_ACCEL_REG(R300_RB3D_BW_CNTL, 0);
@@ -216,7 +196,8 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	OUT_ACCEL_REG(R300_SC_SCISSOR1, ((8191 << R300_SCISSOR_X_SHIFT) |
 					 (8191 << R300_SCISSOR_Y_SHIFT)));
 
-	if (IS_R300_3D) {
+	if (IS_R300_VARIANT || (info->ChipFamily == CHIP_FAMILY_RS690) ||
+	    (info->ChipFamily == CHIP_FAMILY_RS740)) {
 	    /* clip has offset 1440 */
 	    OUT_ACCEL_REG(R300_SC_CLIP_0_A, ((1088 << R300_CLIP_X_SHIFT) |
 					     (1088 << R300_CLIP_Y_SHIFT)));
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index cd0fa48..6317ecb 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -1051,6 +1051,10 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
     CARD32 txenable, colorpitch;
     CARD32 blendcntl;
     int pixel_shift;
+    int has_tcl = ((info->ChipFamily != CHIP_FAMILY_RS690) &&
+		   (info->ChipFamily != CHIP_FAMILY_RS740) &&
+		   (info->ChipFamily != CHIP_FAMILY_RS400) &&
+		   (info->ChipFamily != CHIP_FAMILY_RV515));
     ACCEL_PREAMBLE();
 
     TRACE;
@@ -1091,49 +1095,234 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 
     RADEON_SWITCH_TO_3D();
 
-    BEGIN_ACCEL(8);
-    OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
-		  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
-		   (0 << R300_SKIP_DWORDS_0_SHIFT) |
-		   (0 << R300_DST_VEC_LOC_0_SHIFT) |
-		   R300_SIGNED_0 |
-		   (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
-		   (0 << R300_SKIP_DWORDS_1_SHIFT) |
-		   (6 << R300_DST_VEC_LOC_1_SHIFT) |
-		   R300_SIGNED_1));
-    OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
-		  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
-		   (0 << R300_SKIP_DWORDS_2_SHIFT) |
-		   (7 << R300_DST_VEC_LOC_2_SHIFT) |
-		   R300_LAST_VEC_2 |
-		   R300_SIGNED_2));
-    OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
-		  ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) |
-		   (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) |
-		   (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_0_SHIFT) |
-		   (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_0_SHIFT) |
-		   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
-		    << R300_WRITE_ENA_0_SHIFT) |
-		   (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) |
-		   (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) |
-		   (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_1_SHIFT) |
-		   (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_1_SHIFT) |
-		   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
-		    << R300_WRITE_ENA_1_SHIFT)));
-    OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_1,
-		  ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_2_SHIFT) |
-		   (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_2_SHIFT) |
-		   (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_2_SHIFT) |
-		   (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_2_SHIFT) |
-		   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
-		    << R300_WRITE_ENA_2_SHIFT)));
+    /* setup the VAP */
+    if (has_tcl) {
+	BEGIN_ACCEL(9);
+	OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, 0);
+	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+	OUT_ACCEL_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
+				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+				      (4 << R300_PVS_NUM_FPUS_SHIFT) |
+				      (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
+    } else {
+	BEGIN_ACCEL(8);
+	OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
+	OUT_ACCEL_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+				      (4 << R300_PVS_NUM_FPUS_SHIFT) |
+				      (5 << R300_VF_MAX_VTX_NUM_SHIFT)));
+    }
+
+    OUT_ACCEL_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
+    OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
+
+    if (has_tcl) {
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
+		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
+		       R300_SIGNED_0 |
+		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
+		       (1 << R300_DST_VEC_LOC_1_SHIFT) |
+		       R300_SIGNED_1));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
+		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
+		       (2 << R300_DST_VEC_LOC_2_SHIFT) |
+		       R300_LAST_VEC_2 |
+		       R300_SIGNED_2));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+		      ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_0_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			<< R300_WRITE_ENA_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_1_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			<< R300_WRITE_ENA_1_SHIFT)));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_1,
+		      ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_2_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			<< R300_WRITE_ENA_2_SHIFT)));
+    } else {
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_0,
+		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
+		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
+		       R300_SIGNED_0 |
+		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
+		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
+		       R300_SIGNED_1));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_1,
+		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_2_SHIFT) |
+		       (0 << R300_SKIP_DWORDS_2_SHIFT) |
+		       (7 << R300_DST_VEC_LOC_2_SHIFT) |
+		       R300_LAST_VEC_2 |
+		       R300_SIGNED_2));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+		      ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_0_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			<< R300_WRITE_ENA_0_SHIFT) |
+		       (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_1_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_1_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			<< R300_WRITE_ENA_1_SHIFT)));
+	OUT_ACCEL_REG(R300_VAP_PROG_STREAM_CNTL_EXT_1,
+		      ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_2_SHIFT) |
+		       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_2_SHIFT) |
+		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			<< R300_WRITE_ENA_2_SHIFT)));
+    }
+    FINISH_ACCEL();
+
+    /* setup the vertex shader */
+    if (has_tcl) {
+	if (pMask) {
+	    BEGIN_ACCEL(22);
+	    /* flush the PVS before updating??? */
+	    OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+
+	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
+			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
+			   (2 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+			   (2 << R300_PVS_LAST_INST_SHIFT)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
+			  (2 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+	} else {
+	    BEGIN_ACCEL(18);
+	    /* flush the PVS before updating??? */
+	    OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+
+	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_0,
+			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
+			   (1 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+			   (1 << R300_PVS_LAST_INST_SHIFT)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_CODE_CNTL_1,
+			  (1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+	}
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
+	/* PVS inst 0 */
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+		      (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+		       R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+		       R300_PVS_DST_OFFSET(0) |
+		       R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+		       R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+		       R300_PVS_SRC_OFFSET(0) |
+		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+		       R300_PVS_SRC_OFFSET(0) |
+		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+		       R300_PVS_SRC_OFFSET(0) |
+		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+	/* PVS inst 1 */
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+		      (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+		       R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+		       R300_PVS_DST_OFFSET(1) |
+		       R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+		       R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+		       R300_PVS_SRC_OFFSET(1) |
+		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+		       R300_PVS_SRC_OFFSET(1) |
+		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+		      (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+		       R300_PVS_SRC_OFFSET(1) |
+		       R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+		       R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+	if (pMask) {
+	    /* PVS inst 2 */
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+			   R300_PVS_DST_OFFSET(2) |
+			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(2) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(2) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    OUT_ACCEL_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(2) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	}
+
+	OUT_ACCEL_REG(R300_VAP_PVS_FLOW_CNTL_OPC, 0);
 
+	OUT_ACCEL_REG(R300_VAP_GB_VERT_CLIP_ADJ, 0x3f800000);
+	OUT_ACCEL_REG(R300_VAP_GB_VERT_DISC_ADJ, 0x3f800000);
+	OUT_ACCEL_REG(R300_VAP_GB_HORZ_CLIP_ADJ, 0x3f800000);
+	OUT_ACCEL_REG(R300_VAP_GB_HORZ_DISC_ADJ, 0x3f800000);
+	OUT_ACCEL_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+	FINISH_ACCEL();
+    }
+
+    BEGIN_ACCEL(4);
     OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
     OUT_ACCEL_REG(R300_VAP_OUT_VTX_FMT_1,
 		  ((2 << R300_TEX_0_COMP_CNT_SHIFT) |
 		   (2 << R300_TEX_1_COMP_CNT_SHIFT)));
 
-    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0);
+    OUT_ACCEL_REG(R300_TX_INVALTAGS, 0x0);
     OUT_ACCEL_REG(R300_TX_ENABLE, txenable);
     FINISH_ACCEL();
 
@@ -1384,80 +1573,103 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 	CARD32 mask_color, mask_alpha;
 
 	if (PICT_FORMAT_RGB(pSrcPicture->format) == 0)
+	    //src_color = R300_ALU_RGB_0_0;
 	    src_color = (R500_ALU_RGB_R_SWIZ_A_0 |
 			 R500_ALU_RGB_G_SWIZ_A_0 |
 			 R500_ALU_RGB_B_SWIZ_A_0);
 	else
+	    //src_color = R300_ALU_RGB_SRC0_RGB;
 	    src_color = (R500_ALU_RGB_R_SWIZ_A_R |
 			 R500_ALU_RGB_G_SWIZ_A_G |
 			 R500_ALU_RGB_B_SWIZ_A_B);
 
 	if (PICT_FORMAT_A(pSrcPicture->format) == 0)
+	    //src_alpha = R300_ALU_ALPHA_1_0;
 	    src_alpha = R500_ALPHA_SWIZ_A_1;
 	else
+	    //src_alpha = R300_ALU_ALPHA_SRC0_A;
 	    src_alpha = R500_ALPHA_SWIZ_A_A;
 
 	if (pMask && pMaskPicture->componentAlpha) {
 	    if (RadeonBlendOp[op].src_alpha) {
 		if (PICT_FORMAT_A(pSrcPicture->format) == 0) {
+		    //src_color = R300_ALU_RGB_1_0;
+		    //src_alpha = R300_ALU_ALPHA_1_0;
 		    src_color = (R500_ALU_RGB_R_SWIZ_A_1 |
 				 R500_ALU_RGB_G_SWIZ_A_1 |
 				 R500_ALU_RGB_B_SWIZ_A_1);
 		    src_alpha = R500_ALPHA_SWIZ_A_1;
 		} else {
+		    //src_color = R300_ALU_RGB_SRC0_AAA;
+		    //src_alpha = R300_ALU_ALPHA_SRC0_A;
 		    src_color = (R500_ALU_RGB_R_SWIZ_A_A |
 				 R500_ALU_RGB_G_SWIZ_A_A |
 				 R500_ALU_RGB_B_SWIZ_A_A);
 		    src_alpha = R500_ALPHA_SWIZ_A_A;
 		}
 
+		//mask_color = R300_ALU_RGB_SRC1_RGB;
 		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
 			      R500_ALU_RGB_G_SWIZ_B_G |
 			      R500_ALU_RGB_B_SWIZ_B_B);
 
 		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
+		    //mask_alpha = R300_ALU_ALPHA_1_0;
 		    mask_alpha = R500_ALPHA_SWIZ_B_1;
 		else
+		    //mask_alpha = R300_ALU_ALPHA_SRC1_A;
 		    mask_alpha = R500_ALPHA_SWIZ_B_A;
 
 	    } else {
+		//src_color = R300_ALU_RGB_SRC0_RGB;
 		src_color = (R500_ALU_RGB_R_SWIZ_A_R |
 			     R500_ALU_RGB_G_SWIZ_A_G |
 			     R500_ALU_RGB_B_SWIZ_A_B);
 
 		if (PICT_FORMAT_A(pSrcPicture->format) == 0)
+		    //src_alpha = R300_ALU_ALPHA_1_0;
 		    src_alpha = R500_ALPHA_SWIZ_A_1;
 		else
+		    //src_alpha = R300_ALU_ALPHA_SRC0_A;
 		    src_alpha = R500_ALPHA_SWIZ_A_A;
 
+		//mask_color = R300_ALU_RGB_SRC1_RGB;
 		mask_color = (R500_ALU_RGB_R_SWIZ_B_R |
 			      R500_ALU_RGB_G_SWIZ_B_G |
 			      R500_ALU_RGB_B_SWIZ_B_B);
 
 		if (PICT_FORMAT_A(pMaskPicture->format) == 0)
+		    //mask_alpha = R300_ALU_ALPHA_1_0;
 		    mask_alpha = R500_ALPHA_SWIZ_B_1;
 		else
+		    //mask_alpha = R300_ALU_ALPHA_SRC1_A;
 		    mask_alpha = R500_ALPHA_SWIZ_B_A;
 
 	    }
 	} else if (pMask) {
 	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
+		//mask_color = R300_ALU_RGB_1_0;
 		mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
 			      R500_ALU_RGB_G_SWIZ_B_1 |
 			      R500_ALU_RGB_B_SWIZ_B_1);
 	    else
+		//mask_color = R300_ALU_RGB_SRC1_AAA;
 		mask_color = (R500_ALU_RGB_R_SWIZ_B_A |
 			      R500_ALU_RGB_G_SWIZ_B_A |
 			      R500_ALU_RGB_B_SWIZ_B_A);
 
 	    if (PICT_FORMAT_A(pMaskPicture->format) == 0)
+		//mask_alpha = R300_ALU_ALPHA_1_0;
 		mask_alpha = R500_ALPHA_SWIZ_B_1;
 	    else
+		//mask_alpha = R300_ALU_ALPHA_SRC1_A;
 		mask_alpha = R500_ALPHA_SWIZ_B_A;
 	} else {
+	    //mask_color = R300_ALU_RGB_1_0;
 	    mask_color = (R500_ALU_RGB_R_SWIZ_B_1 |
 			  R500_ALU_RGB_G_SWIZ_B_1 |
 			  R500_ALU_RGB_B_SWIZ_B_1);
+	    //mask_alpha = R300_ALU_ALPHA_1_0;
 	    mask_alpha = R500_ALPHA_SWIZ_B_1;
 	}
 
@@ -1531,7 +1743,7 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 					 (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
 					 (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)));
 
-	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+	    OUT_ACCEL_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(1) | R300_TX_OFFSET_RS(6));
 
 	    /* src tex */
 	    OUT_ACCEL_REG(R500_RS_INST_0, ((0 << R500_RS_INST_TEX_ID_SHIFT) |
@@ -1583,9 +1795,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 						   R500_DY_S_SWIZ_R |
 						   R500_DY_T_SWIZ_R |
 						   R500_DY_R_SWIZ_R |
-						   R500_DY_Q_SWIZ_R));
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+						   R500_DY_Q_SWIZ_R)); // TEX_ADDR_DXDY
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
 
 	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
 						   R500_INST_TEX_SEM_WAIT |
@@ -1618,9 +1830,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 						   R500_DY_S_SWIZ_R |
 						   R500_DY_T_SWIZ_R |
 						   R500_DY_R_SWIZ_R |
-						   R500_DY_Q_SWIZ_R));
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+						   R500_DY_Q_SWIZ_R)); // TEX_ADDR_DXDY
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
 	} else {
 	    BEGIN_ACCEL(13);
 	    OUT_ACCEL_REG(R500_GA_US_VECTOR_INDEX, 0);
@@ -1655,9 +1867,9 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 						   R500_DY_S_SWIZ_R |
 						   R500_DY_T_SWIZ_R |
 						   R500_DY_R_SWIZ_R |
-						   R500_DY_Q_SWIZ_R));
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+						   R500_DY_Q_SWIZ_R)); // TEX_ADDR_DXDY
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
+	    OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
 	}
 
 	OUT_ACCEL_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
@@ -1701,13 +1913,14 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 	FINISH_ACCEL();
     }
 
-    BEGIN_ACCEL(3);
+    BEGIN_ACCEL(4);
 
     OUT_ACCEL_REG(R300_RB3D_COLOROFFSET0, dst_offset);
     OUT_ACCEL_REG(R300_RB3D_COLORPITCH0, colorpitch);
 
     blendcntl = RADEONGetBlendCntl(op, pMaskPicture, pDstPicture->format);
     OUT_ACCEL_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
+    OUT_ACCEL_REG(R300_RB3D_ABLENDCNTL, 0);
 
     FINISH_ACCEL();
 
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index aa756f7..f5282cc 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -106,6 +106,15 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
     dstyoff = 0;
 #endif
 
+#if 0
+    ErrorF("dst_offset: 0x%x\n", dst_offset);
+    ErrorF("dst_pitch: 0x%x\n", dst_pitch);
+    ErrorF("dstxoff: 0x%x\n", dstxoff);
+    ErrorF("dstyoff: 0x%x\n", dstyoff);
+    ErrorF("src_offset: 0x%x\n", pPriv->src_offset);
+    ErrorF("src_pitch: 0x%x\n", pPriv->src_pitch);
+#endif
+
     if (!info->XInited3D)
 	RADEONInit3DEngine(pScrn);
 
@@ -122,6 +131,10 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
     if (IS_R300_3D || IS_R500_3D) {
 	CARD32 output_fmt;
+	int has_tcl = ((info->ChipFamily != CHIP_FAMILY_RS690) &&
+		       (info->ChipFamily != CHIP_FAMILY_RS740) &&
+		       (info->ChipFamily != CHIP_FAMILY_RS400) &&
+		       (info->ChipFamily != CHIP_FAMILY_RV515));
 
 	switch (pPixmap->drawable.bitsPerPixel) {
 	case 16:
@@ -191,36 +204,154 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
 	txenable = R300_TEX_0_ENABLE;
 
-	BEGIN_VIDEO(7);
-	OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
-		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
-		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
-		       (0 << R300_DST_VEC_LOC_0_SHIFT) |
-		       R300_SIGNED_0 |
-		       (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
-		       (0 << R300_SKIP_DWORDS_1_SHIFT) |
-		       (6 << R300_DST_VEC_LOC_1_SHIFT) |
-		       R300_LAST_VEC_1 |
-		       R300_SIGNED_1));
-	OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
-		      ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) |
-		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) |
-		       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_0_SHIFT) |
-		       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_0_SHIFT) |
-		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y)
-			<< R300_WRITE_ENA_0_SHIFT) |
-		       (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) |
-		       (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) |
-		       (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_1_SHIFT) |
-		       (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_1_SHIFT) |
-		       ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y)
-			<< R300_WRITE_ENA_1_SHIFT)));
+	/* setup the VAP */
+	if (has_tcl) {
+	    BEGIN_VIDEO(27);
+	    OUT_VIDEO_REG(R300_VAP_CNTL_STATUS, 0);
+	    OUT_VIDEO_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
+	    OUT_VIDEO_REG(R300_VAP_CNTL, ((6 << R300_PVS_NUM_SLOTS_SHIFT) |
+					  (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+					  (4 << R300_PVS_NUM_FPUS_SHIFT) |
+					  (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
+	} else {
+	    BEGIN_VIDEO(9);
+	    OUT_VIDEO_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
+	    OUT_VIDEO_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+					  (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
+					  (4 << R300_PVS_NUM_FPUS_SHIFT) |
+					  (5 << R300_VF_MAX_VTX_NUM_SHIFT)));
+	}
+
+	OUT_VIDEO_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
+	OUT_VIDEO_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
+
+	if (has_tcl) {
+	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
+			  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+			   (0 << R300_SKIP_DWORDS_0_SHIFT) |
+			   (0 << R300_DST_VEC_LOC_0_SHIFT) |
+			   R300_SIGNED_0 |
+			   (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+			   (0 << R300_SKIP_DWORDS_1_SHIFT) |
+			   (10 << R300_DST_VEC_LOC_1_SHIFT) |
+			   R300_LAST_VEC_1 |
+			   R300_SIGNED_1));
+	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+			  ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_0_SHIFT) |
+			   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			    << R300_WRITE_ENA_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Z << R300_SWIZZLE_SELECT_Z_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_W << R300_SWIZZLE_SELECT_W_1_SHIFT) |
+			   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y | R300_WRITE_ENA_Z | R300_WRITE_ENA_W)
+			    << R300_WRITE_ENA_1_SHIFT)));
+	} else {
+	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
+			  ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
+			   (0 << R300_SKIP_DWORDS_0_SHIFT) |
+			   (0 << R300_DST_VEC_LOC_0_SHIFT) |
+			   R300_SIGNED_0 |
+			   (R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_1_SHIFT) |
+			   (0 << R300_SKIP_DWORDS_1_SHIFT) |
+			   (6 << R300_DST_VEC_LOC_1_SHIFT) |
+			   R300_LAST_VEC_1 |
+			   R300_SIGNED_1));
+	    OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_EXT_0,
+			  ((R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_0_SHIFT) |
+			   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y)
+			    << R300_WRITE_ENA_0_SHIFT) |
+			   (R300_SWIZZLE_SELECT_X << R300_SWIZZLE_SELECT_X_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_Y << R300_SWIZZLE_SELECT_Y_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_FP_ZERO << R300_SWIZZLE_SELECT_Z_1_SHIFT) |
+			   (R300_SWIZZLE_SELECT_FP_ONE << R300_SWIZZLE_SELECT_W_1_SHIFT) |
+			   ((R300_WRITE_ENA_X | R300_WRITE_ENA_Y)
+			    << R300_WRITE_ENA_1_SHIFT)));
+	}
+
+	/* setup vertex shader */
+	if (has_tcl) {
+	    OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_0,
+			  ((0 << R300_PVS_FIRST_INST_SHIFT) |
+			   (1 << R300_PVS_XYZW_VALID_INST_SHIFT) |
+			   (1 << R300_PVS_LAST_INST_SHIFT)));
+	    OUT_VIDEO_REG(R300_VAP_PVS_CODE_CNTL_1,
+			  (1 << R300_PVS_LAST_VTX_SRC_INST_SHIFT));
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_INDX_REG, 0);
+
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+			   R300_PVS_DST_OFFSET(0) |
+			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(0) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_DST_OPCODE(R300_VE_ADD) |
+			   R300_PVS_DST_REG_TYPE(R300_PVS_DST_REG_OUT) |
+			   R300_PVS_DST_OFFSET(1) |
+			   R300_PVS_DST_WE_X | R300_PVS_DST_WE_Y |
+			   R300_PVS_DST_WE_Z | R300_PVS_DST_WE_W));
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(10) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_X) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_Y) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_Z) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_W)));
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(10) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+	    OUT_VIDEO_REG(R300_VAP_PVS_VECTOR_DATA_REG,
+			  (R300_PVS_SRC_REG_TYPE(R300_PVS_SRC_REG_INPUT) |
+			   R300_PVS_SRC_OFFSET(10) |
+			   R300_PVS_SRC_SWIZZLE_X(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Y(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_Z(R300_PVS_SRC_SELECT_FORCE_0) |
+			   R300_PVS_SRC_SWIZZLE_W(R300_PVS_SRC_SELECT_FORCE_0)));
+
+	    OUT_VIDEO_REG(R300_VAP_PVS_FLOW_CNTL_OPC, 0);
+
+	    OUT_VIDEO_REG(R300_VAP_GB_VERT_CLIP_ADJ, 0x3f800000);
+	    OUT_VIDEO_REG(R300_VAP_GB_VERT_DISC_ADJ, 0x3f800000);
+	    OUT_VIDEO_REG(R300_VAP_GB_HORZ_CLIP_ADJ, 0x3f800000);
+	    OUT_VIDEO_REG(R300_VAP_GB_HORZ_DISC_ADJ, 0x3f800000);
+	    OUT_VIDEO_REG(R300_VAP_CLIP_CNTL, R300_CLIP_DISABLE);
+	}
 
 	OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_0, R300_VTX_POS_PRESENT);
 	OUT_VIDEO_REG(R300_VAP_OUT_VTX_FMT_1, (2 << R300_TEX_0_COMP_CNT_SHIFT));
-
-	OUT_VIDEO_REG(R300_TX_INVALTAGS, 0);
-	OUT_VIDEO_REG(R300_TX_ENABLE, txenable);
 	OUT_VIDEO_REG(R300_US_OUT_FMT_0, output_fmt);
 	FINISH_VIDEO();
 
@@ -238,7 +369,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 			   R300_RS_SEL_T(R300_RS_SEL_C1) |
 			   R300_RS_SEL_R(R300_RS_SEL_K0) |
 			   R300_RS_SEL_Q(R300_RS_SEL_K1)));
-	    OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
+	    OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_TX_OFFSET_RS(6));
 	    OUT_VIDEO_REG(R300_RS_INST_0, R300_RS_INST_TEX_CN_WRITE);
 	    OUT_VIDEO_REG(R300_US_CONFIG, (0 << R300_NLEVEL_SHIFT) | R300_FIRST_TEX);
 	    OUT_VIDEO_REG(R300_US_PIXSIZE, 0);
@@ -325,11 +456,8 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 					 (R500_RS_IP_PTR_K0 << R500_RS_IP_TEX_PTR_R_SHIFT) |
 					 (R500_RS_IP_PTR_K1 << R500_RS_IP_TEX_PTR_Q_SHIFT)));
 
-	    OUT_VIDEO_REG(R300_RS_INST_COUNT, R300_INST_COUNT_RS(0) | R300_TX_OFFSET_RS(6));
-
-	    OUT_VIDEO_REG(R500_RS_INST_0, ((0 << R500_RS_INST_TEX_ID_SHIFT) |
-					   R500_RS_INST_TEX_CN_WRITE |
-					   (0 << R500_RS_INST_TEX_ADDR_SHIFT)));
+	    OUT_VIDEO_REG(R300_RS_INST_COUNT, 0);
+	    OUT_VIDEO_REG(R500_RS_INST_0, R500_RS_INST_TEX_CN_WRITE);
 	    OUT_VIDEO_REG(R300_US_CONFIG, R500_ZERO_TIMES_ANYTHING_EQUALS_ZERO);
 	    OUT_VIDEO_REG(R300_US_PIXSIZE, 0);
 	    OUT_VIDEO_REG(R500_US_FC_CTRL, 0);
@@ -339,7 +467,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 					       R500_US_CODE_RANGE_SIZE(1)));
 	    OUT_VIDEO_REG(R500_US_CODE_OFFSET, 0);
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_INDEX, 0);
-
+	    // 7807
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_TEX |
 						   R500_INST_TEX_SEM_WAIT |
 						   R500_INST_RGB_WMASK_R |
@@ -371,10 +499,11 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 						   R500_DY_S_SWIZ_R |
 						   R500_DY_T_SWIZ_R |
 						   R500_DY_R_SWIZ_R |
-						   R500_DY_Q_SWIZ_R));
-	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
-	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000);
+						   R500_DY_Q_SWIZ_R)); // TEX_ADDR_DXDY
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
+	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, 0x00000000); // mbz
 
+	    // 0x78105
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_INST_TYPE_OUT |
 						   R500_INST_TEX_SEM_WAIT |
 						   R500_INST_LAST |
@@ -390,13 +519,13 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 						   R500_RGB_ADDR1_CONST |
 						   R500_RGB_ADDR2(0) |
 						   R500_RGB_ADDR2_CONST |
-						   R500_RGB_SRCP_OP_1_MINUS_2RGB0));
+						   R500_RGB_SRCP_OP_1_MINUS_2RGB0)); //0x10040000
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_ADDR0(0) |
 						   R500_ALPHA_ADDR1(0) |
 						   R500_ALPHA_ADDR1_CONST |
 						   R500_ALPHA_ADDR2(0) |
 						   R500_ALPHA_ADDR2_CONST |
-						   R500_ALPHA_SRCP_OP_1_MINUS_2A0));
+						   R500_ALPHA_SRCP_OP_1_MINUS_2A0)); //0x10040000
 
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGB_SEL_A_SRC0 |
 						   R500_ALU_RGB_R_SWIZ_A_R |
@@ -405,28 +534,33 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 						   R500_ALU_RGB_SEL_B_SRC0 |
 						   R500_ALU_RGB_R_SWIZ_B_1 |
 						   R500_ALU_RGB_B_SWIZ_B_1 |
-						   R500_ALU_RGB_G_SWIZ_B_1));
+						   R500_ALU_RGB_G_SWIZ_B_1));//0x00db0220
 
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALPHA_OP_MAD |
 						   R500_ALPHA_SWIZ_A_A |
-						   R500_ALPHA_SWIZ_B_1));
+						   R500_ALPHA_SWIZ_B_1));//0x00c0c000)
 
 	    OUT_VIDEO_REG(R500_GA_US_VECTOR_DATA, (R500_ALU_RGBA_OP_MAD |
 						   R500_ALU_RGBA_R_SWIZ_0 |
 						   R500_ALU_RGBA_G_SWIZ_0 |
 						   R500_ALU_RGBA_B_SWIZ_0 |
-						   R500_ALU_RGBA_A_SWIZ_0));
+						   R500_ALU_RGBA_A_SWIZ_0));//0x20490000
 	    FINISH_VIDEO();
 	}
 
-	BEGIN_VIDEO(4);
+	BEGIN_VIDEO(6);
+	OUT_VIDEO_REG(R300_TX_INVALTAGS, 0);
+	OUT_VIDEO_REG(R300_TX_ENABLE, txenable);
 
 	OUT_VIDEO_REG(R300_RB3D_COLOROFFSET0, dst_offset);
 	OUT_VIDEO_REG(R300_RB3D_COLORPITCH0, colorpitch);
 
 	blendcntl = RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO;
-	OUT_VIDEO_REG(R300_RB3D_BLENDCNTL, blendcntl | R300_ALPHA_BLEND_ENABLE | R300_READ_ENABLE);
+	OUT_VIDEO_REG(R300_RB3D_BLENDCNTL, blendcntl);
+	OUT_VIDEO_REG(R300_RB3D_ABLENDCNTL, 0);
+	FINISH_VIDEO();
 
+	BEGIN_VIDEO(1);
 	OUT_VIDEO_REG(R300_VAP_VTX_SIZE, VTX_DWORD_COUNT);
 	FINISH_VIDEO();
 
commit 0032c80bf30bab189204e3e6929e18a19d753138
Author: Alex Deucher <alex at botch2.com>
Date:   Thu Apr 10 14:35:00 2008 -0400

    RADEON: store tcl status in driver rec

diff --git a/src/radeon.h b/src/radeon.h
index 122a9dd..feff48f 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -762,6 +762,7 @@ typedef struct {
     void *fb_shadow;
 
     int num_gb_pipes;
+    Bool has_tcl;
 } RADEONInfoRec, *RADEONInfoPtr;
 
 #define RADEONWaitForFifo(pScrn, entries)				\
diff --git a/src/radeon_driver.c b/src/radeon_driver.c
index d5595ea..3d4f05f 100644
--- a/src/radeon_driver.c
+++ b/src/radeon_driver.c
@@ -1785,6 +1785,23 @@ static Bool RADEONPreInitChipType(ScrnInfoPtr pScrn)
         if (!xf86LoadSubModule(pScrn, "shadow"))
             return FALSE;
     }
+
+
+    if ((info->ChipFamily == CHIP_FAMILY_RS100) ||
+	(info->ChipFamily == CHIP_FAMILY_RS200) ||
+	(info->ChipFamily == CHIP_FAMILY_RS300) ||
+	(info->ChipFamily == CHIP_FAMILY_RS400) ||
+	(info->ChipFamily == CHIP_FAMILY_RS690) ||
+	(info->ChipFamily == CHIP_FAMILY_RS740))
+	info->has_tcl = FALSE;
+    else {
+	/* need to sort out why PVS has issues on RV515 */
+	if (info->ChipFamily == CHIP_FAMILY_RV515)
+	    info->has_tcl = FALSE;
+	else
+	    info->has_tcl = TRUE;
+    }
+
     return TRUE;
 }
 
commit 9e2ffe66d106abe34a670d2edc9905ed62c485e8
Author: Alex Deucher <alex at botch2.com>
Date:   Thu Apr 10 14:24:04 2008 -0400

    R3xx+: use the right register for engine flush

diff --git a/src/radeon_accel.c b/src/radeon_accel.c
index 67dae7c..7865de1 100644
--- a/src/radeon_accel.c
+++ b/src/radeon_accel.c
@@ -158,17 +158,32 @@ void RADEONEngineFlush(ScrnInfoPtr pScrn)
     unsigned char *RADEONMMIO = info->MMIO;
     int            i;
 
-    OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
-	    RADEON_RB3D_DC_FLUSH_ALL,
-	    ~RADEON_RB3D_DC_FLUSH_ALL);
-    for (i = 0; i < RADEON_TIMEOUT; i++) {
-	if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
-	    break;
-    }
-    if (i == RADEON_TIMEOUT) {
-	xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
-		       "DC flush timeout: %x\n",
-		       (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
+    if (info->ChipFamily <= CHIP_FAMILY_RV280) {
+	OUTREGP(RADEON_RB3D_DSTCACHE_CTLSTAT,
+		RADEON_RB3D_DC_FLUSH_ALL,
+		~RADEON_RB3D_DC_FLUSH_ALL);
+	for (i = 0; i < RADEON_TIMEOUT; i++) {
+	    if (!(INREG(RADEON_RB3D_DSTCACHE_CTLSTAT) & RADEON_RB3D_DC_BUSY))
+		break;
+	}
+	if (i == RADEON_TIMEOUT) {
+	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
+			   "DC flush timeout: %x\n",
+			   (unsigned int)INREG(RADEON_RB3D_DSTCACHE_CTLSTAT));
+	}
+    } else {
+	OUTREGP(R300_RB2D_DSTCACHE_CTLSTAT,
+		R300_RB2D_DC_FLUSH_ALL,
+		~R300_RB2D_DC_FLUSH_ALL);
+	for (i = 0; i < RADEON_TIMEOUT; i++) {
+	    if (!(INREG(R300_RB2D_DSTCACHE_CTLSTAT) & R300_RB2D_DC_BUSY))
+		break;
+	}
+	if (i == RADEON_TIMEOUT) {
+	    xf86DrvMsgVerb(pScrn->scrnIndex, X_INFO, RADEON_LOGLEVEL_DEBUG,
+			   "DC flush timeout: %x\n",
+			   (unsigned int)INREG(R300_RB2D_DSTCACHE_CTLSTAT));
+	}
     }
 }
 
diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index 6ace342..aaac052 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -4614,6 +4614,7 @@
 #       define R300_DC_FLUSH_2D                         (1 << 0)
 #       define R300_DC_FREE_2D                          (1 << 2)
 #       define R300_RB2D_DC_FLUSH_ALL                   (R300_DC_FLUSH_2D | R300_DC_FREE_2D)
+#       define R300_RB2D_DC_BUSY                        (1 << 31)
 #define R300_RB3D_DSTCACHE_CTLSTAT		        0x4e4c
 #       define R300_DC_FLUSH_3D                         (2 << 0)
 #       define R300_DC_FREE_3D                          (2 << 2)
commit e1a9f26c2d2cbca9ad159e723ec95b95be1ef349
Author: Alex Deucher <alex at botch2.com>
Date:   Thu Apr 10 14:12:15 2008 -0400

    R3xx+: minor textured video fixes
    
    - set shader output swizzling correctly
    - flush the right cache register on r3xx+

diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index e2db615..aa756f7 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -111,13 +111,17 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
     /* we can probably improve this */
     BEGIN_VIDEO(2);
-    OUT_VIDEO_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
+    if (IS_R300_3D || IS_R500_3D)
+	OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
+    else
+	OUT_VIDEO_REG(RADEON_RB3D_DSTCACHE_CTLSTAT, RADEON_RB3D_DC_FLUSH);
     /* We must wait for 3d to idle, in case source was just written as a dest. */
     OUT_VIDEO_REG(RADEON_WAIT_UNTIL,
 		RADEON_WAIT_HOST_IDLECLEAN | RADEON_WAIT_2D_IDLECLEAN | RADEON_WAIT_3D_IDLECLEAN);
     FINISH_VIDEO();
 
     if (IS_R300_3D || IS_R500_3D) {
+	CARD32 output_fmt;
 
 	switch (pPixmap->drawable.bitsPerPixel) {
 	case 16:
@@ -133,6 +137,12 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	    return;
 	}
 
+	output_fmt = (R300_OUT_FMT_C4_8 |
+		      R300_OUT_FMT_C0_SEL_BLUE |
+		      R300_OUT_FMT_C1_SEL_GREEN |
+		      R300_OUT_FMT_C2_SEL_RED |
+		      R300_OUT_FMT_C3_SEL_ALPHA);
+
 	colorpitch = dst_pitch >> pixel_shift;
 	colorpitch |= dst_format;
 
@@ -181,7 +191,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
 	txenable = R300_TEX_0_ENABLE;
 
-	BEGIN_VIDEO(6);
+	BEGIN_VIDEO(7);
 	OUT_VIDEO_REG(R300_VAP_PROG_STREAM_CNTL_0,
 		      ((R300_DATA_TYPE_FLOAT_2 << R300_DATA_TYPE_0_SHIFT) |
 		       (0 << R300_SKIP_DWORDS_0_SHIFT) |
@@ -211,6 +221,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 
 	OUT_VIDEO_REG(R300_TX_INVALTAGS, 0);
 	OUT_VIDEO_REG(R300_TX_ENABLE, txenable);
+	OUT_VIDEO_REG(R300_US_OUT_FMT_0, output_fmt);
 	FINISH_VIDEO();
 
 	/* setup pixel shader */
commit d79040906cd25bd494feb5901f465bbd050aa923
Author: Alex Deucher <alex at botch2.com>
Date:   Thu Apr 10 13:59:58 2008 -0400

    R3xx+: EXA/textured video fixes
    
    - get pipe config based on GB_PIPE_SELECT where applicable
    (adapted from a similar patch from Dave)
    - only flush the dst cache after submitting vertices; freeing
    the cache lines stalls the pipe
    - no need to wait for 3D idle after submitting vertices
    - fix PURGE_CACHE() and PURGE_ZCACHE() for r3xx+
    - fix depth 16 with EXA composite

diff --git a/src/radeon.h b/src/radeon.h
index ef62883..122a9dd 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -760,6 +760,8 @@ typedef struct {
 
     Bool              r600_shadow_fb;
     void *fb_shadow;
+
+    int num_gb_pipes;
 } RADEONInfoRec, *RADEONInfoPtr;
 
 #define RADEONWaitForFifo(pScrn, entries)				\
@@ -1188,15 +1190,27 @@ do {									\
 #define RADEON_PURGE_CACHE()						\
 do {									\
     BEGIN_RING(2);							\
-    OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));		\
-    OUT_RING(RADEON_RB3D_DC_FLUSH_ALL);					\
+    if (info->ChipFamily <= CHIP_FAMILY_RV280) {                        \
+        OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0));		\
+        OUT_RING(RADEON_RB3D_DC_FLUSH_ALL);				\
+    } else {                                                            \
+        OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));		\
+        OUT_RING(R300_RB3D_DC_FLUSH_ALL);				\
+    }                                                                   \
     ADVANCE_RING();							\
 } while (0)
 
 #define RADEON_PURGE_ZCACHE()						\
 do {									\
-    OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));		\
-    OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL);					\
+    BEGIN_RING(2);							\
+    if (info->ChipFamily <= CHIP_FAMILY_RV280) {                        \
+        OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0));		\
+        OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL);				\
+    } else {                                                            \
+        OUT_RING(CP_PACKET0(R300_RB3D_ZCACHE_CTLSTAT, 0));		\
+        OUT_RING(R300_ZC_FLUSH_ALL);					\
+    }                                                                   \
+    ADVANCE_RING();							\
 } while (0)
 
 #endif /* XF86DRI */
diff --git a/src/radeon_accel.c b/src/radeon_accel.c
index 015d176..67dae7c 100644
--- a/src/radeon_accel.c
+++ b/src/radeon_accel.c
@@ -400,6 +400,33 @@ void RADEONEngineInit(ScrnInfoPtr pScrn)
     info->aux_sc_cntl     = 0x00000000;
 #endif
 
+    if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
+	(info->ChipFamily == CHIP_FAMILY_R420)  ||
+	(info->ChipFamily == CHIP_FAMILY_RS690) ||
+	(info->ChipFamily == CHIP_FAMILY_RS740) ||
+	(info->ChipFamily == CHIP_FAMILY_RS400) ||
+	IS_R500_3D) {
+	uint32_t gb_pipe_sel = INREG(R400_GB_PIPE_SELECT);
+	if (info->num_gb_pipes == 0) {
+	    info->num_gb_pipes = ((gb_pipe_sel >> 12) & 0x3) + 1;
+	    xf86DrvMsg(pScrn->scrnIndex, X_INFO,
+		       "%s: num pipes is %d\n", __FUNCTION__, info->num_gb_pipes);
+	}
+	if (IS_R500_3D)
+	    OUTPLL(pScrn, R500_DYN_SCLK_PWMEM_PIPE, (1 | ((gb_pipe_sel >> 8) & 0xf) << 4));
+    } else {
+	if (info->num_gb_pipes == 0) {
+	    if ((info->ChipFamily == CHIP_FAMILY_R300) ||
+		(info->ChipFamily == CHIP_FAMILY_R350)) {
+		/* R3xx chips */
+		info->num_gb_pipes = 2;
+	    } else {
+		/* RV3xx chips */
+		info->num_gb_pipes = 1;
+	    }
+	}
+    }
+
     RADEONEngineRestore(pScrn);
 }
 
diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c
index 5c9eae1..987dac6 100644
--- a/src/radeon_commonfuncs.c
+++ b/src/radeon_commonfuncs.c
@@ -55,7 +55,7 @@
 static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 {
     RADEONInfoPtr  info       = RADEONPTR(pScrn);
-    CARD32 gb_tile_config;
+    CARD32 gb_tile_config, su_reg_dest;
     ACCEL_PREAMBLE();
 
     info->texW[0] = info->texH[0] = info->texW[1] = info->texH[1] = 1;
@@ -70,27 +70,12 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 
 	gb_tile_config = (R300_ENABLE_TILING | R300_TILE_SIZE_16 | R300_SUBPIXEL_1_16);
 
-	if ((info->Chipset == PCI_CHIP_RV410_5E4C) ||
-	    (info->Chipset == PCI_CHIP_RV410_5E4F)) {
-	    /* RV410 SE chips */
-	    gb_tile_config |= R300_PIPE_COUNT_RV350;
-	} else if ((info->ChipFamily == CHIP_FAMILY_RV350) ||
-		   (info->ChipFamily == CHIP_FAMILY_RV380) ||
-		   (info->ChipFamily == CHIP_FAMILY_RS400)) {
-	    /* RV3xx, RS4xx chips */
-	    gb_tile_config |= R300_PIPE_COUNT_RV350;
-	} else if ((info->ChipFamily == CHIP_FAMILY_R300) ||
-		   (info->ChipFamily == CHIP_FAMILY_R350)) {
-	    /* R3xx chips */
-	    gb_tile_config |= R300_PIPE_COUNT_R300;
-	} else if ((info->ChipFamily == CHIP_FAMILY_RV410) ||
-		   (info->ChipFamily == CHIP_FAMILY_RS690) ||
-		   (info->ChipFamily == CHIP_FAMILY_RS740)) {
-	    /* RV4xx, RS6xx chips */
-	    gb_tile_config |= R300_PIPE_COUNT_R420_3P;
-	} else {
-	    /* R4xx, R5xx chips */
-	    gb_tile_config |= R300_PIPE_COUNT_R420;
+	switch(info->num_gb_pipes) {
+	case 2: gb_tile_config |= R300_PIPE_COUNT_R300; break;
+	case 3: gb_tile_config |= R300_PIPE_COUNT_R420_3P; break;
+	case 4: gb_tile_config |= R300_PIPE_COUNT_R420; break;
+	default:
+	case 1: gb_tile_config |= R300_PIPE_COUNT_RV350; break;
 	}
 
 	BEGIN_ACCEL(3);
@@ -99,6 +84,14 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	OUT_ACCEL_REG(R300_GB_ENABLE, 0);
 	FINISH_ACCEL();
 
+	if (IS_R500_3D) {
+	    su_reg_dest = ((1 << info->num_gb_pipes) - 1);
+	    BEGIN_ACCEL(2);
+	    OUT_ACCEL_REG(R500_SU_REG_DEST, su_reg_dest);
+	    OUT_ACCEL_REG(R500_VAP_INDEX_OFFSET, 0);
+	    FINISH_ACCEL();
+	}
+
 	BEGIN_ACCEL(3);
 	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D);
 	OUT_ACCEL_REG(R300_RB3D_ZCACHE_CTLSTAT, R300_ZC_FLUSH | R300_ZC_FREE);
@@ -150,14 +143,22 @@ static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn)
 	FINISH_ACCEL();
 
 	/* setup the VAP */
-	BEGIN_ACCEL(5);
+	BEGIN_ACCEL(6);
 	/* disable TCL/PVS */
 	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
 	OUT_ACCEL_REG(R300_VAP_CNTL_STATUS, R300_PVS_BYPASS);
-	OUT_ACCEL_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
-				      (5 << R300_PVS_NUM_CNTLRS_SHIFT) |
-				      (4 << R300_PVS_NUM_FPUS_SHIFT) |
-				      (5 << R300_VF_MAX_VTX_NUM_SHIFT)));
+	if (IS_R300_3D)
+	    OUT_ACCEL_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+					  (6 << R300_PVS_NUM_CNTLRS_SHIFT) |
+					  (6 << R300_PVS_NUM_FPUS_SHIFT) |
+					  (12 << R300_VF_MAX_VTX_NUM_SHIFT)));
+	else
+	    OUT_ACCEL_REG(R300_VAP_CNTL, ((10 << R300_PVS_NUM_SLOTS_SHIFT) |
+					  (6 << R300_PVS_NUM_CNTLRS_SHIFT) |
+					  (6 << R300_PVS_NUM_FPUS_SHIFT) |
+					  (12 << R300_VF_MAX_VTX_NUM_SHIFT) |
+					  R500_TCL_STATE_OPTIMIZATION));
+	OUT_ACCEL_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0);
 	OUT_ACCEL_REG(R300_VAP_VTE_CNTL, R300_VTX_XY_FMT | R300_VTX_Z_FMT);
 	OUT_ACCEL_REG(R300_VAP_PSC_SGN_NORM_CNTL, 0);
 	FINISH_ACCEL();
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index a97b752..cd0fa48 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -1220,16 +1220,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 			  R300_OUT_FMT_C2_SEL_BLUE |
 			  R300_OUT_FMT_C3_SEL_ALPHA);
 	    break;
-	case PICT_a1r5g5b5:
-	case PICT_x1r5g5b5:
-	    /* fix me */
-	case PICT_r5g6b5:
-	    output_fmt = (R300_OUT_FMT_C_5_6_5 |
-			  R300_OUT_FMT_C0_SEL_BLUE |
-			  R300_OUT_FMT_C1_SEL_GREEN |
-			  R300_OUT_FMT_C2_SEL_RED |
-			  R300_OUT_FMT_C3_SEL_ALPHA);
-	    break;
 	case PICT_a8:
 	    output_fmt = (R300_OUT_FMT_C4_8 |
 			  R300_OUT_FMT_C0_SEL_ALPHA);
@@ -1490,16 +1480,6 @@ static Bool FUNC_NAME(R300PrepareComposite)(int op, PicturePtr pSrcPicture,
 			  R300_OUT_FMT_C2_SEL_BLUE |
 			  R300_OUT_FMT_C3_SEL_ALPHA);
 	    break;
-	case PICT_a1r5g5b5:
-	case PICT_x1r5g5b5:
-	    /* fix me */
-	case PICT_r5g6b5:
-	    output_fmt = (R300_OUT_FMT_C_5_6_5 |
-			  R300_OUT_FMT_C0_SEL_BLUE |
-			  R300_OUT_FMT_C1_SEL_GREEN |
-			  R300_OUT_FMT_C2_SEL_RED |
-			  R300_OUT_FMT_C3_SEL_ALPHA);
-	    break;
 	case PICT_a8:
 	    output_fmt = (R300_OUT_FMT_C4_8 |
 			  R300_OUT_FMT_C0_SEL_ALPHA);
@@ -1825,7 +1805,7 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
 
     vtx_count = VTX_COUNT;
 
-    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
+    if (IS_R300_3D || IS_R500_3D) {
 	BEGIN_ACCEL(1);
 	OUT_ACCEL_REG(R300_VAP_VTX_SIZE, vtx_count);
 	FINISH_ACCEL();
@@ -1845,8 +1825,8 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
 		 RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
 		 (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
     } else {
-	if (IS_R300_VARIANT || IS_AVIVO_VARIANT)
-	    BEGIN_RING(4 * vtx_count + 6);
+	if (IS_R300_3D || IS_R500_3D)
+	    BEGIN_RING(4 * vtx_count + 4);
 	else
 	    BEGIN_RING(4 * vtx_count + 2);
 
@@ -1858,8 +1838,8 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
     }
 
 #else /* ACCEL_CP */
-    if (IS_R300_VARIANT || IS_AVIVO_VARIANT)
-	BEGIN_ACCEL(3 + vtx_count * 4);
+    if (IS_R300_3D || IS_R500_3D)
+	BEGIN_ACCEL(2 + vtx_count * 4);
     else
 	BEGIN_ACCEL(1 + vtx_count * 4);
 
@@ -1888,10 +1868,9 @@ static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst,
 	    xFixedToFloat(srcTopRight.x) / info->texW[0],     xFixedToFloat(srcTopRight.y) / info->texH[0],
 	    xFixedToFloat(maskTopRight.x) / info->texW[1],    xFixedToFloat(maskTopRight.y) / info->texH[1]);
 
-    if (IS_R300_VARIANT || IS_AVIVO_VARIANT) {
-	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D);
-	OUT_ACCEL_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
-    }
+    if (IS_R300_3D || IS_R500_3D)
+	/* flushing is pipelined, free/finish is not */
+	OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
 
 #ifdef ACCEL_CP
     ADVANCE_RING();
diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index aebc7ac..6ace342 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -3885,6 +3885,7 @@
 #define R300_GB_SELECT				        0x401c
 #define R300_GB_ENABLE				        0x4008
 #define R300_GB_AA_CONFIG				0x4020
+#define R400_GB_PIPE_SELECT                             0x402c
 #define R300_GB_MSPOS0				        0x4010
 #       define R300_MS_X0_SHIFT                         0
 #       define R300_MS_Y0_SHIFT                         4
@@ -3942,6 +3943,8 @@
 #       define R300_ALPHA3_SHADING_GOURAUD              (2 << 14)
 #define R300_GA_OFFSET				        0x4290
 
+#define R500_SU_REG_DEST                                0x42c8
+
 #define R300_VAP_CNTL_STATUS				0x2140
 #       define R300_PVS_BYPASS                          (1 << 8)
 #define R300_VAP_PVS_STATE_FLUSH_REG		        0x2284
@@ -3952,6 +3955,7 @@
 #       define R300_VF_MAX_VTX_NUM_SHIFT                18
 #       define R300_GL_CLIP_SPACE_DEF                   (0 << 22)
 #       define R300_DX_CLIP_SPACE_DEF                   (1 << 22)
+#       define R500_TCL_STATE_OPTIMIZATION              (1 << 23)
 #define R300_VAP_VTE_CNTL				0x20B0
 #       define R300_VPORT_X_SCALE_ENA                   (1 << 0)
 #       define R300_VPORT_X_OFFSET_ENA                  (1 << 1)
@@ -4191,6 +4195,8 @@
 #       define R300_BOUNDARY_EDGE_FLAG_ENA              (1 << 18)
 #define R300_VAP_PVS_STATE_FLUSH_REG			0x2284
 
+#define R500_VAP_INDEX_OFFSET			        0x208c
+
 #define R300_SU_TEX_WRAP				0x42a0
 #define R300_SU_POLY_OFFSET_ENABLE		        0x42b4
 #define R300_SU_CULL_MODE				0x42b8
@@ -4604,12 +4610,19 @@
 #define R300_FG_FOG_BLEND				0x4bc0
 #define R300_FG_ALPHA_FUNC				0x4bd4
 
+#define R300_RB2D_DSTCACHE_CTLSTAT		        0x342c
+#       define R300_DC_FLUSH_2D                         (1 << 0)
+#       define R300_DC_FREE_2D                          (1 << 2)
+#       define R300_RB2D_DC_FLUSH_ALL                   (R300_DC_FLUSH_2D | R300_DC_FREE_2D)
 #define R300_RB3D_DSTCACHE_CTLSTAT		        0x4e4c
 #       define R300_DC_FLUSH_3D                         (2 << 0)
 #       define R300_DC_FREE_3D                          (2 << 2)
+#       define R300_RB3D_DC_FLUSH_ALL                   (R300_DC_FLUSH_3D | R300_DC_FREE_3D)
+#       define R300_DC_FINISH_3D                        (1 << 4)
 #define R300_RB3D_ZCACHE_CTLSTAT			0x4f18
 #       define R300_ZC_FLUSH                            (1 << 0)
 #       define R300_ZC_FREE                             (1 << 1)
+#       define R300_ZC_FLUSH_ALL                        0x3
 #define R300_WAIT_UNTIL				        0x1720
 #       define R300_WAIT_2D_IDLECLEAN                   (1 << 16)
 #       define R300_WAIT_3D_IDLECLEAN                   (1 << 17)
@@ -5177,5 +5190,6 @@
 #   define R500_RS_IP_COL_FMT_RGBA			(0 << 27)
 #   define R500_RS_IP_OFFSET_EN 			(1 << 31)
 
+#define R500_DYN_SCLK_PWMEM_PIPE                        0x000d /* PLL */
 
 #endif
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index 45dc0c9..e2db615 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -589,7 +589,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 		     (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
 	} else {
 	    if (IS_R300_3D || IS_R500_3D)
-		BEGIN_RING(4 * VTX_DWORD_COUNT + 6);
+		BEGIN_RING(4 * VTX_DWORD_COUNT + 4);
 	    else
 		BEGIN_RING(4 * VTX_DWORD_COUNT + 2);
 	    OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
@@ -600,7 +600,7 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	}
 #else /* ACCEL_CP */
 	if (IS_R300_3D || IS_R500_3D)
-	    BEGIN_VIDEO(3 + VTX_DWORD_COUNT * 4);
+	    BEGIN_VIDEO(2 + VTX_DWORD_COUNT * 4);
 	else
 	    BEGIN_VIDEO(1 + VTX_DWORD_COUNT * 4);
 
@@ -625,10 +625,9 @@ FUNC_NAME(RADEONDisplayTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv
 	VTX_OUT((float)(dstX + dstw),                                (float)dstY,
 		xFixedToFloat(srcTopRight.x) / info->texW[0],     xFixedToFloat(srcTopRight.y) / info->texH[0]);
 
-	if (IS_R300_3D || IS_R500_3D) {
-	    OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D | R300_DC_FREE_3D);
-	    OUT_VIDEO_REG(RADEON_WAIT_UNTIL, RADEON_WAIT_3D_IDLECLEAN);
-	}
+	if (IS_R300_3D || IS_R500_3D)
+	    /* flushing is pipelined, free/finish is not */
+	    OUT_VIDEO_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
 
 #ifdef ACCEL_CP
 	ADVANCE_RING();


More information about the xorg-commit mailing list