xf86-video-ati: Branch 'cayman_accel' - 30 commits

Dave Airlie airlied at kemper.freedesktop.org
Mon May 23 23:00:52 PDT 2011


Rebased ref, commits from common ancestor:
commit 82cb33c3f0e1ba802d7a94f3159b3c5c86cd4043
Author: Dave Airlie <airlied at redhat.com>
Date:   Wed May 18 14:49:17 2011 +1000

    cayman: enable all accel

diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 9a166b0..5752d0e 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -90,8 +90,6 @@ EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     struct r600_accel_object dst;
     int scissor_height;
 
-    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
-	return FALSE;
 
     if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
 	RADEON_FALLBACK(("EVERGREENCheckDatatype failed\n"));
@@ -466,9 +464,6 @@ EVERGREENPrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
     struct radeon_accel_state *accel_state = info->accel_state;
     struct r600_accel_object src_obj, dst_obj;
 
-    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
-	return FALSE;
-
     if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
 	RADEON_FALLBACK(("EVERGREENCheckDatatype src failed\n"));
     if (!RADEONCheckBPP(pDst->drawable.bitsPerPixel))
@@ -1126,9 +1121,6 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
     float *cbuf;
     int scissor_height;
 
-    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
-	return FALSE;
-
     if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
 	return FALSE;
 
@@ -1467,9 +1459,6 @@ EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
 	}
     }
 
-    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
-	goto copy;
-
     scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
     height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
     base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
@@ -1595,9 +1584,6 @@ EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
 
     }
 
-    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
-	goto copy;
-
     if (!accel_state->allowHWDFS)
 	goto copy;
 
diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
index ce5d2e1..6200cdc 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -114,9 +114,6 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     float *vs_alu_consts;
     const_config_t vs_const_conf;
 
-    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
-	return;
-
     cont = RTFContrast(pPriv->contrast);
     bright = RTFBrightness(pPriv->brightness);
     gamma = (float)pPriv->gamma / 1000.0;
commit b77d374b0d11f48c33cfffdb4157c4ec4b05ea15
Author: Dave Airlie <airlied at redhat.com>
Date:   Tue May 24 15:53:58 2011 +1000

    cayman: add scissors workaround.
    
    wasted a lot of time getting to this.

diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 58f3bf1..9a166b0 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -58,6 +58,24 @@ extern int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
 static void
 EVERGREENDoneSolid(PixmapPtr pPix);
 
+/* this is a workaround, but not sure for what -
+ */
+static inline int cayman_adjust_scissor(ScrnInfoPtr pScrn, int height)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    if (info->ChipFamily != CHIP_FAMILY_CAYMAN)
+	return height;
+
+    if (height == 0)
+	return 0;
+
+    /* I've no idea if all scissors need to be even height or if its
+       just a 1 -> 2 problem */
+    if (height & 1)
+	height++;
+    return height;
+}
+
 static Bool
 EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 {
@@ -70,6 +88,7 @@ EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     float *ps_alu_consts;
     const_config_t ps_const_conf;
     struct r600_accel_object dst;
+    int scissor_height;
 
     if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
 	return FALSE;
@@ -108,9 +127,10 @@ EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 
     evergreen_set_default_state(pScrn);
 
-    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    scissor_height = cayman_adjust_scissor(pScrn, accel_state->dst_obj.height);
+    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, scissor_height);
+    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, scissor_height);
+    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, scissor_height);
 
     /* Shader */
     vs_conf.shader_addr         = accel_state->vs_mc_addr;
@@ -261,6 +281,7 @@ EVERGREENDoPrepareCopy(ScrnInfoPtr pScrn)
     tex_resource_t  tex_res;
     tex_sampler_t   tex_samp;
     shader_config_t vs_conf, ps_conf;
+    int scissor_height;
 
     CLEAR (cb_conf);
     CLEAR (tex_res);
@@ -273,9 +294,10 @@ EVERGREENDoPrepareCopy(ScrnInfoPtr pScrn)
 
     evergreen_set_default_state(pScrn);
 
-    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    scissor_height = cayman_adjust_scissor(pScrn, accel_state->dst_obj.height);
+    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, scissor_height);
+    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, scissor_height);
+    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, scissor_height);
 
     /* Shader */
     vs_conf.shader_addr         = accel_state->vs_mc_addr;
@@ -1102,6 +1124,7 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
     const_config_t vs_const_conf;
     struct r600_accel_object src_obj, mask_obj, dst_obj;
     float *cbuf;
+    int scissor_height;
 
     if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
 	return FALSE;
@@ -1192,9 +1215,10 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
 
     evergreen_set_default_state(pScrn);
 
-    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
-    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, accel_state->dst_obj.height);
+    scissor_height = cayman_adjust_scissor(pScrn, accel_state->dst_obj.height);
+    evergreen_set_generic_scissor(pScrn, 0, 0, accel_state->dst_obj.width, scissor_height);
+    evergreen_set_screen_scissor(pScrn, 0, 0, accel_state->dst_obj.width, scissor_height);
+    evergreen_set_window_scissor(pScrn, 0, 0, accel_state->dst_obj.width, scissor_height);
 
     if (!EVERGREENTextureSetup(pSrcPicture, pSrc, 0)) {
         radeon_ib_discard(pScrn);
commit ffeab7a7058298e15294a3b2c740c731e36dda1d
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Mon Apr 18 18:16:51 2011 -0400

    cayman: fix dword counts in default state
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/cayman_accel.c b/src/cayman_accel.c
index 3d803c2..1dfaece 100644
--- a/src/cayman_accel.c
+++ b/src/cayman_accel.c
@@ -158,12 +158,12 @@ cayman_set_default_state(ScrnInfoPtr pScrn)
     for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
 	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
 
-    BEGIN_BATCH(66);
+    BEGIN_BATCH(73);
     PACK0(PA_SC_MODE_CNTL_0, 2);
     E32(0); // PA_SC_MODE_CNTL_0
     E32(0); // PA_SC_MODE_CNTL_1
 
-    PACK0(PA_SC_CENTROID_PRIORITY_0, 34);
+    PACK0(PA_SC_CENTROID_PRIORITY_0, 27);
     E32((0 << DISTANCE_0_shift) |
 	(1 << DISTANCE_1_shift) |
 	(2 << DISTANCE_2_shift) |
commit 3cbfae361bf5e779d3364f0f31cfd25bd0f59e65
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Wed Mar 2 20:48:19 2011 -0500

    cayman: add spi state to default state
    
    changed in e3145801b80fd4be4cf770128876e86e89bda66f
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/cayman_accel.c b/src/cayman_accel.c
index 8dd901a..3d803c2 100644
--- a/src/cayman_accel.c
+++ b/src/cayman_accel.c
@@ -163,7 +163,7 @@ cayman_set_default_state(ScrnInfoPtr pScrn)
     E32(0); // PA_SC_MODE_CNTL_0
     E32(0); // PA_SC_MODE_CNTL_1
 
-    PACK0(PA_SC_CENTROID_PRIORITY_0, 27);
+    PACK0(PA_SC_CENTROID_PRIORITY_0, 34);
     E32((0 << DISTANCE_0_shift) |
 	(1 << DISTANCE_1_shift) |
 	(2 << DISTANCE_2_shift) |
@@ -227,6 +227,17 @@ cayman_set_default_state(ScrnInfoPtr pScrn)
     E32(0);
     E32(0);
 
+    /* src = semantic id 0; mask = semantic id 1 */
+    EREG(SPI_VS_OUT_ID_0, ((0 << SEMANTIC_0_shift) |
+			   (1 << SEMANTIC_1_shift)));
+    PACK0(SPI_PS_INPUT_CNTL_0 + (0 << 2), 2);
+    /* SPI_PS_INPUT_CNTL_0 maps to GPR[0] - load with semantic id 0 */
+    E32(((0    << SEMANTIC_shift)	|
+	 (0x01 << DEFAULT_VAL_shift)));
+    /* SPI_PS_INPUT_CNTL_1 maps to GPR[1] - load with semantic id 1 */
+    E32(((1    << SEMANTIC_shift)	|
+	 (0x01 << DEFAULT_VAL_shift)));
+
     PACK0(SPI_INPUT_Z, 13);
     E32(0); // SPI_INPUT_Z
     E32(0); // SPI_FOG_CNTL
commit b8ade97c9d0fa5aacb0e3166868bb72e9bc679a6
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Wed Mar 2 20:44:19 2011 -0500

    cayman: first pass at exa/Xv shaders
    
    Main differences with evergreen:
    - 4-way rather than 5-way
    - END_OF_PROGRAM bit removed from CF instructions, use
    CF_INST_END instead.
    - MEGA_FETCH* fields removed from VTX commands
    - no more VC, all fetches go through the TC
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/Makefile.am b/src/Makefile.am
index f52b761..8d4cf17 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -54,7 +54,7 @@ endif
 
 if USE_EXA
 RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c radeon_exa_shared.c \
-	evergreen_exa.c evergreen_accel.c evergreen_shader.c evergreen_textured_videofuncs.c cayman_accel.c
+	evergreen_exa.c evergreen_accel.c evergreen_shader.c evergreen_textured_videofuncs.c cayman_accel.c cayman_shader.c
 endif
 
 AM_CFLAGS = \
@@ -136,6 +136,7 @@ EXTRA_DIST = \
 	evergreen_state.h \
 	cayman_reg.h \
 	cayman_reg_auto.h \
+	cayman_shader.h \
 	ati.h \
 	ativersion.h \
 	bicubic_table.h \
diff --git a/src/cayman_shader.c b/src/cayman_shader.c
new file mode 100644
index 0000000..2cdcc68
--- /dev/null
+++ b/src/cayman_shader.c
@@ -0,0 +1,3125 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Author: Alex Deucher <alexander.deucher at amd.com>
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef XF86DRM_MODE
+
+#include "xf86.h"
+
+#include "cayman_shader.h"
+#include "cayman_reg.h"
+
+/* solid vs --------------------------------------- */
+int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 */
+    shader[i++] = CF_DWORD0(ADDR(4),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(1),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_TC),
+			    BARRIER(1));
+    /* 1 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+    /* 2 - always export a param whether it's used or not */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(0));
+    /* 3 - end */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_END),
+			    BARRIER(1));
+    /* 4/5 */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+			     LDS_REQ(0),
+			     COALESCED_READ(0));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+/* solid ps --------------------------------------- */
+int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(3),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+    /* 1 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+
+    /* 2 - end */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_END),
+			    BARRIER(1));
+    /* 3 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(1));
+    /* 4 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Y),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(1));
+    /* 5 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Z),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Z),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(1));
+    /* 6 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_W),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_W),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MOV),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(1));
+
+    return i;
+}
+
+/* copy vs --------------------------------------- */
+int cayman_copy_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 */
+    shader[i++] = CF_DWORD0(ADDR(4),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(2),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_TC),
+			    BARRIER(1));
+    /* 1 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+    /* 2 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(0));
+    /* 3 - end */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_END),
+			    BARRIER(1));
+    /* 4/5 */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+			     LDS_REQ(0),
+			     COALESCED_READ(0));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 6/7 */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+			     LDS_REQ(0),
+			     COALESCED_READ(0));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+/* copy ps --------------------------------------- */
+int cayman_copy_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* CF INST 0 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(4),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+    /* CF INST 1 */
+    shader[i++] = CF_DWORD0(ADDR(8),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(1),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_TC),
+			    BARRIER(1));
+    /* CF INST 2 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+    /* CF INST 3 - end */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_END),
+			    BARRIER(1));
+    /* 4 interpolate tex coords */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(0));
+    /* 5 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(0));
+    /* 6 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(0));
+    /* 7 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(0));
+
+    /* 8/9 TEX INST 0 */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     INST_MOD(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(0),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(SQ_SEL_X), /* R */
+			     DST_SEL_Y(SQ_SEL_Y), /* G */
+			     DST_SEL_Z(SQ_SEL_Z), /* B */
+			     DST_SEL_W(SQ_SEL_W), /* A */
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_UNNORMALIZED),
+			     COORD_TYPE_Y(TEX_UNNORMALIZED),
+			     COORD_TYPE_Z(TEX_UNNORMALIZED),
+			     COORD_TYPE_W(TEX_UNNORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(0),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    return i;
+}
+
+int cayman_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 */
+    shader[i++] = CF_DWORD0(ADDR(8),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(2),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_TC),
+                            BARRIER(1));
+
+    /* 1 - ALU */
+    shader[i++] = CF_ALU_DWORD0(ADDR(5),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(2),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 2 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+                                          TYPE(SQ_EXPORT_POS),
+                                          RW_GPR(1),
+                                          RW_REL(ABSOLUTE),
+                                          INDEX_GPR(0),
+                                          ELEM_SIZE(3));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                               SRC_SEL_Y(SQ_SEL_Y),
+                                               SRC_SEL_Z(SQ_SEL_Z),
+                                               SRC_SEL_W(SQ_SEL_W),
+                                               BURST_COUNT(1),
+                                               VALID_PIXEL_MODE(0),
+                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                               MARK(0),
+                                               BARRIER(1));
+    /* 3 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+                                          TYPE(SQ_EXPORT_PARAM),
+                                          RW_GPR(0),
+                                          RW_REL(ABSOLUTE),
+                                          INDEX_GPR(0),
+                                          ELEM_SIZE(3));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                               SRC_SEL_Y(SQ_SEL_Y),
+                                               SRC_SEL_Z(SQ_SEL_Z),
+                                               SRC_SEL_W(SQ_SEL_W),
+                                               BURST_COUNT(1),
+                                               VALID_PIXEL_MODE(0),
+                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                               MARK(0),
+                                               BARRIER(0));
+    /* 4 - end */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_END),
+			    BARRIER(1));
+    /* 5 texX / w */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 6 texY / h */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 7 - padding */
+    shader[i++] = 0x00000000;
+    shader[i++] = 0x00000000;
+    /* 8/9 */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+                             FETCH_WHOLE_QUAD(0),
+                             BUFFER_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+			     LDS_REQ(0),
+			     COALESCED_READ(0));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_SEL_X(SQ_SEL_X),
+                                 DST_SEL_Y(SQ_SEL_Y),
+                                 DST_SEL_Z(SQ_SEL_0),
+                                 DST_SEL_W(SQ_SEL_1),
+                                 USE_CONST_FIELDS(0),
+                                 DATA_FORMAT(FMT_32_32_FLOAT),
+                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+                             ENDIAN_SWAP(ENDIAN_NONE),
+                             CONST_BUF_NO_STRIDE(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 10/11 */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+                             FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+                             FETCH_WHOLE_QUAD(0),
+                             BUFFER_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+			     LDS_REQ(0),
+			     COALESCED_READ(0));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_SEL_X(SQ_SEL_X),
+                                 DST_SEL_Y(SQ_SEL_Y),
+                                 DST_SEL_Z(SQ_SEL_0),
+                                 DST_SEL_W(SQ_SEL_1),
+                                 USE_CONST_FIELDS(0),
+                                 DATA_FORMAT(FMT_32_32_FLOAT),
+                                 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+                                 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+                                 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+                             ENDIAN_SWAP(ENDIAN_NONE),
+                             CONST_BUF_NO_STRIDE(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+    /* Writes the Xv pixel shader dword by dword into shader[]; returns the dword count. ChipSet is unused. */
+    /* CF 0: ALU clause at slot 6, I_COUNT(4) - interpolate tex coords (ALU 6-9) */
+    shader[i++] = CF_ALU_DWORD0(ADDR(6),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+    /* CF 1: CALL slot 22 under COND_BOOL on CF_CONST(0) - fetch path sampling resources 0, 1 and 2 */
+    shader[i++] = CF_DWORD0(ADDR(22),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            BARRIER(0));
+    /* CF 2: CALL slot 30 under COND_NOT_BOOL on CF_CONST(0) - fetch path sampling resources 0 and 1 */
+    shader[i++] = CF_DWORD0(ADDR(30),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_NOT_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            BARRIER(0));
+    /* CF 3: ALU clause at slot 10, I_COUNT(12), kcache bank 0 in LOCK_1 mode - the three MAD groups (ALU 10-21) */
+    shader[i++] = CF_ALU_DWORD0(ADDR(10),
+                                KCACHE_BANK0(0),
+                                KCACHE_BANK1(0),
+                                KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+                                KCACHE_ADDR0(0),
+                                KCACHE_ADDR1(0),
+                                I_COUNT(12),
+                                ALT_CONST(0),
+                                CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+                                BARRIER(1));
+    /* CF 4: export r2.xyzw as pixel output CF_PIXEL_MRT0 (EXPORT_DONE) */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+                                          TYPE(SQ_EXPORT_PIXEL),
+                                          RW_GPR(2),
+                                          RW_REL(ABSOLUTE),
+                                          INDEX_GPR(0),
+                                          ELEM_SIZE(3));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+                                               SRC_SEL_Y(SQ_SEL_Y),
+                                               SRC_SEL_Z(SQ_SEL_Z),
+                                               SRC_SEL_W(SQ_SEL_W),
+                                               BURST_COUNT(1),
+                                               VALID_PIXEL_MODE(0),
+                                               CF_INST(SQ_CF_INST_EXPORT_DONE),
+                                               MARK(0),
+                                               BARRIER(1));
+    /* CF 5: end of the main program (CF_INST_END) */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_END),
+			    BARRIER(1));
+    /* ALU 6: interpolate tex coords - INTERP_XY(r0.y, param0.x) -> r0.x, WRITE_MASK(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(0));
+    /* ALU 7: INTERP_XY(r0.x, param0.x) -> r0.y, WRITE_MASK(1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(0));
+    /* ALU 8: INTERP_XY(r0.y, param0.x) targeting r0.z, but with WRITE_MASK(0) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(0));
+    /* ALU 9: INTERP_XY(r0.x, param0.x) targeting r0.w, WRITE_MASK(0); LAST(1) marks the final slot of this group */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(0));
+
+    /* ALU 10-13: r2.xyz = c0.w * r1.x + c0.xyz, r2.w = 1 (c0 = kcache0 constant 0) */
+    /* r2.x = MAD(c0.w, r1.x, c0.x) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_X),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+    /* r2.y = MAD(c0.w, r1.x, c0.y) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Y),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+    /* r2.z = MAD(c0.w, r1.x, c0.z) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+    /* r2.w = MAD(0, 0, 1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(SQ_ALU_SRC_0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_X),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+
+    /* ALU 14-17: r2.xyz = c1.xyz * r1.y + PV.xyz (PV = SQ_ALU_SRC_PV source), r2.w = 1 */
+    /* r2.x = MAD(c1.x, r1.y, pv.x) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_X),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+    /* r2.y = MAD(c1.y, r1.y, pv.y) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Y),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+    /* r2.z = MAD(c1.z, r1.y, pv.z) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+    /* r2.w = MAD(0, 0, 1); SRC2_ELEM is ELEM_W here (ELEM_X in the sibling groups), source is the constant SQ_ALU_SRC_1 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(SQ_ALU_SRC_0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_W),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+    /* ALU 18-21: r2.xyz = c2.xyz * r1.z + PV.xyz, r2.w = 1; all four slots set CLAMP(1) */
+    /* r2.x = MAD(c2.x, r1.z, pv.x) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_X),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(1));
+    /* r2.y = MAD(c2.y, r1.z, pv.y) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Y),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(1));
+    /* r2.z = MAD(c2.z, r1.z, pv.z) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_PV),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_Z),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(1));
+    /* r2.w = MAD(0, 0, 1) */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(SQ_ALU_SRC_0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(SQ_ALU_SRC_0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP3(SRC2_SEL(SQ_ALU_SRC_1),
+                                 SRC2_REL(ABSOLUTE),
+                                 SRC2_ELEM(ELEM_X),
+                                 SRC2_NEG(0),
+                                 ALU_INST(SQ_OP3_INST_MULADD),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(1));
+
+    /* CF 22: TC clause at slot 24, I_COUNT(3) - target of the CALL in CF 1 */
+    shader[i++] = CF_DWORD0(ADDR(24),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(3),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_TC),
+                            BARRIER(1));
+    /* CF 23: RETURN from the subroutine entered at slot 22 */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    BARRIER(1));
+    /* TEX 24/25: sample resource 0 / sampler 0 at r0.xy -> r1.x = result.x, r1.w = 1 (y, z masked) */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             INST_MOD(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_X),
+                             DST_SEL_Y(SQ_SEL_MASK),
+                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_W(SQ_SEL_1),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(0),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* TEX 26/27: sample resource 1 / sampler 1 at r0.xy -> r1.z = result.x (x, y, w masked) */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             INST_MOD(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(1),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_MASK),
+                             DST_SEL_Z(SQ_SEL_X),
+                             DST_SEL_W(SQ_SEL_MASK),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(1),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* TEX 28/29: sample resource 2 / sampler 2 at r0.xy -> r1.y = result.x (x, z, w masked) */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             INST_MOD(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(2),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_X),
+                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_W(SQ_SEL_MASK),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(2),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* CF 30: TC clause at slot 32, I_COUNT(2) - target of the CALL in CF 2 */
+    shader[i++] = CF_DWORD0(ADDR(32),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_ACTIVE),
+                            I_COUNT(2),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_TC),
+                            BARRIER(1));
+    /* CF 31: RETURN from the subroutine entered at slot 30 */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    BARRIER(1));
+    /* TEX 32/33: sample resource 0 / sampler 0 at r0.xy -> r1.x = result.x, r1.w = 1 (y, z masked) */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             INST_MOD(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(0),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_X),
+                             DST_SEL_Y(SQ_SEL_MASK),
+                             DST_SEL_Z(SQ_SEL_MASK),
+                             DST_SEL_W(SQ_SEL_1),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(0),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* TEX 34/35: sample resource 1 / sampler 1 at r0.xy -> r1.y = result.x, r1.z = result.y (x, w masked) */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+                             INST_MOD(0),
+                             FETCH_WHOLE_QUAD(0),
+                             RESOURCE_ID(1),
+                             SRC_GPR(0),
+                             SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+                             DST_REL(ABSOLUTE),
+                             DST_SEL_X(SQ_SEL_MASK),
+                             DST_SEL_Y(SQ_SEL_X),
+                             DST_SEL_Z(SQ_SEL_Y),
+                             DST_SEL_W(SQ_SEL_MASK),
+                             LOD_BIAS(0),
+                             COORD_TYPE_X(TEX_NORMALIZED),
+                             COORD_TYPE_Y(TEX_NORMALIZED),
+                             COORD_TYPE_Z(TEX_NORMALIZED),
+                             COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+                             OFFSET_Y(0),
+                             OFFSET_Z(0),
+                             SAMPLER_ID(1),
+                             SRC_SEL_X(SQ_SEL_X),
+                             SRC_SEL_Y(SQ_SEL_Y),
+                             SRC_SEL_Z(SQ_SEL_0),
+                             SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    return i;
+}
+
+/* comp vs --------------------------------------- */
+int cayman_comp_vs(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 */
+    shader[i++] = CF_DWORD0(ADDR(3),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            BARRIER(0));
+    /* 1 */
+    shader[i++] = CF_DWORD0(ADDR(9),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_NOT_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            BARRIER(0));
+    /* 2 - end */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_END),
+			    BARRIER(1));
+    /* 3 - mask sub */
+    shader[i++] = CF_DWORD0(ADDR(44),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(3),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_TC),
+			    BARRIER(1));
+
+    /* 4 - ALU */
+    shader[i++] = CF_ALU_DWORD0(ADDR(14),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(20),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 5 - dst */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(2),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+    /* 6 - src */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT),
+					       MARK(0),
+					       BARRIER(0));
+    /* 7 - mask */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(1),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(0));
+    /* 8 */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    BARRIER(1));
+    /* 9 - non-mask sub */
+    shader[i++] = CF_DWORD0(ADDR(50),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(2),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_TC),
+			    BARRIER(1));
+
+    /* 10 - ALU */
+    shader[i++] = CF_ALU_DWORD0(ADDR(34),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(10),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 11 - dst */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_POS0),
+					  TYPE(SQ_EXPORT_POS),
+					  RW_GPR(1),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       BURST_COUNT(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+    /* 12 - src */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(0),
+					  TYPE(SQ_EXPORT_PARAM),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(0));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_0),
+					       SRC_SEL_W(SQ_SEL_1),
+					       BURST_COUNT(0),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(0));
+    /* 13 */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    BARRIER(1));
+
+    /* 14 srcX.x DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(3),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 15 srcX.y DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(3),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 16 srcX.z DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(3),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+
+    /* 17 srcX.w DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(3),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+
+    /* 18 srcY.x DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(3),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 19 srcY.y DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(3),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 20 srcY.z DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(3),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+
+    /* 21 srcY.w DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 1),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(3),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+
+    /* 22 maskX.x DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(4),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 23 maskX.y DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(4),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 24 maskX.z DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(4),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+
+    /* 25 maskX.w DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(4),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+
+    /* 26 maskY.x DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(4),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 27 maskY.y DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(4),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 28 maskY.z DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(4),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+
+    /* 29 maskY.w DOT4 - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(4),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+
+    /* 30 srcX / w */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 31 srcY / h */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 3),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(1),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 32 maskX / w */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 2),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 33 maskY / h */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 4),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 3),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 34 srcX.x DOT4 - non-mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 35 srcX.y DOT4 - non-mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 36 srcX.z DOT4 - non-mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+
+    /* 37 srcX.w DOT4 - non-mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+
+    /* 38 srcY.x DOT4 - non-mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_X),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 39 srcY.y DOT4 - non-mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Y),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+
+    /* 40 srcY.z DOT4 - non-mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Z),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_Z),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Z),
+                                 CLAMP(0));
+
+    /* 41 srcY.w DOT4 - non-mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_W),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_LOOP),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(0),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_DOT4),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(2),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_W),
+                                 CLAMP(0));
+
+    /* 42 srcX / w */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_X),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 0),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_X),
+                                 CLAMP(0));
+
+    /* 43 srcY / h */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 2),
+                             SRC0_REL(ABSOLUTE),
+                             SRC0_ELEM(ELEM_Y),
+                             SRC0_NEG(0),
+                             SRC1_SEL(ALU_SRC_KCACHE0_BASE + 1),
+                             SRC1_REL(ABSOLUTE),
+                             SRC1_ELEM(ELEM_W),
+                             SRC1_NEG(0),
+                             INDEX_MODE(SQ_INDEX_AR_X),
+                             PRED_SEL(SQ_PRED_SEL_OFF),
+                             LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+                                 SRC1_ABS(0),
+                                 UPDATE_EXECUTE_MASK(0),
+                                 UPDATE_PRED(0),
+                                 WRITE_MASK(1),
+                                 OMOD(SQ_ALU_OMOD_OFF),
+                                 ALU_INST(SQ_OP2_INST_MUL),
+                                 BANK_SWIZZLE(SQ_ALU_VEC_012),
+                                 DST_GPR(0),
+                                 DST_REL(ABSOLUTE),
+                                 DST_ELEM(ELEM_Y),
+                                 CLAMP(0));
+    /* mask vfetch - 44/45 - dst */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+			     LDS_REQ(0),
+			     COALESCED_READ(0));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(2),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 46/47 - src */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+			     LDS_REQ(0),
+			     COALESCED_READ(0));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_1),
+				 DST_SEL_W(SQ_SEL_0),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 48/49 - mask */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+			     LDS_REQ(0),
+			     COALESCED_READ(0));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_1),
+				 DST_SEL_W(SQ_SEL_0),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(16),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+
+    /* no mask vfetch - 50/51 - dst */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+			     LDS_REQ(0),
+			     COALESCED_READ(0));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(1),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_0),
+				 DST_SEL_W(SQ_SEL_1),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(0),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+			     ALT_CONST(0),
+			     BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+    /* 52/53 - src */
+    shader[i++] = VTX_DWORD0(VTX_INST(SQ_VTX_INST_FETCH),
+			     FETCH_TYPE(SQ_VTX_FETCH_VERTEX_DATA),
+			     FETCH_WHOLE_QUAD(0),
+			     BUFFER_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     STRUCTURED_READ(SQ_VTX_STRU_READ_OFF),
+			     LDS_REQ(0),
+			     COALESCED_READ(0));
+    shader[i++] = VTX_DWORD1_GPR(DST_GPR(0),
+				 DST_REL(0),
+				 DST_SEL_X(SQ_SEL_X),
+				 DST_SEL_Y(SQ_SEL_Y),
+				 DST_SEL_Z(SQ_SEL_1),
+				 DST_SEL_W(SQ_SEL_0),
+				 USE_CONST_FIELDS(0),
+				 DATA_FORMAT(FMT_32_32_FLOAT),
+				 NUM_FORMAT_ALL(SQ_NUM_FORMAT_SCALED),
+				 FORMAT_COMP_ALL(SQ_FORMAT_COMP_SIGNED),
+				 SRF_MODE_ALL(SRF_MODE_ZERO_CLAMP_MINUS_ONE));
+    shader[i++] = VTX_DWORD2(OFFSET(8),
+			     ENDIAN_SWAP(ENDIAN_NONE),
+			     CONST_BUF_NO_STRIDE(0),
+                             ALT_CONST(0),
+                             BUFFER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = VTX_DWORD_PAD;
+
+    return i;
+}
+
+/* comp ps --------------------------------------- */
+int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* shader)
+{
+    int i = 0;
+
+    /* 0 */
+    shader[i++] = CF_DWORD0(ADDR(3),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            BARRIER(0));
+    /* 1 */
+    shader[i++] = CF_DWORD0(ADDR(8),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+                            CF_CONST(0),
+                            COND(SQ_CF_COND_NOT_BOOL),
+                            I_COUNT(0),
+                            VALID_PIXEL_MODE(0),
+                            CF_INST(SQ_CF_INST_CALL),
+                            BARRIER(0));
+    /* 2 - end */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_END),
+			    BARRIER(1));
+    /* 3 - mask sub */
+    shader[i++] = CF_ALU_DWORD0(ADDR(12),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(8),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 4 */
+    shader[i++] = CF_DWORD0(ADDR(28),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(2),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_TC),
+			    BARRIER(1));
+
+    /* 5 */
+    shader[i++] = CF_ALU_DWORD0(ADDR(20),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_LOCK_1));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+
+    /* 6 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(2),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+    /* 7 */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    BARRIER(1));
+
+    /* 8 - non-mask sub */
+    shader[i++] = CF_ALU_DWORD0(ADDR(24),
+				KCACHE_BANK0(0),
+				KCACHE_BANK1(0),
+				KCACHE_MODE0(SQ_CF_KCACHE_NOP));
+    shader[i++] = CF_ALU_DWORD1(KCACHE_MODE1(SQ_CF_KCACHE_NOP),
+				KCACHE_ADDR0(0),
+				KCACHE_ADDR1(0),
+				I_COUNT(4),
+				ALT_CONST(0),
+				CF_INST(SQ_CF_INST_ALU),
+				WHOLE_QUAD_MODE(0),
+				BARRIER(1));
+    /* 9 */
+    shader[i++] = CF_DWORD0(ADDR(32),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(1),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_TC),
+			    BARRIER(1));
+
+    /* 10 */
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD0(ARRAY_BASE(CF_PIXEL_MRT0),
+					  TYPE(SQ_EXPORT_PIXEL),
+					  RW_GPR(0),
+					  RW_REL(ABSOLUTE),
+					  INDEX_GPR(0),
+					  ELEM_SIZE(1));
+    shader[i++] = CF_ALLOC_IMP_EXP_DWORD1_SWIZ(SRC_SEL_X(SQ_SEL_X),
+					       SRC_SEL_Y(SQ_SEL_Y),
+					       SRC_SEL_Z(SQ_SEL_Z),
+					       SRC_SEL_W(SQ_SEL_W),
+					       BURST_COUNT(1),
+					       VALID_PIXEL_MODE(0),
+					       CF_INST(SQ_CF_INST_EXPORT_DONE),
+					       MARK(0),
+					       BARRIER(1));
+
+    /* 11 */
+    shader[i++] = CF_DWORD0(ADDR(0),
+			    JUMPTABLE_SEL(SQ_CF_JUMPTABLE_SEL_CONST_A));
+    shader[i++] = CF_DWORD1(POP_COUNT(0),
+			    CF_CONST(0),
+			    COND(SQ_CF_COND_ACTIVE),
+			    I_COUNT(0),
+			    VALID_PIXEL_MODE(0),
+			    CF_INST(SQ_CF_INST_RETURN),
+			    BARRIER(1));
+
+    /* 12 interpolate src tex coords - mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(0));
+    /* 13 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(0));
+    /* 14 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(0));
+    /* 15 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(1),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(0));
+
+    /* 16 interpolate mask tex coords */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(0));
+    /* 17 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(0));
+    /* 18 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(0));
+    /* 19 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(0));
+
+    /* 20 - alu 0 */
+    /* MUL gpr[2].x gpr[0].x gpr[1].x */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(1));
+    /* 21 - alu 1 */
+    /* MUL gpr[2].y gpr[0].y gpr[1].y */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Y),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(1));
+    /* 22 - alu 2 */
+    /* MUL gpr[2].z gpr[0].z gpr[1].z */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Z),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_Z),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(1));
+    /* 23 - alu 3 */
+    /* MUL gpr[2].w gpr[0].w gpr[1].w */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_W),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_GPR_BASE + 1),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_W),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_LOOP),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_MUL),
+				 BANK_SWIZZLE(SQ_ALU_VEC_012),
+				 DST_GPR(2),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(1));
+
+    /* 24 - interpolate tex coords - non-mask */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_X),
+				 CLAMP(0));
+    /* 25 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(1),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Y),
+				 CLAMP(0));
+    /* 26 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_Y),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(0));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_Z),
+				 CLAMP(0));
+    /* 27 */
+    shader[i++] = ALU_DWORD0(SRC0_SEL(ALU_SRC_GPR_BASE + 0),
+			     SRC0_REL(ABSOLUTE),
+			     SRC0_ELEM(ELEM_X),
+			     SRC0_NEG(0),
+			     SRC1_SEL(ALU_SRC_PARAM_BASE + 0),
+			     SRC1_REL(ABSOLUTE),
+			     SRC1_ELEM(ELEM_X),
+			     SRC1_NEG(0),
+			     INDEX_MODE(SQ_INDEX_AR_X),
+			     PRED_SEL(SQ_PRED_SEL_OFF),
+			     LAST(1));
+    shader[i++] = ALU_DWORD1_OP2(SRC0_ABS(0),
+				 SRC1_ABS(0),
+				 UPDATE_EXECUTE_MASK(0),
+				 UPDATE_PRED(0),
+				 WRITE_MASK(0),
+				 OMOD(SQ_ALU_OMOD_OFF),
+				 ALU_INST(SQ_OP2_INST_INTERP_XY),
+				 BANK_SWIZZLE(SQ_ALU_VEC_210),
+				 DST_GPR(0),
+				 DST_REL(ABSOLUTE),
+				 DST_ELEM(ELEM_W),
+				 CLAMP(0));
+
+    /* 28/29 - src - mask */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     INST_MOD(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(0),
+			     SRC_GPR(1),
+			     SRC_REL(ABSOLUTE),
+			     ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(1),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_Z),
+			     DST_SEL_W(SQ_SEL_W),
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_NORMALIZED),
+			     COORD_TYPE_Y(TEX_NORMALIZED),
+			     COORD_TYPE_Z(TEX_NORMALIZED),
+			     COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(0),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+    /* 30/31 - mask */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     INST_MOD(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(1),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+                             ALT_CONST(0),
+                             RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+                             SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(0),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_Z),
+			     DST_SEL_W(SQ_SEL_W),
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_NORMALIZED),
+			     COORD_TYPE_Y(TEX_NORMALIZED),
+			     COORD_TYPE_Z(TEX_NORMALIZED),
+			     COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(1),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    /* 32/33 - src - non-mask */
+    shader[i++] = TEX_DWORD0(TEX_INST(SQ_TEX_INST_SAMPLE),
+			     INST_MOD(0),
+			     FETCH_WHOLE_QUAD(0),
+			     RESOURCE_ID(0),
+			     SRC_GPR(0),
+			     SRC_REL(ABSOLUTE),
+			     ALT_CONST(0),
+			     RESOURCE_INDEX_MODE(SQ_CF_INDEX_NONE),
+			     SAMPLER_INDEX_MODE(SQ_CF_INDEX_NONE));
+    shader[i++] = TEX_DWORD1(DST_GPR(0),
+			     DST_REL(ABSOLUTE),
+			     DST_SEL_X(SQ_SEL_X),
+			     DST_SEL_Y(SQ_SEL_Y),
+			     DST_SEL_Z(SQ_SEL_Z),
+			     DST_SEL_W(SQ_SEL_W),
+			     LOD_BIAS(0),
+			     COORD_TYPE_X(TEX_NORMALIZED),
+			     COORD_TYPE_Y(TEX_NORMALIZED),
+			     COORD_TYPE_Z(TEX_NORMALIZED),
+			     COORD_TYPE_W(TEX_NORMALIZED));
+    shader[i++] = TEX_DWORD2(OFFSET_X(0),
+			     OFFSET_Y(0),
+			     OFFSET_Z(0),
+			     SAMPLER_ID(0),
+			     SRC_SEL_X(SQ_SEL_X),
+			     SRC_SEL_Y(SQ_SEL_Y),
+			     SRC_SEL_Z(SQ_SEL_0),
+			     SRC_SEL_W(SQ_SEL_1));
+    shader[i++] = TEX_DWORD_PAD;
+
+    return i;
+}
+
+#endif
diff --git a/src/cayman_shader.h b/src/cayman_shader.h
new file mode 100644
index 0000000..76a74d8
--- /dev/null
+++ b/src/cayman_shader.h
@@ -0,0 +1,279 @@
+/*
+ * Cayman shaders
+ *
+ * Copyright (C) 2011  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Shader macros
+ */
+
+#ifndef __SHADER_H__
+#define __SHADER_H__
+
+#include "radeon.h"
+
+/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */
+
+
+// CF insts
+// addr
+#define ADDR(x)  (x)
+// jumptable
+#define JUMPTABLE_SEL(x) (x)
+// pc
+#define POP_COUNT(x)      (x)
+// const
+#define CF_CONST(x)       (x)
+// cond
+#define COND(x)        (x)		// SQ_COND_*
+// count
+#define I_COUNT(x)        ((x) ? ((x) - 1) : 0)
+// vpm
+#define VALID_PIXEL_MODE(x) (x)
+// cf inst
+#define CF_INST(x)        (x)		// SQ_CF_INST_*
+// wqm
+#define WHOLE_QUAD_MODE(x)  (x)
+// barrier
+#define BARRIER(x)          (x)
+//kb0
+#define KCACHE_BANK0(x)          (x)
+//kb1
+#define KCACHE_BANK1(x)          (x)
+// km0/1
+#define KCACHE_MODE0(x)          (x)
+#define KCACHE_MODE1(x)          (x)	// SQ_CF_KCACHE_*
+//
+#define KCACHE_ADDR0(x)          (x)
+#define KCACHE_ADDR1(x)          (x)
+
+#define ALT_CONST(x)            (x)
+
+#define ARRAY_BASE(x)        (x)
+// export pixel
+#define CF_PIXEL_MRT0         0
+#define CF_PIXEL_MRT1         1
+#define CF_PIXEL_MRT2         2
+#define CF_PIXEL_MRT3         3
+#define CF_PIXEL_MRT4         4
+#define CF_PIXEL_MRT5         5
+#define CF_PIXEL_MRT6         6
+#define CF_PIXEL_MRT7         7
+// computed Z
+#define CF_COMPUTED_Z         61
+// export pos
+#define CF_POS0               60
+#define CF_POS1               61
+#define CF_POS2               62
+#define CF_POS3               63
+// export param
+// 0...31
+#define TYPE(x)              (x)	// SQ_EXPORT_*
+#define RW_GPR(x)            (x)
+#define RW_REL(x)            (x)
+#define ABSOLUTE                  0
+#define RELATIVE                  1
+#define INDEX_GPR(x)            (x)
+#define ELEM_SIZE(x)            (x ? (x - 1) : 0)
+#define BURST_COUNT(x)          (x ? (x - 1) : 0)
+#define MARK(x)         (x)
+
+// swiz
+#define SRC_SEL_X(x)    (x)		// SQ_SEL_* each
+#define SRC_SEL_Y(x)    (x)
+#define SRC_SEL_Z(x)    (x)
+#define SRC_SEL_W(x)    (x)
+
+#define CF_DWORD0(addr, jmptbl) ((addr) | ((jmptbl) << 24))
+#define CF_DWORD1(pc, cf_const, cond, count, vpm, cf_inst, b) \
+        (((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | ((count) << 10) | \
+         ((vpm) << 20) | ((cf_inst) << 22) | ((b) << 31))
+
+#define CF_ALU_DWORD0(addr, kb0, kb1, km0) (((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | ((km0) << 30))
+#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, alt_const, cf_inst, wqm, b) \
+        (((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
+	 ((count) << 18) | ((alt_const) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | ((b) << 31))
+
+#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
+	 (((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | ((rr) << 22) | \
+	  ((index_gpr) << 23) | ((es) << 30))
+#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, bc, vpm, cf_inst, m, b) \
+        (((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | \
+	 ((bc) << 16) | ((vpm) << 20) | ((cf_inst) << 22) | ((m) << 30) | ((b) << 31))
+
+// ALU clause insts
+#define SRC0_SEL(x)        (x)
+#define SRC1_SEL(x)        (x)
+#define SRC2_SEL(x)        (x)
+// src[0-2]_sel
+//   0-127 GPR
+// 128-159 kcache constants bank 0
+// 160-191 kcache constants bank 1
+// 192-255 inline const values
+// 256-287 kcache constants bank 2
+// 288-319 kcache constants bank 3
+// 219-255 special SQ_ALU_SRC_* (0, 1, etc.)
+// 448-480 src param space
+#define ALU_SRC_GPR_BASE        0
+#define ALU_SRC_KCACHE0_BASE  128
+#define ALU_SRC_KCACHE1_BASE  160
+#define ALU_SRC_INLINE_K_BASE 192
+#define ALU_SRC_KCACHE2_BASE  256
+#define ALU_SRC_KCACHE3_BASE  288
+#define ALU_SRC_PARAM_BASE    448
+
+#define SRC0_REL(x)        (x)
+#define SRC1_REL(x)        (x)
+#define SRC2_REL(x)        (x)
+// elem
+#define SRC0_ELEM(x)        (x)
+#define SRC1_ELEM(x)        (x)
+#define SRC2_ELEM(x)        (x)
+#define ELEM_X        0
+#define ELEM_Y        1
+#define ELEM_Z        2
+#define ELEM_W        3
+// neg
+#define SRC0_NEG(x)        (x)
+#define SRC1_NEG(x)        (x)
+#define SRC2_NEG(x)        (x)
+// im
+#define INDEX_MODE(x)    (x)		// SQ_INDEX_*
+// ps
+#define PRED_SEL(x)      (x)		// SQ_PRED_SEL_*
+// last
+#define LAST(x)          (x)
+// abs
+#define SRC0_ABS(x)       (x)
+#define SRC1_ABS(x)       (x)
+// uem
+#define UPDATE_EXECUTE_MASK(x) (x)
+// up
+#define UPDATE_PRED(x)      (x)
+// wm
+#define WRITE_MASK(x)   (x)
+// omod
+#define OMOD(x)        (x)		// SQ_ALU_OMOD_*
+// alu inst
+#define ALU_INST(x)        (x)		// SQ_ALU_INST_*
+//bs
+#define BANK_SWIZZLE(x)        (x)	// SQ_ALU_VEC_*
+#define DST_GPR(x)        (x)
+#define DST_REL(x)        (x)
+#define DST_ELEM(x)       (x)
+#define CLAMP(x)          (x)
+
+#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
+        (((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
+         ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
+	 ((im) << 26) | ((ps) << 29) | ((last) << 31))
+
+#define ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
+        (((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
+         ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | ((dst_gpr) << 21) | \
+	 ((dr) << 28) | ((de) << 29) | ((clamp) << 31))
+
+#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
+        (((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
+         ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
+	 ((de) << 29) | ((clamp) << 31))
+
+// VTX clause insts
+// vtx insts
+#define VTX_INST(x)        (x)		// SQ_VTX_INST_*
+
+// fetch type
+#define FETCH_TYPE(x)        (x)	// SQ_VTX_FETCH_*
+
+#define FETCH_WHOLE_QUAD(x)        (x)
+#define BUFFER_ID(x)        (x)
+#define SRC_GPR(x)          (x)
+#define SRC_REL(x)          (x)
+
+#define STRUCTURED_READ(x)    (x)
+#define LDS_REQ(x)            (x)
+#define COALESCED_READ(x)     (x)
+
+#define DST_SEL_X(x)          (x)
+#define DST_SEL_Y(x)          (x)
+#define DST_SEL_Z(x)          (x)
+#define DST_SEL_W(x)          (x)
+#define USE_CONST_FIELDS(x)   (x)
+#define DATA_FORMAT(x)        (x)
+// num format
+#define NUM_FORMAT_ALL(x)     (x)	// SQ_NUM_FORMAT_*
+// format comp
+#define FORMAT_COMP_ALL(x)     (x)	// SQ_FORMAT_COMP_*
+// sma
+#define SRF_MODE_ALL(x)     (x)
+#define SRF_MODE_ZERO_CLAMP_MINUS_ONE      0
+#define SRF_MODE_NO_ZERO                   1
+#define OFFSET(x)     (x)
+// endian swap
+#define ENDIAN_SWAP(x)     (x)		// SQ_ENDIAN_*
+#define CONST_BUF_NO_STRIDE(x)     (x)
+#define BUFFER_INDEX_MODE(x) (x)
+
+#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, ssy, str, ldsr, cr) \
+        (((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
+	 ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | ((ssy) << 26) | \
+	 ((str) << 28) | ((ldsr) << 30) | ((cr) << 31))
+#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
+        (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+	 ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | ((fca) << 30) | ((sma) << 31))
+#define VTX_DWORD2(offset, es, cbns, alt_const, bim)			\
+	(((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((alt_const) << 20) | ((bim) << 21))
+#define VTX_DWORD_PAD 0x00000000
+
+// TEX clause insts
+// tex insts
+#define TEX_INST(x)     (x)		// SQ_TEX_INST_*
+#define INST_MOD(x)     (x)
+#define FETCH_WHOLE_QUAD(x)     (x)
+#define RESOURCE_ID(x)          (x)
+#define RESOURCE_INDEX_MODE(x)          (x)
+#define SAMPLER_INDEX_MODE(x)          (x)
+
+#define LOD_BIAS(x)     (x)
+//ct
+#define COORD_TYPE_X(x)     (x)
+#define COORD_TYPE_Y(x)     (x)
+#define COORD_TYPE_Z(x)     (x)
+#define COORD_TYPE_W(x)     (x)
+#define TEX_UNNORMALIZED                0
+#define TEX_NORMALIZED                  1
+#define OFFSET_X(x) (((int)(x) * 2) & 0x1f) /* 4:1-bits 2's-complement fixed-point: [-8.0..7.5] */
+#define OFFSET_Y(x) (((int)(x) * 2) & 0x1f)
+#define OFFSET_Z(x) (((int)(x) * 2) & 0x1f)
+#define SAMPLER_ID(x)     (x)
+
+#define TEX_DWORD0(tex_inst, im, fwq, resource_id, src_gpr, sr, ac, rim, sim) \
+	 (((tex_inst) << 0) | ((im) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
+	  ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24) | ((rim) << 25) | ((sim) << 27))
+#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
+        (((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | ((dsw) << 18) | \
+	 ((lod_bias) << 21) | ((ctx) << 28) | ((cty) << 29) | ((ctz) << 30) | ((ctw) << 31))
+#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
+        (((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | ((sampler_id) << 15) | \
+	 ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | ((ssw) << 29))
+#define TEX_DWORD_PAD 0x00000000
+
+#endif
diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 8879d01..58f3bf1 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -43,6 +43,18 @@
 #include "radeon_exa_shared.h"
 #include "radeon_vbo.h"
 
+extern int cayman_solid_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int cayman_solid_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int cayman_copy_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int cayman_copy_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
+extern int cayman_xv_vs(RADEONChipFamily ChipSet, uint32_t* shader);
+extern int cayman_xv_ps(RADEONChipFamily ChipSet, uint32_t* shader);
+
+extern int cayman_comp_vs(RADEONChipFamily ChipSet, uint32_t* vs);
+extern int cayman_comp_ps(RADEONChipFamily ChipSet, uint32_t* ps);
+
 static void
 EVERGREENDoneSolid(PixmapPtr pPix);
 
@@ -1685,7 +1697,7 @@ EVERGREENAllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen)
     return TRUE;
 }
 
-Bool
+static Bool
 EVERGREENLoadShaders(ScrnInfoPtr pScrn)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -1738,6 +1750,59 @@ EVERGREENLoadShaders(ScrnInfoPtr pScrn)
     return TRUE;
 }
 
+static Bool
+CAYMANLoadShaders(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+    RADEONChipFamily ChipSet = info->ChipFamily;
+    uint32_t *shader;
+    int ret;
+
+    ret = radeon_bo_map(accel_state->shaders_bo, 1);
+    if (ret) {
+	FatalError("failed to map shader %d\n", ret);
+	return FALSE;
+    }
+    shader = accel_state->shaders_bo->ptr;
+
+    /*  solid vs --------------------------------------- */
+    accel_state->solid_vs_offset = 0;
+    cayman_solid_vs(ChipSet, shader + accel_state->solid_vs_offset / 4);
+
+    /*  solid ps --------------------------------------- */
+    accel_state->solid_ps_offset = 512;
+    cayman_solid_ps(ChipSet, shader + accel_state->solid_ps_offset / 4);
+
+    /*  copy vs --------------------------------------- */
+    accel_state->copy_vs_offset = 1024;
+    cayman_copy_vs(ChipSet, shader + accel_state->copy_vs_offset / 4);
+
+    /*  copy ps --------------------------------------- */
+    accel_state->copy_ps_offset = 1536;
+    cayman_copy_ps(ChipSet, shader + accel_state->copy_ps_offset / 4);
+
+    /*  comp vs --------------------------------------- */
+    accel_state->comp_vs_offset = 2048;
+    cayman_comp_vs(ChipSet, shader + accel_state->comp_vs_offset / 4);
+
+    /*  comp ps --------------------------------------- */
+    accel_state->comp_ps_offset = 2560;
+    cayman_comp_ps(ChipSet, shader + accel_state->comp_ps_offset / 4);
+
+    /*  xv vs --------------------------------------- */
+    accel_state->xv_vs_offset = 3072;
+    cayman_xv_vs(ChipSet, shader + accel_state->xv_vs_offset / 4);
+
+    /*  xv ps --------------------------------------- */
+    accel_state->xv_ps_offset = 3584;
+    cayman_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4);
+
+    radeon_bo_unmap(accel_state->shaders_bo);
+
+    return TRUE;
+}
+
 Bool
 EVERGREENDrawInit(ScreenPtr pScreen)
 {
@@ -1837,8 +1902,13 @@ EVERGREENDrawInit(ScreenPtr pScreen)
     if (!EVERGREENAllocShaders(pScrn, pScreen))
 	return FALSE;
 
-    if (!EVERGREENLoadShaders(pScrn))
-	return FALSE;
+    if (info->ChipFamily == CHIP_FAMILY_CAYMAN) {
+	if (!CAYMANLoadShaders(pScrn))
+	    return FALSE;
+    } else {
+	if (!EVERGREENLoadShaders(pScrn))
+	    return FALSE;
+    }
 
     exaMarkSync(pScreen);
 
diff --git a/src/radeon.h b/src/radeon.h
index a9a2b69..dd83a69 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -1314,7 +1314,6 @@ extern Bool R600DrawInit(ScreenPtr pScreen);
 extern Bool R600LoadShaders(ScrnInfoPtr pScrn);
 #ifdef XF86DRM_MODE
 extern Bool EVERGREENDrawInit(ScreenPtr pScreen);
-extern Bool EVERGREENLoadShaders(ScrnInfoPtr pScrn);
 #endif
 #endif
 
commit 01b646ed800732985c1638b147716641a99082f9
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Wed Mar 2 20:39:38 2011 -0500

    cayman: add a default state function
    
    The rest of the state functions can be shared
    with evergreen.  I've noted where there are
    differences.
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/Makefile.am b/src/Makefile.am
index 855d99c..f52b761 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -54,7 +54,7 @@ endif
 
 if USE_EXA
 RADEON_EXA_SOURCES = radeon_exa.c r600_exa.c r6xx_accel.c r600_textured_videofuncs.c r600_shader.c radeon_exa_shared.c \
-	evergreen_exa.c evergreen_accel.c evergreen_shader.c evergreen_textured_videofuncs.c
+	evergreen_exa.c evergreen_accel.c evergreen_shader.c evergreen_textured_videofuncs.c cayman_accel.c
 endif
 
 AM_CFLAGS = \
@@ -134,6 +134,8 @@ EXTRA_DIST = \
 	evergreen_reg_auto.h \
 	evergreen_shader.h \
 	evergreen_state.h \
+	cayman_reg.h \
+	cayman_reg_auto.h \
 	ati.h \
 	ativersion.h \
 	bicubic_table.h \
diff --git a/src/cayman_accel.c b/src/cayman_accel.c
new file mode 100644
index 0000000..8dd901a
--- /dev/null
+++ b/src/cayman_accel.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright 2011 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors: Alex Deucher <alexander.deucher at amd.com>
+ *
+ */
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef XF86DRM_MODE
+
+#include "xf86.h"
+
+#include <errno.h>
+
+#include "radeon.h"
+#include "radeon_reg.h"
+#include "cayman_reg.h"
+#include "evergreen_state.h"
+
+#include "radeon_drm.h"
+#include "radeon_vbo.h"
+#include "radeon_exa_shared.h"
+
+/*
+ * Setup of default state
+ */
+
+void
+cayman_set_default_state(ScrnInfoPtr pScrn)
+{
+    tex_resource_t tex_res;
+    shader_config_t fs_conf;
+    int i;
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct radeon_accel_state *accel_state = info->accel_state;
+
+    if (accel_state->XInited3D)
+	return;
+
+    memset(&tex_res, 0, sizeof(tex_resource_t));
+    memset(&fs_conf, 0, sizeof(shader_config_t));
+
+    accel_state->XInited3D = TRUE;
+
+    evergreen_start_3d(pScrn);
+
+    BEGIN_BATCH(21);
+    EREG(SQ_LDS_ALLOC_PS, 0);
+
+    PACK0(SQ_ESGS_RING_ITEMSIZE, 6);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(SQ_GS_VERT_ITEMSIZE, 4);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(SQ_VTX_BASE_VTX_LOC, 2);
+    E32(0);
+    E32(0);
+    END_BATCH();
+
+    /* DB */
+    BEGIN_BATCH(3 + 2);
+    EREG(DB_Z_INFO,                           0);
+    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(3 + 2);
+    EREG(DB_STENCIL_INFO,                     0);
+    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(3 + 2);
+    EREG(DB_HTILE_DATA_BASE,                    0);
+    RELOC_BATCH(accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0);
+    END_BATCH();
+
+    BEGIN_BATCH(52);
+    EREG(DB_DEPTH_INFO,                       0);
+    EREG(DB_DEPTH_CONTROL,                    0);
+
+    PACK0(PA_SC_VPORT_ZMIN_0, 2);
+    EFLOAT(0.0); // PA_SC_VPORT_ZMIN_0
+    EFLOAT(1.0); // PA_SC_VPORT_ZMAX_0
+
+    PACK0(DB_RENDER_CONTROL, 5);
+    E32(STENCIL_COMPRESS_DISABLE_bit | DEPTH_COMPRESS_DISABLE_bit); // DB_RENDER_CONTROL
+    E32(0); // DB_COUNT_CONTROL
+    E32(0); // DB_DEPTH_VIEW
+    E32(0x2a); // DB_RENDER_OVERRIDE
+    E32(0); // DB_RENDER_OVERRIDE2
+
+    PACK0(DB_STENCIL_CLEAR, 2);
+    E32(0); // DB_STENCIL_CLEAR
+    E32(0); // DB_DEPTH_CLEAR
+
+    EREG(DB_ALPHA_TO_MASK,                    ((2 << ALPHA_TO_MASK_OFFSET0_shift)	|
+					       (2 << ALPHA_TO_MASK_OFFSET1_shift)	|
+					       (2 << ALPHA_TO_MASK_OFFSET2_shift)	|
+					       (2 << ALPHA_TO_MASK_OFFSET3_shift)));
+
+    EREG(DB_SHADER_CONTROL, ((EARLY_Z_THEN_LATE_Z << Z_ORDER_shift) |
+			     DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */
+
+    // SX
+    EREG(SX_MISC,               0);
+
+    // CB
+    PACK0(SX_ALPHA_TEST_CONTROL, 5);
+    E32(0); // SX_ALPHA_TEST_CONTROL
+    E32(0x00000000); //CB_BLEND_RED
+    E32(0x00000000); //CB_BLEND_GREEN
+    E32(0x00000000); //CB_BLEND_BLUE
+    E32(0x00000000); //CB_BLEND_ALPHA
+
+    EREG(CB_SHADER_MASK,                      OUTPUT0_ENABLE_mask);
+
+    // SC
+    EREG(PA_SC_WINDOW_OFFSET,                 ((0 << WINDOW_X_OFFSET_shift) |
+					       (0 << WINDOW_Y_OFFSET_shift)));
+    EREG(PA_SC_CLIPRECT_RULE,                 CLIP_RULE_mask);
+    EREG(PA_SC_EDGERULE,             0xAAAAAAAA);
+    EREG(PA_SU_HARDWARE_SCREEN_OFFSET, 0);
+    END_BATCH();
+
+    /* clip boolean is set to always visible -> doesn't matter */
+    for (i = 0; i < PA_SC_CLIPRECT_0_TL_num; i++)
+	evergreen_set_clip_rect (pScrn, i, 0, 0, 8192, 8192);
+
+    for (i = 0; i < PA_SC_VPORT_SCISSOR_0_TL_num; i++)
+	evergreen_set_vport_scissor (pScrn, i, 0, 0, 8192, 8192);
+
+    BEGIN_BATCH(66);
+    PACK0(PA_SC_MODE_CNTL_0, 2);
+    E32(0); // PA_SC_MODE_CNTL_0
+    E32(0); // PA_SC_MODE_CNTL_1
+
+    PACK0(PA_SC_CENTROID_PRIORITY_0, 27);
+    E32((0 << DISTANCE_0_shift) |
+	(1 << DISTANCE_1_shift) |
+	(2 << DISTANCE_2_shift) |
+	(3 << DISTANCE_3_shift) |
+	(4 << DISTANCE_4_shift) |
+	(5 << DISTANCE_5_shift) |
+	(6 << DISTANCE_6_shift) |
+	(7 << DISTANCE_7_shift)); // PA_SC_CENTROID_PRIORITY_0
+    E32((8 << DISTANCE_8_shift) |
+	(9 << DISTANCE_9_shift) |
+	(10 << DISTANCE_10_shift) |
+	(11 << DISTANCE_11_shift) |
+	(12 << DISTANCE_12_shift) |
+	(13 << DISTANCE_13_shift) |
+	(14 << DISTANCE_14_shift) |
+	(15 << DISTANCE_15_shift)); // PA_SC_CENTROID_PRIORITY_1
+    E32(0); // PA_SC_LINE_CNTL
+    E32(0); // PA_SC_AA_CONFIG
+    E32(((X_ROUND_TO_EVEN << PA_SU_VTX_CNTL__ROUND_MODE_shift) |
+	 PIX_CENTER_bit)); // PA_SU_VTX_CNTL
+    EFLOAT(1.0);						// PA_CL_GB_VERT_CLIP_ADJ
+    EFLOAT(1.0);						// PA_CL_GB_VERT_DISC_ADJ
+    EFLOAT(1.0);						// PA_CL_GB_HORZ_CLIP_ADJ
+    EFLOAT(1.0);						// PA_CL_GB_HORZ_DISC_ADJ
+    E32(0); // PA_SC_AA_SAMPLE_LOCS_PIXEL_*
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0); // PA_SC_AA_SAMPLE_LOCS__PIXEL_*
+    E32(0xFFFFFFFF); // PA_SC_AA_MASK_*
+    E32(0xFFFFFFFF); // PA_SC_AA_MASK_*
+
+    // CL
+    PACK0(PA_CL_CLIP_CNTL, 8);
+    E32(CLIP_DISABLE_bit); // PA_CL_CLIP_CNTL
+    E32(FACE_bit); // PA_SU_SC_MODE_CNTL
+    E32(VTX_XY_FMT_bit); // PA_CL_VTE_CNTL
+    E32(0); // PA_CL_VS_OUT_CNTL
+    E32(0); // PA_CL_NANINF_CNTL
+    E32(0); // PA_SU_LINE_STIPPLE_CNTL
+    E32(0); // PA_SU_LINE_STIPPLE_SCALE
+    E32(0); // PA_SU_PRIM_FILTER_CNTL
+
+    // SU
+    PACK0(PA_SU_POLY_OFFSET_DB_FMT_CNTL, 6);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(SPI_INPUT_Z, 13);
+    E32(0); // SPI_INPUT_Z
+    E32(0); // SPI_FOG_CNTL
+    E32(LINEAR_CENTROID_ENA__X_ON_AT_CENTROID << LINEAR_CENTROID_ENA_shift); // SPI_BARYC_CNTL
+    E32(0); // SPI_PS_IN_CONTROL_2
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0); // SPI_GPR_MGMT
+    E32(0); // SPI_LDS_MGMT
+    E32(0); // SPI_STACK_MGMT
+    E32(0); // SPI_WAVE_MGMT_1
+    E32(0); // SPI_WAVE_MGMT_2
+    END_BATCH();
+
+    // clear FS
+    fs_conf.bo = accel_state->shaders_bo;
+    evergreen_fs_setup(pScrn, &fs_conf, RADEON_GEM_DOMAIN_VRAM);
+
+    // VGT
+    BEGIN_BATCH(46);
+
+    PACK0(VGT_MAX_VTX_INDX, 4);
+    E32(0xffffff);
+    E32(0);
+    E32(0);
+    E32(0);
+
+    PACK0(VGT_INSTANCE_STEP_RATE_0, 2);
+    E32(0);
+    E32(0);
+
+    PACK0(VGT_REUSE_OFF, 2);
+    E32(0);
+    E32(0);
+
+    PACK0(PA_SU_POINT_SIZE, 17);
+    E32(0); // PA_SU_POINT_SIZE
+    E32(0); // PA_SU_POINT_MINMAX
+    E32((8 << PA_SU_LINE_CNTL__WIDTH_shift)); /* Line width 1 pixel */ // PA_SU_LINE_CNTL
+    E32(0); // PA_SC_LINE_STIPPLE
+    E32(0); // VGT_OUTPUT_PATH_CNTL
+    E32(0); // VGT_HOS_CNTL
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0);
+    E32(0); // VGT_GS_MODE
+
+    EREG(VGT_PRIMITIVEID_EN,                  0);
+    EREG(VGT_MULTI_PRIM_IB_RESET_EN,          0);
+    EREG(VGT_SHADER_STAGES_EN,          0);
+
+    PACK0(VGT_STRMOUT_CONFIG, 2);
+    E32(0);
+    E32(0);
+    END_BATCH();
+}
+
+#endif
diff --git a/src/evergreen_accel.c b/src/evergreen_accel.c
index 12626c3..40aa372 100644
--- a/src/evergreen_accel.c
+++ b/src/evergreen_accel.c
@@ -145,6 +145,9 @@ evergreen_sq_setup(ScrnInfoPtr pScrn, sq_config_t *sq_conf)
     END_BATCH();
 }
 
+/* cayman has some minor differences in CB_COLOR*_INFO and _ATTRIB, but none that
+ * we use here.
+ */
 void
 evergreen_set_render_target(ScrnInfoPtr pScrn, cb_config_t *cb_conf, uint32_t domain)
 {
@@ -365,6 +368,9 @@ evergreen_fs_setup(ScrnInfoPtr pScrn, shader_config_t *fs_conf, uint32_t domain)
     END_BATCH();
 }
 
+/* cayman has some minor differences in SQ_PGM_RESOURCES_VS and _RESOURCES_2_VS,
+ * but none that we use here.
+ */
 void
 evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
 {
@@ -408,6 +414,9 @@ evergreen_vs_setup(ScrnInfoPtr pScrn, shader_config_t *vs_conf, uint32_t domain)
     END_BATCH();
 }
 
+/* cayman has some minor differences in SQ_PGM_RESOURCES_PS and _RESOURCES_2_PS,
+ * but none that we use here.
+ */
 void
 evergreen_ps_setup(ScrnInfoPtr pScrn, shader_config_t *ps_conf, uint32_t domain)
 {
@@ -507,6 +516,9 @@ evergreen_set_bool_consts(ScrnInfoPtr pScrn, int offset, uint32_t val)
     END_BATCH();
 }
 
+/* cayman has some minor differences in SQ_VTX_CONSTANT_WORD2_0 and _WORD3_0,
+ * but none that we use here.
+ */
 static void
 evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t domain)
 {
@@ -542,7 +554,8 @@ evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t doma
     /* flush vertex cache */
     if ((info->ChipFamily == CHIP_FAMILY_CEDAR) ||
 	(info->ChipFamily == CHIP_FAMILY_PALM) ||
-	(info->ChipFamily == CHIP_FAMILY_CAICOS))
+	(info->ChipFamily == CHIP_FAMILY_CAICOS) ||
+	(info->ChipFamily == CHIP_FAMILY_CAYMAN))
 	evergreen_cp_set_surface_sync(pScrn, TC_ACTION_ENA_bit,
 				      accel_state->vbo.vb_offset, accel_state->vbo.vb_mc_addr,
 				      res->bo,
@@ -567,6 +580,9 @@ evergreen_set_vtx_resource(ScrnInfoPtr pScrn, vtx_resource_t *res, uint32_t doma
     END_BATCH();
 }
 
+/* cayman has some minor differences in SQ_TEX_CONSTANT_WORD0_0 and _WORD4_0,
+ * but none that we use here.
+ */
 void
 evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t domain)
 {
@@ -640,6 +656,9 @@ evergreen_set_tex_resource(ScrnInfoPtr pScrn, tex_resource_t *tex_res, uint32_t
     END_BATCH();
 }
 
+/* cayman has some minor differences in SQ_TEX_SAMPLER_WORD0_0,
+ * but none that we use here.
+ */
 void
 evergreen_set_tex_sampler (ScrnInfoPtr pScrn, tex_sampler_t *s)
 {
@@ -773,6 +792,11 @@ evergreen_set_default_state(ScrnInfoPtr pScrn)
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
 
+    if (info->ChipFamily == CHIP_FAMILY_CAYMAN) {
+	cayman_set_default_state(pScrn);
+	return;
+    }
+
     if (accel_state->XInited3D)
 	return;
 
diff --git a/src/evergreen_state.h b/src/evergreen_state.h
index 480c141..40fec22 100644
--- a/src/evergreen_state.h
+++ b/src/evergreen_state.h
@@ -324,6 +324,8 @@ evergreen_set_clip_rect(ScrnInfoPtr pScrn, int id, int x1, int y1, int x2, int y
 void
 evergreen_set_default_state(ScrnInfoPtr pScrn);
 void
+cayman_set_default_state(ScrnInfoPtr pScrn);
+void
 evergreen_draw_auto(ScrnInfoPtr pScrn, draw_config_t *draw_conf);
 
 void evergreen_finish_op(ScrnInfoPtr pScrn, int vtx_size);
commit 42eecc6f4fb1570769490bdaeac06817c6c36a7e
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Wed Mar 2 20:20:56 2011 -0500

    cayman: add 3D register headers
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/cayman_reg.h b/src/cayman_reg.h
new file mode 100644
index 0000000..25bd4b1
--- /dev/null
+++ b/src/cayman_reg.h
@@ -0,0 +1,236 @@
+/*
+ * Cayman Register documentation
+ *
+ * Copyright (C) 2011  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _CAYMAN_REG_H_
+#define _CAYMAN_REG_H_
+
+/*
+ * Register definitions
+ */
+
+#include "cayman_reg_auto.h"
+
+enum {
+    SHADER_TYPE_PS, /* = 0; the enumerators below count up consecutively from here */
+    SHADER_TYPE_VS,
+    SHADER_TYPE_GS,
+    SHADER_TYPE_HS,
+    SHADER_TYPE_LS,
+    SHADER_TYPE_CS,
+    SHADER_TYPE_FS, /* = 6 */
+};
+
+
+/* Register address ranges (_offset = start, _end = end); presumably the windows addressed by the matching IT_SET_* packets */
+enum {
+    SET_CONFIG_REG_offset          = 0x00008000,
+    SET_CONFIG_REG_end             = 0x0000ac00,
+    SET_CONTEXT_REG_offset         = 0x00028000,
+    SET_CONTEXT_REG_end            = 0x00029000,
+    SET_RESOURCE_offset            = 0x00030000,
+    SET_RESOURCE_end               = 0x00038000,
+    SET_SAMPLER_offset             = 0x0003c000,
+    SET_SAMPLER_end                = 0x0003c600,
+    SET_CTL_CONST_offset           = 0x0003cff0,
+    SET_CTL_CONST_end              = 0x0003ff0c,
+    SET_LOOP_CONST_offset          = 0x0003a200, /* note: entries are not in address order; this lies below SET_SAMPLER_offset */
+    SET_LOOP_CONST_end             = 0x0003a500,
+    SET_BOOL_CONST_offset          = 0x0003a500, /* same value as SET_LOOP_CONST_end */
+    SET_BOOL_CONST_end             = 0x0003a518,
+};
+
+/* Packet3 command opcodes (IT_*); the values are sparse, so each one is assigned explicitly */
+enum {
+    IT_NOP                      = 0x10,
+    IT_INDIRECT_BUFFER_END      = 0x17,
+    IT_SET_PREDICATION          = 0x20,
+    IT_COND_EXEC                = 0x22,
+    IT_PRED_EXEC                = 0x23,
+    IT_DRAW_INDEX_2             = 0x27,
+    IT_CONTEXT_CONTROL          = 0x28,
+    IT_DRAW_INDEX_OFFSET        = 0x29,
+    IT_INDEX_TYPE               = 0x2A,
+    IT_DRAW_INDEX               = 0x2B,
+    IT_DRAW_INDEX_AUTO          = 0x2D,
+    IT_DRAW_INDEX_IMMD          = 0x2E,
+    IT_NUM_INSTANCES            = 0x2F,
+    IT_INDIRECT_BUFFER          = 0x32,
+    IT_STRMOUT_BUFFER_UPDATE    = 0x34,
+    IT_MEM_SEMAPHORE            = 0x39,
+    IT_MPEG_INDEX               = 0x3A,
+    IT_WAIT_REG_MEM             = 0x3C,
+    IT_MEM_WRITE                = 0x3D,
+    IT_SURFACE_SYNC             = 0x43,
+    IT_ME_INITIALIZE            = 0x44,
+    IT_COND_WRITE               = 0x45,
+    IT_EVENT_WRITE              = 0x46,
+    IT_EVENT_WRITE_EOP          = 0x47,
+    IT_EVENT_WRITE_EOS          = 0x48,
+    IT_SET_CONFIG_REG           = 0x68,
+    IT_SET_CONTEXT_REG          = 0x69,
+    IT_SET_ALU_CONST            = 0x6A,
+    IT_SET_BOOL_CONST           = 0x6B,
+    IT_SET_LOOP_CONST           = 0x6C,
+    IT_SET_RESOURCE             = 0x6D,
+    IT_SET_SAMPLER              = 0x6E,
+    IT_SET_CTL_CONST            = 0x6F,
+};
+
+/* IT_WAIT_REG_MEM operation encoding: a comparison code (0..6) at bit 0 and a REG/MEM selector at bit 4 */
+
+#define IT_WAIT_ALWAYS          (0 << 0)
+#define IT_WAIT_LT              (1 << 0)
+#define IT_WAIT_LE              (2 << 0)
+#define IT_WAIT_EQ              (3 << 0)
+#define IT_WAIT_NE              (4 << 0)
+#define IT_WAIT_GE              (5 << 0)
+#define IT_WAIT_GT              (6 << 0)
+#define IT_WAIT_REG             (0 << 4)
+#define IT_WAIT_MEM             (1 << 4)
+
+#define IT_WAIT_ADDR(x)         ((x) >> 2) /* drops the two low bits; presumably byte address -> dword index, TODO confirm */
+
+enum {
+/* Layout: register offsets use a 4-space indent; their bit-field constants follow with a tab indent. */
+    SQ_LDS_ALLOC_PS                                       = 0x288ec,
+    SQ_DYN_GPR_CNTL_PS_FLUSH_REQ                          = 0x8d8c,
+
+    CP_COHER_CNTL                                         = 0x85f0,
+	DEST_BASE_0_ENA_bit                               = 1 << 0,
+	DEST_BASE_1_ENA_bit                               = 1 << 1,
+	SO0_DEST_BASE_ENA_bit                             = 1 << 2,
+	SO1_DEST_BASE_ENA_bit                             = 1 << 3,
+	SO2_DEST_BASE_ENA_bit                             = 1 << 4,
+	SO3_DEST_BASE_ENA_bit                             = 1 << 5,
+	CB0_DEST_BASE_ENA_bit                             = 1 << 6,
+	CB1_DEST_BASE_ENA_bit                             = 1 << 7,
+	CB2_DEST_BASE_ENA_bit                             = 1 << 8,
+	CB3_DEST_BASE_ENA_bit                             = 1 << 9,
+	CB4_DEST_BASE_ENA_bit                             = 1 << 10,
+	CB5_DEST_BASE_ENA_bit                             = 1 << 11,
+	CB6_DEST_BASE_ENA_bit                             = 1 << 12,
+	CB7_DEST_BASE_ENA_bit                             = 1 << 13,
+	DB_DEST_BASE_ENA_bit                              = 1 << 14,
+	CB8_DEST_BASE_ENA_bit                             = 1 << 15,
+	CB9_DEST_BASE_ENA_bit                             = 1 << 16,
+	CB10_DEST_BASE_ENA_bit                            = 1 << 17,
+	CB11_DEST_BASE_ENA_bit                            = 1 << 18,
+	FULL_CACHE_ENA_bit                                = 1 << 20,
+	TC_ACTION_ENA_bit                                 = 1 << 23,
+	CB_ACTION_ENA_bit                                 = 1 << 25,
+	DB_ACTION_ENA_bit                                 = 1 << 26,
+	SH_ACTION_ENA_bit                                 = 1 << 27,
+	SX_ACTION_ENA_bit                                 = 1 << 28,
+    CP_COHER_SIZE                                         = 0x85f4,
+    CP_COHER_BASE                                         = 0x85f8,
+    CP_COHER_STATUS                                       = 0x85fc,
+	MATCHING_GFX_CNTX_mask                            = 0xff << 0,
+	MATCHING_GFX_CNTX_shift                           = 0,
+	STATUS_bit                                        = 1 << 31,
+
+/* FMT_*: format codes with gaps in the numbering; presumably the DATA_FORMAT field values of */
+/* SQ_VTX_CONSTANT_WORD2_0 (0x00030008, field mask 0x3f << 20), which is not defined here -- TODO confirm. */
+	FMT_INVALID=0,      FMT_8,          FMT_4_4,            FMT_3_3_2,
+	                    FMT_16=5,       FMT_16_FLOAT,       FMT_8_8,
+	FMT_5_6_5,          FMT_6_5_5,      FMT_1_5_5_5,        FMT_4_4_4_4,
+	FMT_5_5_5_1,        FMT_32,         FMT_32_FLOAT,       FMT_16_16,
+	FMT_16_16_FLOAT=16, FMT_8_24,       FMT_8_24_FLOAT,     FMT_24_8,
+	FMT_24_8_FLOAT,     FMT_10_11_11,   FMT_10_11_11_FLOAT, FMT_11_11_10,
+	FMT_11_11_10_FLOAT, FMT_2_10_10_10, FMT_8_8_8_8,        FMT_10_10_10_2,
+	FMT_X24_8_32_FLOAT, FMT_32_32,      FMT_32_32_FLOAT,    FMT_16_16_16_16,
+	FMT_16_16_16_16_FLOAT=32,           FMT_32_32_32_32=34, FMT_32_32_32_32_FLOAT,
+	                    FMT_1 = 37,                         FMT_GB_GR=39,
+	FMT_BG_RG,          FMT_32_AS_8,    FMT_32_AS_8_8,      FMT_5_9_9_9_SHAREDEXP,
+	FMT_8_8_8,          FMT_16_16_16,   FMT_16_16_16_FLOAT, FMT_32_32_32,
+	FMT_32_32_32_FLOAT=48,
+
+/* Register-file layout per shader type: slot counts (*_num) and each type's first slot (*_ps, *_vs, ...). */
+    SQ_FETCH_RESOURCE                                       = SQ_TEX_RESOURCE_WORD0_0,
+    SQ_FETCH_RESOURCE_ps_num                                = 176,
+    SQ_FETCH_RESOURCE_vs_num                                = 160,
+    SQ_FETCH_RESOURCE_gs_num                                = 160,
+    SQ_FETCH_RESOURCE_hs_num                                = 160,
+    SQ_FETCH_RESOURCE_ls_num                                = 160,
+    SQ_FETCH_RESOURCE_cs_num                                = 176,
+    SQ_FETCH_RESOURCE_fs_num                                = 32,
+    SQ_FETCH_RESOURCE_all_num                               = 1024,
+    SQ_FETCH_RESOURCE_offset                                = 32,
+    SQ_FETCH_RESOURCE_ps                                    = 0,                                               //   0...175
+    SQ_FETCH_RESOURCE_vs                                    = SQ_FETCH_RESOURCE_ps + SQ_FETCH_RESOURCE_ps_num, // 176...335
+    SQ_FETCH_RESOURCE_gs                                    = SQ_FETCH_RESOURCE_vs + SQ_FETCH_RESOURCE_vs_num, // 336...495
+    SQ_FETCH_RESOURCE_hs                                    = SQ_FETCH_RESOURCE_gs + SQ_FETCH_RESOURCE_gs_num, // 496...655
+    SQ_FETCH_RESOURCE_ls                                    = SQ_FETCH_RESOURCE_hs + SQ_FETCH_RESOURCE_hs_num, // 656...815
+    SQ_FETCH_RESOURCE_cs                                    = SQ_FETCH_RESOURCE_ls + SQ_FETCH_RESOURCE_ls_num, // 816...991
+    SQ_FETCH_RESOURCE_fs                                    = SQ_FETCH_RESOURCE_cs + SQ_FETCH_RESOURCE_cs_num, // 992...1023
+
+    SQ_TEX_SAMPLER_WORD                                   = SQ_TEX_SAMPLER_WORD0_0,
+    SQ_TEX_SAMPLER_WORD_ps_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_vs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_gs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_hs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_ls_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_cs_num                            = 18,
+    SQ_TEX_SAMPLER_WORD_all_num                           = 108,
+    SQ_TEX_SAMPLER_WORD_offset                            = 12,
+    SQ_TEX_SAMPLER_WORD_ps                                = 0,                                                   //  0...17
+    SQ_TEX_SAMPLER_WORD_vs                                = SQ_TEX_SAMPLER_WORD_ps + SQ_TEX_SAMPLER_WORD_ps_num, // 18...35
+    SQ_TEX_SAMPLER_WORD_gs                                = SQ_TEX_SAMPLER_WORD_vs + SQ_TEX_SAMPLER_WORD_vs_num, // 36...53
+    SQ_TEX_SAMPLER_WORD_hs                                = SQ_TEX_SAMPLER_WORD_gs + SQ_TEX_SAMPLER_WORD_gs_num, // 54...71
+    SQ_TEX_SAMPLER_WORD_ls                                = SQ_TEX_SAMPLER_WORD_hs + SQ_TEX_SAMPLER_WORD_hs_num, // 72...89
+    SQ_TEX_SAMPLER_WORD_cs                                = SQ_TEX_SAMPLER_WORD_ls + SQ_TEX_SAMPLER_WORD_ls_num, // 90...107
+
+    SQ_LOOP_CONST                                         = SQ_LOOP_CONST_0,
+    SQ_LOOP_CONST_ps_num                                  = 32,
+    SQ_LOOP_CONST_vs_num                                  = 32,
+    SQ_LOOP_CONST_gs_num                                  = 32,
+    SQ_LOOP_CONST_hs_num                                  = 32,
+    SQ_LOOP_CONST_ls_num                                  = 32,
+    SQ_LOOP_CONST_cs_num                                  = 32,
+    SQ_LOOP_CONST_all_num                                 = 192,
+    SQ_LOOP_CONST_offset                                  = 4,
+    SQ_LOOP_CONST_ps                                      = 0,                                       //   0...31
+    SQ_LOOP_CONST_vs                                      = SQ_LOOP_CONST_ps + SQ_LOOP_CONST_ps_num, //  32...63
+    SQ_LOOP_CONST_gs                                      = SQ_LOOP_CONST_vs + SQ_LOOP_CONST_vs_num, //  64...95
+    SQ_LOOP_CONST_hs                                      = SQ_LOOP_CONST_gs + SQ_LOOP_CONST_gs_num, //  96...127
+    SQ_LOOP_CONST_ls                                      = SQ_LOOP_CONST_hs + SQ_LOOP_CONST_hs_num, // 128...159
+    SQ_LOOP_CONST_cs                                      = SQ_LOOP_CONST_ls + SQ_LOOP_CONST_ls_num, // 160...191
+
+    SQ_BOOL_CONST                                         = SQ_BOOL_CONST_0, /* 32 bits each */
+    SQ_BOOL_CONST_ps_num                                  = 1,
+    SQ_BOOL_CONST_vs_num                                  = 1,
+    SQ_BOOL_CONST_gs_num                                  = 1,
+    SQ_BOOL_CONST_hs_num                                  = 1,
+    SQ_BOOL_CONST_ls_num                                  = 1,
+    SQ_BOOL_CONST_cs_num                                  = 1,
+    SQ_BOOL_CONST_all_num                                 = 6,
+    SQ_BOOL_CONST_offset                                  = 4,
+    SQ_BOOL_CONST_ps                                      = 0,
+    SQ_BOOL_CONST_vs                                      = SQ_BOOL_CONST_ps + SQ_BOOL_CONST_ps_num,
+    SQ_BOOL_CONST_gs                                      = SQ_BOOL_CONST_vs + SQ_BOOL_CONST_vs_num,
+    SQ_BOOL_CONST_hs                                      = SQ_BOOL_CONST_gs + SQ_BOOL_CONST_gs_num,
+    SQ_BOOL_CONST_ls                                      = SQ_BOOL_CONST_hs + SQ_BOOL_CONST_hs_num,
+    SQ_BOOL_CONST_cs                                      = SQ_BOOL_CONST_ls + SQ_BOOL_CONST_ls_num,
+
+};
+
+#endif
diff --git a/src/cayman_reg_auto.h b/src/cayman_reg_auto.h
new file mode 100644
index 0000000..460c1e5
--- /dev/null
+++ b/src/cayman_reg_auto.h
@@ -0,0 +1,4351 @@
+/*
+ * Cayman Register documentation
+ *
+ * Copyright (C) 2011  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _CAYMAN_REG_AUTO
+#define _CAYMAN_REG_AUTO
+
+enum {
+
+    VGT_VTX_VECT_EJECT_REG                                = 0x000088b0,
+	PRIM_COUNT_mask                                   = 0x3ff << 0,
+	PRIM_COUNT_shift                                  = 0,
+    VGT_LAST_COPY_STATE                                   = 0x000088c0,
+	SRC_STATE_ID_mask                                 = 0x07 << 0,
+	SRC_STATE_ID_shift                                = 0,
+	DST_STATE_ID_mask                                 = 0x07 << 16,
+	DST_STATE_ID_shift                                = 16,
+    VGT_CACHE_INVALIDATION                                = 0x000088c4,
+	CACHE_INVALIDATION_mask                           = 0x03 << 0,
+	CACHE_INVALIDATION_shift                          = 0,
+	    VC_ONLY                                       = 0x00,
+	    TC_ONLY                                       = 0x01,
+	    VC_AND_TC                                     = 0x02,
+	VS_NO_EXTRA_BUFFER_bit                            = 1 << 5,
+	AUTO_INVLD_EN_mask                                = 0x03 << 6,
+	AUTO_INVLD_EN_shift                               = 6,
+    VGT_GS_VERTEX_REUSE                                   = 0x000088d4,
+	VERT_REUSE_mask                                   = 0x1f << 0,
+	VERT_REUSE_shift                                  = 0,
+    VGT_CNTL_STATUS                                       = 0x000088f0,
+	VGT_OUT_INDX_BUSY_bit                             = 1 << 0,
+	VGT_OUT_BUSY_bit                                  = 1 << 1,
+	VGT_PT_BUSY_bit                                   = 1 << 2,
+	VGT_TE_BUSY_bit                                   = 1 << 3,
+	VGT_VR_BUSY_bit                                   = 1 << 4,
+	VGT_PI_BUSY_bit                                   = 1 << 5,
+	VGT_INVLD_BUSY_bit                                = 1 << 6,
+	VGT_GS_BUSY_bit                                   = 1 << 7,
+	VGT_HS_BUSY_bit                                   = 1 << 8,
+	VGT_TE11_BUSY_bit                                 = 1 << 9,
+	VGT_BUSY_bit                                      = 1 << 10,
+    VGT_PRIMITIVE_TYPE                                    = 0x00008958,
+	VGT_PRIMITIVE_TYPE__PRIM_TYPE_mask                = 0x3f << 0,
+	VGT_PRIMITIVE_TYPE__PRIM_TYPE_shift               = 0,
+	    DI_PT_NONE                                    = 0x00,
+	    DI_PT_POINTLIST                               = 0x01,
+	    DI_PT_LINELIST                                = 0x02,
+	    DI_PT_LINESTRIP                               = 0x03,
+	    DI_PT_TRILIST                                 = 0x04,
+	    DI_PT_TRIFAN                                  = 0x05,
+	    DI_PT_TRISTRIP                                = 0x06,
+	    DI_PT_UNUSED_0                                = 0x07,
+	    DI_PT_UNUSED_1                                = 0x08,
+	    DI_PT_PATCH                                   = 0x09,
+	    DI_PT_LINELIST_ADJ                            = 0x0a,
+	    DI_PT_LINESTRIP_ADJ                           = 0x0b,
+	    DI_PT_TRILIST_ADJ                             = 0x0c,
+	    DI_PT_TRISTRIP_ADJ                            = 0x0d,
+	    DI_PT_UNUSED_3                                = 0x0e,
+	    DI_PT_UNUSED_4                                = 0x0f,
+	    DI_PT_TRI_WITH_WFLAGS                         = 0x10,
+	    DI_PT_RECTLIST                                = 0x11,
+	    DI_PT_LINELOOP                                = 0x12,
+	    DI_PT_QUADLIST                                = 0x13,
+	    DI_PT_QUADSTRIP                               = 0x14,
+	    DI_PT_POLYGON                                 = 0x15,
+	    DI_PT_2D_COPY_RECT_LIST_V0                    = 0x16,
+	    DI_PT_2D_COPY_RECT_LIST_V1                    = 0x17,
+	    DI_PT_2D_COPY_RECT_LIST_V2                    = 0x18,
+	    DI_PT_2D_COPY_RECT_LIST_V3                    = 0x19,
+	    DI_PT_2D_FILL_RECT_LIST                       = 0x1a,
+	    DI_PT_2D_LINE_STRIP                           = 0x1b,
+	    DI_PT_2D_TRI_STRIP                            = 0x1c,
+    VGT_INDEX_TYPE                                        = 0x0000895c,
+	INDEX_TYPE_mask                                   = 0x03 << 0,
+	INDEX_TYPE_shift                                  = 0,
+	    DI_INDEX_SIZE_16_BIT                          = 0x00,
+	    DI_INDEX_SIZE_32_BIT                          = 0x01,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_0                      = 0x00008960,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_1                      = 0x00008964,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_2                      = 0x00008968,
+    VGT_STRMOUT_BUFFER_FILLED_SIZE_3                      = 0x0000896c,
+    VGT_NUM_INDICES                                       = 0x00008970,
+    VGT_NUM_INSTANCES                                     = 0x00008974,
+    VGT_SYS_CONFIG                                        = 0x0000898c,
+	DUAL_CORE_EN_bit                                  = 1 << 0,
+	MAX_LS_HS_THDGRP_mask                             = 0x3f << 1,
+	MAX_LS_HS_THDGRP_shift                            = 1,
+    PA_CL_CNTL_STATUS                                     = 0x00008a10,
+	CL_BUSY_bit                                       = 1 << 31,
+    PA_CL_ENHANCE                                         = 0x00008a14,
+	CLIP_VTX_REORDER_ENA_bit                          = 1 << 0,
+	NUM_CLIP_SEQ_mask                                 = 0x03 << 1,
+	NUM_CLIP_SEQ_shift                                = 1,
+	CLIPPED_PRIM_SEQ_STALL_bit                        = 1 << 3,
+	VE_NAN_PROC_DISABLE_bit                           = 1 << 4,
+    PA_SU_CNTL_STATUS                                     = 0x00008a50,
+	SU_BUSY_bit                                       = 1 << 31,
+    PA_SU_LINE_STIPPLE_VALUE                              = 0x00008a60,
+	LINE_STIPPLE_VALUE_mask                           = 0xffffff << 0,
+	LINE_STIPPLE_VALUE_shift                          = 0,
+    PA_SC_LINE_STIPPLE_STATE                              = 0x00008b10,
+	CURRENT_PTR_mask                                  = 0x0f << 0,
+	CURRENT_PTR_shift                                 = 0,
+	CURRENT_COUNT_mask                                = 0xff << 8,
+	CURRENT_COUNT_shift                               = 8,
+    SQ_CONFIG                                             = 0x00008c00,
+	VC_ENABLE_bit                                     = 1 << 0,
+	EXPORT_SRC_C_bit                                  = 1 << 1,
+	GFX_PRIO_mask                                     = 0x03 << 2,
+	GFX_PRIO_shift                                    = 2,
+	CS1_PRIO_mask                                     = 0x03 << 4,
+	CS1_PRIO_shift                                    = 4,
+	CS2_PRIO_mask                                     = 0x03 << 6,
+	CS2_PRIO_shift                                    = 6,
+    SQ_GPR_RESOURCE_MGMT_1                                = 0x00008c04,
+	SQ_GPR_RESOURCE_MGMT_1__NUM_PS_GPRS_mask          = 0xff << 0,
+	SQ_GPR_RESOURCE_MGMT_1__NUM_PS_GPRS_shift         = 0,
+	SQ_GPR_RESOURCE_MGMT_1__NUM_VS_GPRS_mask          = 0xff << 16,
+	SQ_GPR_RESOURCE_MGMT_1__NUM_VS_GPRS_shift         = 16,
+	NUM_CLAUSE_TEMP_GPRS_mask                         = 0x0f << 28,
+	NUM_CLAUSE_TEMP_GPRS_shift                        = 28,
+    SQ_GLOBAL_GPR_RESOURCE_MGMT_1                         = 0x00008c10,
+	PS_GGPR_BASE_mask                                 = 0xff << 0,
+	PS_GGPR_BASE_shift                                = 0,
+	VS_GGPR_BASE_mask                                 = 0xff << 8,
+	VS_GGPR_BASE_shift                                = 8,
+	GS_GGPR_BASE_mask                                 = 0xff << 16,
+	GS_GGPR_BASE_shift                                = 16,
+	ES_GGPR_BASE_mask                                 = 0xff << 24,
+	ES_GGPR_BASE_shift                                = 24,
+    SQ_GLOBAL_GPR_RESOURCE_MGMT_2                         = 0x00008c14,
+	HS_GGPR_BASE_mask                                 = 0xff << 0,
+	HS_GGPR_BASE_shift                                = 0,
+	LS_GGPR_BASE_mask                                 = 0xff << 8,
+	LS_GGPR_BASE_shift                                = 8,
+	CS_GGPR_BASE_mask                                 = 0xff << 16,
+	CS_GGPR_BASE_shift                                = 16,
+    SQ_ESGS_RING_BASE                                     = 0x00008c40,
+    SQ_ESGS_RING_SIZE                                     = 0x00008c44,
+    SQ_GSVS_RING_BASE                                     = 0x00008c48,
+    SQ_GSVS_RING_SIZE                                     = 0x00008c4c,
+    SQ_ESTMP_RING_BASE                                    = 0x00008c50,
+    SQ_ESTMP_RING_SIZE                                    = 0x00008c54,
+    SQ_GSTMP_RING_BASE                                    = 0x00008c58,
+    SQ_GSTMP_RING_SIZE                                    = 0x00008c5c,
+    SQ_VSTMP_RING_BASE                                    = 0x00008c60,
+    SQ_VSTMP_RING_SIZE                                    = 0x00008c64,
+    SQ_PSTMP_RING_BASE                                    = 0x00008c68,
+    SQ_PSTMP_RING_SIZE                                    = 0x00008c6c,
+    SQ_ALU_WORD1_OP3                                      = 0x00008dfc,
+	SRC2_SEL_mask                                     = 0x1ff << 0,
+	SRC2_SEL_shift                                    = 0,
+	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb,
+	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc,
+	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd,
+	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde,
+	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf,
+	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0,
+	    SQ_ALU_SRC_TIME_HI                            = 0xe3,
+	    SQ_ALU_SRC_TIME_LO                            = 0xe4,
+	    SQ_ALU_SRC_MASK_HI                            = 0xe5,
+	    SQ_ALU_SRC_MASK_LO                            = 0xe6,
+	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7,
+	    SQ_ALU_SRC_SIMD_ID                            = 0xe8,
+	    SQ_ALU_SRC_SE_ID                              = 0xe9,
+	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea,
+	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb,
+	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec,
+	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed,
+	    SQ_ALU_SRC_LOOP_IDX                           = 0xee,
+	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0,
+	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1,
+	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2,
+	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3,
+	    SQ_ALU_SRC_1_DBL_L                            = 0xf4,
+	    SQ_ALU_SRC_1_DBL_M                            = 0xf5,
+	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6,
+	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7,
+	    SQ_ALU_SRC_0                                  = 0xf8,
+	    SQ_ALU_SRC_1                                  = 0xf9,
+	    SQ_ALU_SRC_1_INT                              = 0xfa,
+	    SQ_ALU_SRC_M_1_INT                            = 0xfb,
+	    SQ_ALU_SRC_0_5                                = 0xfc,
+	    SQ_ALU_SRC_LITERAL                            = 0xfd,
+	    SQ_ALU_SRC_PV                                 = 0xfe,
+	    SQ_ALU_SRC_PS                                 = 0xff,
+	SRC2_REL_bit                                      = 1 << 9,
+	SRC2_CHAN_mask                                    = 0x03 << 10,
+	SRC2_CHAN_shift                                   = 10,
+	    SQ_CHAN_X                                     = 0x00,
+	    SQ_CHAN_Y                                     = 0x01,
+	    SQ_CHAN_Z                                     = 0x02,
+	    SQ_CHAN_W                                     = 0x03,
+	SRC2_NEG_bit                                      = 1 << 12,
+	SQ_ALU_WORD1_OP3__ALU_INST_mask                   = 0x1f << 13,
+	SQ_ALU_WORD1_OP3__ALU_INST_shift                  = 13,
+	    SQ_OP3_INST_BFE_UINT                          = 0x04,
+	    SQ_OP3_INST_BFE_INT                           = 0x05,
+	    SQ_OP3_INST_BFI_INT                           = 0x06,
+	    SQ_OP3_INST_FMA                               = 0x07,
+	    SQ_OP3_INST_CNDNE_64                          = 0x09,
+	    SQ_OP3_INST_FMA_64                            = 0x0a,
+	    SQ_OP3_INST_LERP_UINT                         = 0x0b,
+	    SQ_OP3_INST_BIT_ALIGN_INT                     = 0x0c,
+	    SQ_OP3_INST_BYTE_ALIGN_INT                    = 0x0d,
+	    SQ_OP3_INST_SAD_ACCUM_UINT                    = 0x0e,
+	    SQ_OP3_INST_SAD_ACCUM_HI_UINT                 = 0x0f,
+	    SQ_OP3_INST_MULADD_UINT24                     = 0x10,
+	    SQ_OP3_INST_LDS_IDX_OP                        = 0x11,
+	    SQ_OP3_INST_MULADD                            = 0x14,
+	    SQ_OP3_INST_MULADD_M2                         = 0x15,
+	    SQ_OP3_INST_MULADD_M4                         = 0x16,
+	    SQ_OP3_INST_MULADD_D2                         = 0x17,
+	    SQ_OP3_INST_MULADD_IEEE                       = 0x18,
+	    SQ_OP3_INST_CNDE                              = 0x19,
+	    SQ_OP3_INST_CNDGT                             = 0x1a,
+	    SQ_OP3_INST_CNDGE                             = 0x1b,
+	    SQ_OP3_INST_CNDE_INT                          = 0x1c,
+	    SQ_OP3_INST_CNDGT_INT                         = 0x1d,
+	    SQ_OP3_INST_CNDGE_INT                         = 0x1e,
+	    SQ_OP3_INST_MUL_LIT                           = 0x1f,
+    SQ_ALU_WORD1_LDS_DIRECT_LITERAL_LO                    = 0x00008dfc,
+	OFFSET_A_mask                                     = 0x1fff << 0,
+	OFFSET_A_shift                                    = 0,
+	STRIDE_A_mask                                     = 0x7f << 13,
+	STRIDE_A_shift                                    = 13,
+	THREAD_REL_A_bit                                  = 1 << 22,
+    SQ_TEX_WORD2                                          = 0x00008dfc,
+	OFFSET_X_mask                                     = 0x1f << 0,
+	OFFSET_X_shift                                    = 0,
+	OFFSET_Y_mask                                     = 0x1f << 5,
+	OFFSET_Y_shift                                    = 5,
+	OFFSET_Z_mask                                     = 0x1f << 10,
+	OFFSET_Z_shift                                    = 10,
+	SAMPLER_ID_mask                                   = 0x1f << 15,
+	SAMPLER_ID_shift                                  = 15,
+	SQ_TEX_WORD2__SRC_SEL_X_mask                      = 0x07 << 20,
+	SQ_TEX_WORD2__SRC_SEL_X_shift                     = 20,
+	    SQ_SEL_X                                      = 0x00,
+	    SQ_SEL_Y                                      = 0x01,
+	    SQ_SEL_Z                                      = 0x02,
+	    SQ_SEL_W                                      = 0x03,
+	    SQ_SEL_0                                      = 0x04,
+	    SQ_SEL_1                                      = 0x05,
+	SQ_TEX_WORD2__SRC_SEL_Y_mask                      = 0x07 << 23,
+	SQ_TEX_WORD2__SRC_SEL_Y_shift                     = 23,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SRC_SEL_Z_mask                                    = 0x07 << 26,
+	SRC_SEL_Z_shift                                   = 26,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SRC_SEL_W_mask                                    = 0x07 << 29,
+	SRC_SEL_W_shift                                   = 29,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+    SQ_CF_ALLOC_EXPORT_WORD1                              = 0x00008dfc,
+	BURST_COUNT_mask                                  = 0x0f << 16,
+	BURST_COUNT_shift                                 = 16,
+	VALID_PIXEL_MODE_bit                              = 1 << 20,
+	SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_mask            = 0xff << 22,
+	SQ_CF_ALLOC_EXPORT_WORD1__CF_INST_shift           = 22,
+	    SQ_CF_INST_MEM_STREAM0_BUF0                   = 0x40,
+	    SQ_CF_INST_MEM_STREAM0_BUF1                   = 0x41,
+	    SQ_CF_INST_MEM_STREAM0_BUF2                   = 0x42,
+	    SQ_CF_INST_MEM_STREAM0_BUF3                   = 0x43,
+	    SQ_CF_INST_MEM_STREAM1_BUF0                   = 0x44,
+	    SQ_CF_INST_MEM_STREAM1_BUF1                   = 0x45,
+	    SQ_CF_INST_MEM_STREAM1_BUF2                   = 0x46,
+	    SQ_CF_INST_MEM_STREAM1_BUF3                   = 0x47,
+	    SQ_CF_INST_MEM_STREAM2_BUF0                   = 0x48,
+	    SQ_CF_INST_MEM_STREAM2_BUF1                   = 0x49,
+	    SQ_CF_INST_MEM_STREAM2_BUF2                   = 0x4a,
+	    SQ_CF_INST_MEM_STREAM2_BUF3                   = 0x4b,
+	    SQ_CF_INST_MEM_STREAM3_BUF0                   = 0x4c,
+	    SQ_CF_INST_MEM_STREAM3_BUF1                   = 0x4d,
+	    SQ_CF_INST_MEM_STREAM3_BUF2                   = 0x4e,
+	    SQ_CF_INST_MEM_STREAM3_BUF3                   = 0x4f,
+	    SQ_CF_INST_MEM_SCRATCH                        = 0x50,
+	    SQ_CF_INST_MEM_RING                           = 0x52,
+	    SQ_CF_INST_EXPORT                             = 0x53,
+	    SQ_CF_INST_EXPORT_DONE                        = 0x54,
+	    SQ_CF_INST_MEM_EXPORT                         = 0x55,
+	    SQ_CF_INST_MEM_RAT                            = 0x56,
+	    SQ_CF_INST_MEM_RAT_CACHELESS                  = 0x57,
+	    SQ_CF_INST_MEM_RING1                          = 0x58,
+	    SQ_CF_INST_MEM_RING2                          = 0x59,
+	    SQ_CF_INST_MEM_RING3                          = 0x5a,
+	    SQ_CF_INST_MEM_EXPORT_COMBINED                = 0x5b,
+	    SQ_CF_INST_MEM_RAT_COMBINED_CACHELESS         = 0x5c,
+	    SQ_CF_INST_MEM_RAT_COMBINED                   = 0x5d,
+	    SQ_CF_INST_EXPORT_DONE_END_IS_NEXT            = 0x5e,
+	MARK_bit                                          = 1 << 30,
+	BARRIER_bit                                       = 1 << 31,
+    SQ_CF_ALU_WORD1                                       = 0x00008dfc,
+	KCACHE_MODE1_mask                                 = 0x03 << 0,
+	KCACHE_MODE1_shift                                = 0,
+	    SQ_CF_KCACHE_NOP                              = 0x00,
+	    SQ_CF_KCACHE_LOCK_1                           = 0x01,
+	    SQ_CF_KCACHE_LOCK_2                           = 0x02,
+	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03,
+	KCACHE_ADDR0_mask                                 = 0xff << 2,
+	KCACHE_ADDR0_shift                                = 2,
+	KCACHE_ADDR1_mask                                 = 0xff << 10,
+	KCACHE_ADDR1_shift                                = 10,
+	SQ_CF_ALU_WORD1__COUNT_mask                       = 0x7f << 18,
+	SQ_CF_ALU_WORD1__COUNT_shift                      = 18,
+	SQ_CF_ALU_WORD1__ALT_CONST_bit                    = 1 << 25,
+	SQ_CF_ALU_WORD1__CF_INST_mask                     = 0x0f << 26,
+	SQ_CF_ALU_WORD1__CF_INST_shift                    = 26,
+	    SQ_CF_INST_ALU                                = 0x08,
+	    SQ_CF_INST_ALU_PUSH_BEFORE                    = 0x09,
+	    SQ_CF_INST_ALU_POP_AFTER                      = 0x0a,
+	    SQ_CF_INST_ALU_POP2_AFTER                     = 0x0b,
+	    SQ_CF_INST_ALU_EXTENDED                       = 0x0c,
+	    SQ_CF_INST_ALU_CONTINUE                       = 0x0d,
+	    SQ_CF_INST_ALU_BREAK                          = 0x0e,
+	    SQ_CF_INST_ALU_ELSE_AFTER                     = 0x0f,
+	WHOLE_QUAD_MODE_bit                               = 1 << 30,
+/* 	BARRIER_bit                                       = 1 << 31, */
+    SQ_TEX_WORD1                                          = 0x00008dfc,
+	SQ_TEX_WORD1__DST_GPR_mask                        = 0x7f << 0,
+	SQ_TEX_WORD1__DST_GPR_shift                       = 0,
+	SQ_TEX_WORD1__DST_REL_bit                         = 1 << 7,
+	SQ_TEX_WORD1__DST_SEL_X_mask                      = 0x07 << 9,
+	SQ_TEX_WORD1__DST_SEL_X_shift                     = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	    SQ_SEL_MASK                                   = 0x07,
+	SQ_TEX_WORD1__DST_SEL_Y_mask                      = 0x07 << 12,
+	SQ_TEX_WORD1__DST_SEL_Y_shift                     = 12,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_TEX_WORD1__DST_SEL_Z_mask                      = 0x07 << 15,
+	SQ_TEX_WORD1__DST_SEL_Z_shift                     = 15,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_TEX_WORD1__DST_SEL_W_mask                      = 0x07 << 18,
+	SQ_TEX_WORD1__DST_SEL_W_shift                     = 18,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_TEX_WORD1__LOD_BIAS_mask                       = 0x7f << 21,
+	SQ_TEX_WORD1__LOD_BIAS_shift                      = 21,
+	COORD_TYPE_X_bit                                  = 1 << 28,
+	COORD_TYPE_Y_bit                                  = 1 << 29,
+	COORD_TYPE_Z_bit                                  = 1 << 30,
+	COORD_TYPE_W_bit                                  = 1 << 31,
+    SQ_ALU_WORD1_OP2_MOVA                                 = 0x00008dfc,
+	MOVA_DST_mask                                     = 0x7f << 21,
+	MOVA_DST_shift                                    = 21,
+	    SQ_ALU_MOVA_DST_AR_X                          = 0x00,
+	    SQ_ALU_MOVA_DST_CF_PC                         = 0x01,
+	    SQ_ALU_MOVA_DST_CF_IDX0                       = 0x02,
+	    SQ_ALU_MOVA_DST_CF_IDX1                       = 0x03,
+	    SQ_ALU_MOVA_DST_CLAUSE_GLOBAL_B0              = 0x04,
+	    SQ_ALU_MOVA_DST_CLAUSE_GLOBAL_B1              = 0x05,
+	    SQ_ALU_MOVA_DST_CLAUSE_GLOBAL_B2              = 0x06,
+	    SQ_ALU_MOVA_DST_CLAUSE_GLOBAL_B3              = 0x07,
+    SQ_VTX_WORD0                                          = 0x00008dfc,
+	VTX_INST_mask                                     = 0x1f << 0,
+	VTX_INST_shift                                    = 0,
+	    SQ_VTX_INST_FETCH                             = 0x00,
+	    SQ_VTX_INST_SEMANTIC                          = 0x01,
+	    SQ_VTX_INST_GET_BUFFER_RESINFO                = 0x0e,
+	FETCH_TYPE_mask                                   = 0x03 << 5,
+	FETCH_TYPE_shift                                  = 5,
+	    SQ_VTX_FETCH_VERTEX_DATA                      = 0x00,
+	    SQ_VTX_FETCH_INSTANCE_DATA                    = 0x01,
+	    SQ_VTX_FETCH_NO_INDEX_OFFSET                  = 0x02,
+	FETCH_WHOLE_QUAD_bit                              = 1 << 7,
+	BUFFER_ID_mask                                    = 0xff << 8,
+	BUFFER_ID_shift                                   = 8,
+	SQ_VTX_WORD0__SRC_GPR_mask                        = 0x7f << 16,
+	SQ_VTX_WORD0__SRC_GPR_shift                       = 16,
+	SRC_REL_bit                                       = 1 << 23,
+	SQ_VTX_WORD0__SRC_SEL_X_mask                      = 0x03 << 24,
+	SQ_VTX_WORD0__SRC_SEL_X_shift                     = 24,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+	SQ_VTX_WORD0__SRC_SEL_Y_mask                      = 0x03 << 26,
+	SQ_VTX_WORD0__SRC_SEL_Y_shift                     = 26,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+	STRUCTURED_READ_mask                              = 0x03 << 28,
+	STRUCTURED_READ_shift                             = 28,
+	    SQ_VTX_STRU_READ_OFF                          = 0x00,
+	    SQ_VTX_STRU_READ_GPR_OFFSET                   = 0x01,
+	    SQ_VTX_STRU_READ_INST_OFFSET                  = 0x02,
+	LDS_REQ_bit                                       = 1 << 30,
+	COALESCED_READ_bit                                = 1 << 31,
+    SQ_CF_ALLOC_EXPORT_WORD1_SWIZ                         = 0x00008dfc,
+	SEL_X_mask                                        = 0x07 << 0,
+	SEL_X_shift                                       = 0,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SEL_Y_mask                                        = 0x07 << 3,
+	SEL_Y_shift                                       = 3,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SEL_Z_mask                                        = 0x07 << 6,
+	SEL_Z_shift                                       = 6,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SEL_W_mask                                        = 0x07 << 9,
+	SEL_W_shift                                       = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+    SQ_MEM_RD_WORD0                                       = 0x00008dfc,
+	MEM_INST_mask                                     = 0x1f << 0,
+	MEM_INST_shift                                    = 0,
+	    SQ_MEM_INST_MEM                               = 0x02,
+	SQ_MEM_RD_WORD0__ELEM_SIZE_mask                   = 0x03 << 5,
+	SQ_MEM_RD_WORD0__ELEM_SIZE_shift                  = 5,
+/* 	FETCH_WHOLE_QUAD_bit                              = 1 << 7, */
+	MEM_OP_mask                                       = 0x07 << 8,
+	MEM_OP_shift                                      = 8,
+	    SQ_MEM_OP_RD_SCRATCH                          = 0x00,
+	    SQ_MEM_OP_RD_SCATTER                          = 0x02,
+	    SQ_MEM_OP_GDS                                 = 0x04,
+	    SQ_MEM_OP_TF_WRITE                            = 0x05,
+	SQ_MEM_RD_WORD0__UNCACHED_bit                     = 1 << 11,
+	INDEXED_bit                                       = 1 << 12,
+	SQ_MEM_RD_WORD0__SRC_SEL_Y_mask                   = 0x03 << 13,
+	SQ_MEM_RD_WORD0__SRC_SEL_Y_shift                  = 13,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+	SQ_MEM_RD_WORD0__SRC_GPR_mask                     = 0x7f << 16,
+	SQ_MEM_RD_WORD0__SRC_GPR_shift                    = 16,
+/* 	SRC_REL_bit                                       = 1 << 23, */
+	SQ_MEM_RD_WORD0__SRC_SEL_X_mask                   = 0x03 << 24,
+	SQ_MEM_RD_WORD0__SRC_SEL_X_shift                  = 24,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+	BURST_CNT_mask                                    = 0x0f << 26,
+	BURST_CNT_shift                                   = 26,
+/* 	LDS_REQ_bit                                       = 1 << 30, */
+/* 	COALESCED_READ_bit                                = 1 << 31, */
+    SQ_ALU_WORD1                                          = 0x00008dfc,
+	SQ_ALU_WORD1__ENCODING_mask                       = 0x07 << 15,
+	SQ_ALU_WORD1__ENCODING_shift                      = 15,
+	BANK_SWIZZLE_mask                                 = 0x07 << 18,
+	BANK_SWIZZLE_shift                                = 18,
+	    SQ_ALU_VEC_012                                = 0x00,
+	    SQ_ALU_VEC_021                                = 0x01,
+	    SQ_ALU_VEC_120                                = 0x02,
+	    SQ_ALU_VEC_102                                = 0x03,
+	    SQ_ALU_VEC_201                                = 0x04,
+	    SQ_ALU_VEC_210                                = 0x05,
+	SQ_ALU_WORD1__DST_GPR_mask                        = 0x7f << 21,
+	SQ_ALU_WORD1__DST_GPR_shift                       = 21,
+	SQ_ALU_WORD1__DST_REL_bit                         = 1 << 28,
+	DST_CHAN_mask                                     = 0x03 << 29,
+	DST_CHAN_shift                                    = 29,
+	    CHAN_X                                        = 0x00,
+	    CHAN_Y                                        = 0x01,
+	    CHAN_Z                                        = 0x02,
+	    CHAN_W                                        = 0x03,
+	SQ_ALU_WORD1__CLAMP_bit                           = 1 << 31,
+    SQ_CF_ALU_WORD0_EXT                                   = 0x00008dfc,
+	KCACHE_BANK_INDEX_MODE0_mask                      = 0x03 << 4,
+	KCACHE_BANK_INDEX_MODE0_shift                     = 4,
+	    SQ_CF_INDEX_NONE                              = 0x00,
+	    SQ_CF_INDEX_0                                 = 0x01,
+	    SQ_CF_INDEX_1                                 = 0x02,
+	    SQ_CF_INVALID                                 = 0x03,
+	KCACHE_BANK_INDEX_MODE1_mask                      = 0x03 << 6,
+	KCACHE_BANK_INDEX_MODE1_shift                     = 6,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	KCACHE_BANK_INDEX_MODE2_mask                      = 0x03 << 8,
+	KCACHE_BANK_INDEX_MODE2_shift                     = 8,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	KCACHE_BANK_INDEX_MODE3_mask                      = 0x03 << 10,
+	KCACHE_BANK_INDEX_MODE3_shift                     = 10,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	KCACHE_BANK2_mask                                 = 0x0f << 22,
+	KCACHE_BANK2_shift                                = 22,
+	KCACHE_BANK3_mask                                 = 0x0f << 26,
+	KCACHE_BANK3_shift                                = 26,
+	KCACHE_MODE2_mask                                 = 0x03 << 30,
+	KCACHE_MODE2_shift                                = 30,
+/* 	    SQ_CF_KCACHE_NOP                              = 0x00, */
+/* 	    SQ_CF_KCACHE_LOCK_1                           = 0x01, */
+/* 	    SQ_CF_KCACHE_LOCK_2                           = 0x02, */
+/* 	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03, */
+    SQ_ALU_WORD0_LDS_IDX_OP                               = 0x00008dfc,
+	SRC0_SEL_mask                                     = 0x1ff << 0,
+	SRC0_SEL_shift                                    = 0,
+/* 	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0, */
+/* 	    SQ_ALU_SRC_TIME_HI                            = 0xe3, */
+/* 	    SQ_ALU_SRC_TIME_LO                            = 0xe4, */
+/* 	    SQ_ALU_SRC_MASK_HI                            = 0xe5, */
+/* 	    SQ_ALU_SRC_MASK_LO                            = 0xe6, */
+/* 	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7, */
+/* 	    SQ_ALU_SRC_SIMD_ID                            = 0xe8, */
+/* 	    SQ_ALU_SRC_SE_ID                              = 0xe9, */
+/* 	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea, */
+/* 	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb, */
+/* 	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec, */
+/* 	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed, */
+/* 	    SQ_ALU_SRC_LOOP_IDX                           = 0xee, */
+/* 	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0, */
+/* 	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3, */
+/* 	    SQ_ALU_SRC_1_DBL_L                            = 0xf4, */
+/* 	    SQ_ALU_SRC_1_DBL_M                            = 0xf5, */
+/* 	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6, */
+/* 	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7, */
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+	SRC0_REL_bit                                      = 1 << 9,
+	SRC0_CHAN_mask                                    = 0x03 << 10,
+	SRC0_CHAN_shift                                   = 10,
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	IDX_OFFSET_4_bit                                  = 1 << 12,
+	SRC1_SEL_mask                                     = 0x1ff << 13,
+	SRC1_SEL_shift                                    = 13,
+/* 	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0, */
+/* 	    SQ_ALU_SRC_TIME_HI                            = 0xe3, */
+/* 	    SQ_ALU_SRC_TIME_LO                            = 0xe4, */
+/* 	    SQ_ALU_SRC_MASK_HI                            = 0xe5, */
+/* 	    SQ_ALU_SRC_MASK_LO                            = 0xe6, */
+/* 	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7, */
+/* 	    SQ_ALU_SRC_SIMD_ID                            = 0xe8, */
+/* 	    SQ_ALU_SRC_SE_ID                              = 0xe9, */
+/* 	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea, */
+/* 	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb, */
+/* 	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec, */
+/* 	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed, */
+/* 	    SQ_ALU_SRC_LOOP_IDX                           = 0xee, */
+/* 	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0, */
+/* 	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3, */
+/* 	    SQ_ALU_SRC_1_DBL_L                            = 0xf4, */
+/* 	    SQ_ALU_SRC_1_DBL_M                            = 0xf5, */
+/* 	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6, */
+/* 	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7, */
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+	SRC1_REL_bit                                      = 1 << 22,
+	SRC1_CHAN_mask                                    = 0x03 << 23,
+	SRC1_CHAN_shift                                   = 23,
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	IDX_OFFSET_5_bit                                  = 1 << 25,
+	INDEX_MODE_mask                                   = 0x07 << 26,
+	INDEX_MODE_shift                                  = 26,
+	    SQ_INDEX_AR_X                                 = 0x00,
+	    SQ_INDEX_LOOP                                 = 0x04,
+	    SQ_INDEX_GLOBAL                               = 0x05,
+	    SQ_INDEX_GLOBAL_AR_X                          = 0x06,
+	PRED_SEL_mask                                     = 0x03 << 29,
+	PRED_SEL_shift                                    = 29,
+	    SQ_PRED_SEL_OFF                               = 0x00,
+	    SQ_PRED_SEL_ZERO                              = 0x02,
+	    SQ_PRED_SEL_ONE                               = 0x03,
+	LAST_bit                                          = 1 << 31,
+    SQ_MEM_GDS_WORD2                                      = 0x00008dfc,
+	SQ_MEM_GDS_WORD2__DST_SEL_X_mask                  = 0x07 << 0,
+	SQ_MEM_GDS_WORD2__DST_SEL_X_shift                 = 0,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_GDS_WORD2__DST_SEL_Y_mask                  = 0x07 << 3,
+	SQ_MEM_GDS_WORD2__DST_SEL_Y_shift                 = 3,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_GDS_WORD2__DST_SEL_Z_mask                  = 0x07 << 6,
+	SQ_MEM_GDS_WORD2__DST_SEL_Z_shift                 = 6,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_GDS_WORD2__DST_SEL_W_mask                  = 0x07 << 9,
+	SQ_MEM_GDS_WORD2__DST_SEL_W_shift                 = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+    SQ_CF_ALLOC_EXPORT_WORD0_RAT                          = 0x00008dfc,
+	RAT_ID_mask                                       = 0x0f << 0,
+	RAT_ID_shift                                      = 0,
+	RAT_INST_mask                                     = 0x3f << 4,
+	RAT_INST_shift                                    = 4,
+	    SQ_EXPORT_RAT_INST_NOP                        = 0x00,
+	    SQ_EXPORT_RAT_INST_STORE_TYPED                = 0x01,
+	    SQ_EXPORT_RAT_INST_STORE_RAW                  = 0x02,
+	    SQ_EXPORT_RAT_INST_STORE_RAW_FDENORM          = 0x03,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_INT                = 0x04,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_FLT                = 0x05,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM            = 0x06,
+	    SQ_EXPORT_RAT_INST_ADD                        = 0x07,
+	    SQ_EXPORT_RAT_INST_SUB                        = 0x08,
+	    SQ_EXPORT_RAT_INST_RSUB                       = 0x09,
+	    SQ_EXPORT_RAT_INST_MIN_INT                    = 0x0a,
+	    SQ_EXPORT_RAT_INST_MIN_UINT                   = 0x0b,
+	    SQ_EXPORT_RAT_INST_MAX_INT                    = 0x0c,
+	    SQ_EXPORT_RAT_INST_MAX_UINT                   = 0x0d,
+	    SQ_EXPORT_RAT_INST_AND                        = 0x0e,
+	    SQ_EXPORT_RAT_INST_OR                         = 0x0f,
+	    SQ_EXPORT_RAT_INST_XOR                        = 0x10,
+	    SQ_EXPORT_RAT_INST_MSKOR                      = 0x11,
+	    SQ_EXPORT_RAT_INST_INC_UINT                   = 0x12,
+	    SQ_EXPORT_RAT_INST_DEC_UINT                   = 0x13,
+	    SQ_EXPORT_RAT_INST_NOP_RTN                    = 0x20,
+	    SQ_EXPORT_RAT_INST_XCHG_RTN                   = 0x22,
+	    SQ_EXPORT_RAT_INST_XCHG_FDENORM_RTN           = 0x23,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_INT_RTN            = 0x24,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_FLT_RTN            = 0x25,
+	    SQ_EXPORT_RAT_INST_CMPXCHG_FDENORM_RTN        = 0x26,
+	    SQ_EXPORT_RAT_INST_ADD_RTN                    = 0x27,
+	    SQ_EXPORT_RAT_INST_SUB_RTN                    = 0x28,
+	    SQ_EXPORT_RAT_INST_RSUB_RTN                   = 0x29,
+	    SQ_EXPORT_RAT_INST_MIN_INT_RTN                = 0x2a,
+	    SQ_EXPORT_RAT_INST_MIN_UINT_RTN               = 0x2b,
+	    SQ_EXPORT_RAT_INST_MAX_INT_RTN                = 0x2c,
+	    SQ_EXPORT_RAT_INST_MAX_UINT_RTN               = 0x2d,
+	    SQ_EXPORT_RAT_INST_AND_RTN                    = 0x2e,
+	    SQ_EXPORT_RAT_INST_OR_RTN                     = 0x2f,
+	    SQ_EXPORT_RAT_INST_XOR_RTN                    = 0x30,
+	    SQ_EXPORT_RAT_INST_MSKOR_RTN                  = 0x31,
+	    SQ_EXPORT_RAT_INST_INC_UINT_RTN               = 0x32,
+	    SQ_EXPORT_RAT_INST_DEC_UINT_RTN               = 0x33,
+	RAT_INDEX_MODE_mask                               = 0x03 << 11,
+	RAT_INDEX_MODE_shift                              = 11,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_mask           = 0x03 << 13,
+	SQ_CF_ALLOC_EXPORT_WORD0_RAT__TYPE_shift          = 13,
+	    SQ_EXPORT_PIXEL                               = 0x00,
+	    SQ_EXPORT_POS                                 = 0x01,
+	    SQ_EXPORT_PARAM                               = 0x02,
+	    X_UNUSED_FOR_SX_EXPORTS                       = 0x03,
+	RW_GPR_mask                                       = 0x7f << 15,
+	RW_GPR_shift                                      = 15,
+	RW_REL_bit                                        = 1 << 22,
+	INDEX_GPR_mask                                    = 0x7f << 23,
+	INDEX_GPR_shift                                   = 23,
+	SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_mask      = 0x03 << 30,
+	SQ_CF_ALLOC_EXPORT_WORD0_RAT__ELEM_SIZE_shift     = 30,
+    SQ_CF_ALU_WORD0                                       = 0x00008dfc,
+	SQ_CF_ALU_WORD0__ADDR_mask                        = 0x3fffff << 0,
+	SQ_CF_ALU_WORD0__ADDR_shift                       = 0,
+	KCACHE_BANK0_mask                                 = 0x0f << 22,
+	KCACHE_BANK0_shift                                = 22,
+	KCACHE_BANK1_mask                                 = 0x0f << 26,
+	KCACHE_BANK1_shift                                = 26,
+	KCACHE_MODE0_mask                                 = 0x03 << 30,
+	KCACHE_MODE0_shift                                = 30,
+/* 	    SQ_CF_KCACHE_NOP                              = 0x00, */
+/* 	    SQ_CF_KCACHE_LOCK_1                           = 0x01, */
+/* 	    SQ_CF_KCACHE_LOCK_2                           = 0x02, */
+/* 	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03, */
+    SQ_MEM_GDS_WORD1                                      = 0x00008dfc,
+	SQ_MEM_GDS_WORD1__DST_GPR_mask                    = 0x7f << 0,
+	SQ_MEM_GDS_WORD1__DST_GPR_shift                   = 0,
+	DST_REL_MODE_mask                                 = 0x03 << 7,
+	DST_REL_MODE_shift                                = 7,
+	    SQ_REL_NONE                                   = 0x00,
+	    SQ_REL_LOOP                                   = 0x01,
+	    SQ_REL_GLOBAL                                 = 0x02,
+	GDS_OP_mask                                       = 0x3f << 9,
+	GDS_OP_shift                                      = 9,
+	    SQ_DS_INST_ADD                                = 0x00,
+	    SQ_DS_INST_SUB                                = 0x01,
+	    SQ_DS_INST_RSUB                               = 0x02,
+	    SQ_DS_INST_INC                                = 0x03,
+	    SQ_DS_INST_DEC                                = 0x04,
+	    SQ_DS_INST_MIN_INT                            = 0x05,
+	    SQ_DS_INST_MAX_INT                            = 0x06,
+	    SQ_DS_INST_MIN_UINT                           = 0x07,
+	    SQ_DS_INST_MAX_UINT                           = 0x08,
+	    SQ_DS_INST_AND                                = 0x09,
+	    SQ_DS_INST_OR                                 = 0x0a,
+	    SQ_DS_INST_XOR                                = 0x0b,
+	    SQ_DS_INST_MSKOR                              = 0x0c,
+	    SQ_DS_INST_WRITE                              = 0x0d,
+	    SQ_DS_INST_WRITE_REL                          = 0x0e,
+	    SQ_DS_INST_WRITE2                             = 0x0f,
+	    SQ_DS_INST_CMP_STORE                          = 0x10,
+	    SQ_DS_INST_CMP_STORE_SPF                      = 0x11,
+	    SQ_DS_INST_BYTE_WRITE                         = 0x12,
+	    SQ_DS_INST_SHORT_WRITE                        = 0x13,
+	    SQ_DS_INST_ADD_RET                            = 0x20,
+	    SQ_DS_INST_SUB_RET                            = 0x21,
+	    SQ_DS_INST_RSUB_RET                           = 0x22,
+	    SQ_DS_INST_INC_RET                            = 0x23,
+	    SQ_DS_INST_DEC_RET                            = 0x24,
+	    SQ_DS_INST_MIN_INT_RET                        = 0x25,
+	    SQ_DS_INST_MAX_INT_RET                        = 0x26,
+	    SQ_DS_INST_MIN_UINT_RET                       = 0x27,
+	    SQ_DS_INST_MAX_UINT_RET                       = 0x28,
+	    SQ_DS_INST_AND_RET                            = 0x29,
+	    SQ_DS_INST_OR_RET                             = 0x2a,
+	    SQ_DS_INST_XOR_RET                            = 0x2b,
+	    SQ_DS_INST_MSKOR_RET                          = 0x2c,
+	    SQ_DS_INST_XCHG_RET                           = 0x2d,
+	    SQ_DS_INST_XCHG_REL_RET                       = 0x2e,
+	    SQ_DS_INST_XCHG2_RET                          = 0x2f,
+	    SQ_DS_INST_CMP_XCHG_RET                       = 0x30,
+	    SQ_DS_INST_CMP_XCHG_SPF_RET                   = 0x31,
+	    SQ_DS_INST_READ_RET                           = 0x32,
+	    SQ_DS_INST_READ_REL_RET                       = 0x33,
+	    SQ_DS_INST_READ2_RET                          = 0x34,
+	    SQ_DS_INST_READWRITE_RET                      = 0x35,
+	    SQ_DS_INST_BYTE_READ_RET                      = 0x36,
+	    SQ_DS_INST_UBYTE_READ_RET                     = 0x37,
+	    SQ_DS_INST_SHORT_READ_RET                     = 0x38,
+	    SQ_DS_INST_USHORT_READ_RET                    = 0x39,
+	    SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET           = 0x3f,
+	DS_OFFSET_mask                                    = 0x7f << 16,
+	DS_OFFSET_shift                                   = 16,
+	UAV_INDEX_MODE_mask                               = 0x03 << 24,
+	UAV_INDEX_MODE_shift                              = 24,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	UAV_ID_mask                                       = 0x0f << 26,
+	UAV_ID_shift                                      = 26,
+	ALLOC_CONSUME_bit                                 = 1 << 30,
+	BCAST_FIRST_REQ_bit                               = 1 << 31,
+    SQ_MEM_RD_WORD2                                       = 0x00008dfc,
+	ARRAY_BASE_mask                                   = 0x1fff << 0,
+	ARRAY_BASE_shift                                  = 0,
+	SQ_MEM_RD_WORD2__ENDIAN_SWAP_mask                 = 0x03 << 16,
+	SQ_MEM_RD_WORD2__ENDIAN_SWAP_shift                = 16,
+	    SQ_ENDIAN_NONE                                = 0x00,
+	    SQ_ENDIAN_8IN16                               = 0x01,
+	    SQ_ENDIAN_8IN32                               = 0x02,
+	SQ_MEM_RD_WORD2__ARRAY_SIZE_mask                  = 0xfff << 20,
+	SQ_MEM_RD_WORD2__ARRAY_SIZE_shift                 = 20,
+    SQ_CF_ALU_WORD1_EXT                                   = 0x00008dfc,
+	KCACHE_MODE3_mask                                 = 0x03 << 0,
+	KCACHE_MODE3_shift                                = 0,
+/* 	    SQ_CF_KCACHE_NOP                              = 0x00, */
+/* 	    SQ_CF_KCACHE_LOCK_1                           = 0x01, */
+/* 	    SQ_CF_KCACHE_LOCK_2                           = 0x02, */
+/* 	    SQ_CF_KCACHE_LOCK_LOOP_INDEX                  = 0x03, */
+	KCACHE_ADDR2_mask                                 = 0xff << 2,
+	KCACHE_ADDR2_shift                                = 2,
+	KCACHE_ADDR3_mask                                 = 0xff << 10,
+	KCACHE_ADDR3_shift                                = 10,
+	SQ_CF_ALU_WORD1_EXT__CF_INST_mask                 = 0x0f << 26,
+	SQ_CF_ALU_WORD1_EXT__CF_INST_shift                = 26,
+/* 	    SQ_CF_INST_ALU                                = 0x08, */
+/* 	    SQ_CF_INST_ALU_PUSH_BEFORE                    = 0x09, */
+/* 	    SQ_CF_INST_ALU_POP_AFTER                      = 0x0a, */
+/* 	    SQ_CF_INST_ALU_POP2_AFTER                     = 0x0b, */
+/* 	    SQ_CF_INST_ALU_EXTENDED                       = 0x0c, */
+/* 	    SQ_CF_INST_ALU_CONTINUE                       = 0x0d, */
+/* 	    SQ_CF_INST_ALU_BREAK                          = 0x0e, */
+/* 	    SQ_CF_INST_ALU_ELSE_AFTER                     = 0x0f, */
+/* 	BARRIER_bit                                       = 1 << 31, */
+    SQ_CF_GWS_WORD0                                       = 0x00008dfc,
+	VALUE_mask                                        = 0x3ff << 0,
+	VALUE_shift                                       = 0,
+	RESOURCE_mask                                     = 0x1f << 16,
+	RESOURCE_shift                                    = 16,
+	SIGN_bit                                          = 1 << 25,
+	VAL_INDEX_MODE_mask                               = 0x03 << 26,
+	VAL_INDEX_MODE_shift                              = 26,
+	    SQ_GWS_INDEX_NONE                             = 0x00,
+	    SQ_GWS_INDEX_0                                = 0x01,
+	    SQ_GWS_INDEX_1                                = 0x02,
+	    SQ_GWS_INDEX_MIX                              = 0x03,
+	RSRC_INDEX_MODE_mask                              = 0x03 << 28,
+	RSRC_INDEX_MODE_shift                             = 28,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	GWS_OPCODE_mask                                   = 0x03 << 30,
+	GWS_OPCODE_shift                                  = 30,
+	    SQ_GWS_SEMA_V                                 = 0x00,
+	    SQ_GWS_SEMA_P                                 = 0x01,
+	    SQ_GWS_BARRIER                                = 0x02,
+	    SQ_GWS_INIT                                   = 0x03,
+    SQ_VTX_WORD2                                          = 0x00008dfc,
+	SQ_VTX_WORD2__OFFSET_mask                         = 0xffff << 0,
+	SQ_VTX_WORD2__OFFSET_shift                        = 0,
+	SQ_VTX_WORD2__ENDIAN_SWAP_mask                    = 0x03 << 16,
+	SQ_VTX_WORD2__ENDIAN_SWAP_shift                   = 16,
+/* 	    SQ_ENDIAN_NONE                                = 0x00, */
+/* 	    SQ_ENDIAN_8IN16                               = 0x01, */
+/* 	    SQ_ENDIAN_8IN32                               = 0x02, */
+	CONST_BUF_NO_STRIDE_bit                           = 1 << 18,
+	SQ_VTX_WORD2__ALT_CONST_bit                       = 1 << 20,
+	BUFFER_INDEX_MODE_mask                            = 0x03 << 21,
+	BUFFER_INDEX_MODE_shift                           = 21,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+    SQ_ALU_WORD1_OP2_EXECUTE_MASK                         = 0x00008dfc,
+	EXECUTE_MASK_OP_mask                              = 0x03 << 5,
+	EXECUTE_MASK_OP_shift                             = 5,
+	    SQ_ALU_EXECUTE_MASK_OP_DEACTIVATE             = 0x00,
+	    SQ_ALU_EXECUTE_MASK_OP_BREAK                  = 0x01,
+	    SQ_ALU_EXECUTE_MASK_OP_CONTINUE               = 0x02,
+	    SQ_ALU_EXECUTE_MASK_OP_KILL                   = 0x03,
+    SQ_CF_ALLOC_EXPORT_WORD1_BUF                          = 0x00008dfc,
+	SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_mask     = 0xfff << 0,
+	SQ_CF_ALLOC_EXPORT_WORD1_BUF__ARRAY_SIZE_shift    = 0,
+	COMP_MASK_mask                                    = 0x0f << 12,
+	COMP_MASK_shift                                   = 12,
+    SQ_CF_WORD0                                           = 0x00008dfc,
+	SQ_CF_WORD0__ADDR_mask                            = 0xffffff << 0,
+	SQ_CF_WORD0__ADDR_shift                           = 0,
+	JUMPTABLE_SEL_mask                                = 0x07 << 24,
+	JUMPTABLE_SEL_shift                               = 24,
+	    SQ_CF_JUMPTABLE_SEL_CONST_A                   = 0x00,
+	    SQ_CF_JUMPTABLE_SEL_CONST_B                   = 0x01,
+	    SQ_CF_JUMPTABLE_SEL_CONST_C                   = 0x02,
+	    SQ_CF_JUMPTABLE_SEL_CONST_D                   = 0x03,
+	    SQ_CF_JUMPTABLE_SEL_INDEX_0                   = 0x04,
+	    SQ_CF_JUMPTABLE_SEL_INDEX_1                   = 0x05,
+    SQ_CF_ALLOC_EXPORT_WORD0                              = 0x00008dfc,
+/* 	ARRAY_BASE_mask                                   = 0x1fff << 0, */
+/* 	ARRAY_BASE_shift                                  = 0, */
+	SQ_CF_ALLOC_EXPORT_WORD0__TYPE_mask               = 0x03 << 13,
+	SQ_CF_ALLOC_EXPORT_WORD0__TYPE_shift              = 13,
+/* 	    SQ_EXPORT_PIXEL                               = 0x00, */
+/* 	    SQ_EXPORT_POS                                 = 0x01, */
+/* 	    SQ_EXPORT_PARAM                               = 0x02, */
+/* 	    X_UNUSED_FOR_SX_EXPORTS                       = 0x03, */
+/* 	RW_GPR_mask                                       = 0x7f << 15, */
+/* 	RW_GPR_shift                                      = 15, */
+/* 	RW_REL_bit                                        = 1 << 22, */
+/* 	INDEX_GPR_mask                                    = 0x7f << 23, */
+/* 	INDEX_GPR_shift                                   = 23, */
+	SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_mask          = 0x03 << 30,
+	SQ_CF_ALLOC_EXPORT_WORD0__ELEM_SIZE_shift         = 30,
+    SQ_MEM_GDS_WORD0                                      = 0x00008dfc,
+/* 	MEM_INST_mask                                     = 0x1f << 0, */
+/* 	MEM_INST_shift                                    = 0, */
+/* 	    SQ_MEM_INST_MEM                               = 0x02, */
+/* 	MEM_OP_mask                                       = 0x07 << 8, */
+/* 	MEM_OP_shift                                      = 8, */
+/* 	    SQ_MEM_OP_RD_SCRATCH                          = 0x00, */
+/* 	    SQ_MEM_OP_RD_SCATTER                          = 0x02, */
+/* 	    SQ_MEM_OP_GDS                                 = 0x04, */
+/* 	    SQ_MEM_OP_TF_WRITE                            = 0x05, */
+	SQ_MEM_GDS_WORD0__SRC_GPR_mask                    = 0x7f << 11,
+	SQ_MEM_GDS_WORD0__SRC_GPR_shift                   = 11,
+	SRC_REL_MODE_mask                                 = 0x03 << 18,
+	SRC_REL_MODE_shift                                = 18,
+/* 	    SQ_REL_NONE                                   = 0x00, */
+/* 	    SQ_REL_LOOP                                   = 0x01, */
+/* 	    SQ_REL_GLOBAL                                 = 0x02, */
+	SQ_MEM_GDS_WORD0__SRC_SEL_X_mask                  = 0x07 << 20,
+	SQ_MEM_GDS_WORD0__SRC_SEL_X_shift                 = 20,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_MEM_GDS_WORD0__SRC_SEL_Y_mask                  = 0x07 << 23,
+	SQ_MEM_GDS_WORD0__SRC_SEL_Y_shift                 = 23,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	SRC_SEL_Z_mask                                    = 0x07 << 26, */
+/* 	SRC_SEL_Z_shift                                   = 26, */
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+    SQ_ALU_WORD1_LDS_DIRECT_LITERAL_HI                    = 0x00008dfc,
+	OFFSET_B_mask                                     = 0x1fff << 0,
+	OFFSET_B_shift                                    = 0,
+	STRIDE_B_mask                                     = 0x7f << 13,
+	STRIDE_B_shift                                    = 13,
+	THREAD_REL_B_bit                                  = 1 << 22,
+	DIRECT_READ_32_bit                                = 1 << 31,
+    SQ_VTX_WORD1                                          = 0x00008dfc,
+	SQ_VTX_WORD1__DST_SEL_X_mask                      = 0x07 << 9,
+	SQ_VTX_WORD1__DST_SEL_X_shift                     = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_VTX_WORD1__DST_SEL_Y_mask                      = 0x07 << 12,
+	SQ_VTX_WORD1__DST_SEL_Y_shift                     = 12,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_VTX_WORD1__DST_SEL_Z_mask                      = 0x07 << 15,
+	SQ_VTX_WORD1__DST_SEL_Z_shift                     = 15,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_VTX_WORD1__DST_SEL_W_mask                      = 0x07 << 18,
+	SQ_VTX_WORD1__DST_SEL_W_shift                     = 18,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	USE_CONST_FIELDS_bit                              = 1 << 21,
+	SQ_VTX_WORD1__DATA_FORMAT_mask                    = 0x3f << 22,
+	SQ_VTX_WORD1__DATA_FORMAT_shift                   = 22,
+	SQ_VTX_WORD1__NUM_FORMAT_ALL_mask                 = 0x03 << 28,
+	SQ_VTX_WORD1__NUM_FORMAT_ALL_shift                = 28,
+	    SQ_NUM_FORMAT_NORM                            = 0x00,
+	    SQ_NUM_FORMAT_INT                             = 0x01,
+	    SQ_NUM_FORMAT_SCALED                          = 0x02,
+	SQ_VTX_WORD1__FORMAT_COMP_ALL_bit                 = 1 << 30,
+	SQ_VTX_WORD1__SRF_MODE_ALL_bit                    = 1 << 31,
+    SQ_ALU_WORD1_OP2                                      = 0x00008dfc,
+	SRC0_ABS_bit                                      = 1 << 0,
+	SRC1_ABS_bit                                      = 1 << 1,
+	UPDATE_EXECUTE_MASK_bit                           = 1 << 2,
+	UPDATE_PRED_bit                                   = 1 << 3,
+	WRITE_MASK_bit                                    = 1 << 4,
+	OMOD_mask                                         = 0x03 << 5,
+	OMOD_shift                                        = 5,
+	    SQ_ALU_OMOD_OFF                               = 0x00,
+	    SQ_ALU_OMOD_M2                                = 0x01,
+	    SQ_ALU_OMOD_M4                                = 0x02,
+	    SQ_ALU_OMOD_D2                                = 0x03,
+	SQ_ALU_WORD1_OP2__ALU_INST_mask                   = 0x7ff << 7,
+	SQ_ALU_WORD1_OP2__ALU_INST_shift                  = 7,
+	    SQ_OP2_INST_ADD                               = 0x00,
+	    SQ_OP2_INST_MUL                               = 0x01,
+	    SQ_OP2_INST_MUL_IEEE                          = 0x02,
+	    SQ_OP2_INST_MAX                               = 0x03,
+	    SQ_OP2_INST_MIN                               = 0x04,
+	    SQ_OP2_INST_MAX_DX10                          = 0x05,
+	    SQ_OP2_INST_MIN_DX10                          = 0x06,
+	    SQ_OP2_INST_SETE                              = 0x08,
+	    SQ_OP2_INST_SETGT                             = 0x09,
+	    SQ_OP2_INST_SETGE                             = 0x0a,
+	    SQ_OP2_INST_SETNE                             = 0x0b,
+	    SQ_OP2_INST_SETE_DX10                         = 0x0c,
+	    SQ_OP2_INST_SETGT_DX10                        = 0x0d,
+	    SQ_OP2_INST_SETGE_DX10                        = 0x0e,
+	    SQ_OP2_INST_SETNE_DX10                        = 0x0f,
+	    SQ_OP2_INST_FRACT                             = 0x10,
+	    SQ_OP2_INST_TRUNC                             = 0x11,
+	    SQ_OP2_INST_CEIL                              = 0x12,
+	    SQ_OP2_INST_RNDNE                             = 0x13,
+	    SQ_OP2_INST_FLOOR                             = 0x14,
+	    SQ_OP2_INST_ASHR_INT                          = 0x15,
+	    SQ_OP2_INST_LSHR_INT                          = 0x16,
+	    SQ_OP2_INST_LSHL_INT                          = 0x17,
+	    SQ_OP2_INST_MOV                               = 0x19,
+	    SQ_OP2_INST_NOP                               = 0x1a,
+	    SQ_OP2_INST_PRED_SETGT_UINT                   = 0x1e,
+	    SQ_OP2_INST_PRED_SETGE_UINT                   = 0x1f,
+	    SQ_OP2_INST_PRED_SETE                         = 0x20,
+	    SQ_OP2_INST_PRED_SETGT                        = 0x21,
+	    SQ_OP2_INST_PRED_SETGE                        = 0x22,
+	    SQ_OP2_INST_PRED_SETNE                        = 0x23,
+	    SQ_OP2_INST_PRED_SET_INV                      = 0x24,
+	    SQ_OP2_INST_PRED_SET_POP                      = 0x25,
+	    SQ_OP2_INST_PRED_SET_CLR                      = 0x26,
+	    SQ_OP2_INST_PRED_SET_RESTORE                  = 0x27,
+	    SQ_OP2_INST_PRED_SETE_PUSH                    = 0x28,
+	    SQ_OP2_INST_PRED_SETGT_PUSH                   = 0x29,
+	    SQ_OP2_INST_PRED_SETGE_PUSH                   = 0x2a,
+	    SQ_OP2_INST_PRED_SETNE_PUSH                   = 0x2b,
+	    SQ_OP2_INST_KILLE                             = 0x2c,
+	    SQ_OP2_INST_KILLGT                            = 0x2d,
+	    SQ_OP2_INST_KILLGE                            = 0x2e,
+	    SQ_OP2_INST_KILLNE                            = 0x2f,
+	    SQ_OP2_INST_AND_INT                           = 0x30,
+	    SQ_OP2_INST_OR_INT                            = 0x31,
+	    SQ_OP2_INST_XOR_INT                           = 0x32,
+	    SQ_OP2_INST_NOT_INT                           = 0x33,
+	    SQ_OP2_INST_ADD_INT                           = 0x34,
+	    SQ_OP2_INST_SUB_INT                           = 0x35,
+	    SQ_OP2_INST_MAX_INT                           = 0x36,
+	    SQ_OP2_INST_MIN_INT                           = 0x37,
+	    SQ_OP2_INST_MAX_UINT                          = 0x38,
+	    SQ_OP2_INST_MIN_UINT                          = 0x39,
+	    SQ_OP2_INST_SETE_INT                          = 0x3a,
+	    SQ_OP2_INST_SETGT_INT                         = 0x3b,
+	    SQ_OP2_INST_SETGE_INT                         = 0x3c,
+	    SQ_OP2_INST_SETNE_INT                         = 0x3d,
+	    SQ_OP2_INST_SETGT_UINT                        = 0x3e,
+	    SQ_OP2_INST_SETGE_UINT                        = 0x3f,
+	    SQ_OP2_INST_KILLGT_UINT                       = 0x40,
+	    SQ_OP2_INST_KILLGE_UINT                       = 0x41,
+	    SQ_OP2_INST_PRED_SETE_INT                     = 0x42,
+	    SQ_OP2_INST_PRED_SETGT_INT                    = 0x43,
+	    SQ_OP2_INST_PRED_SETGE_INT                    = 0x44,
+	    SQ_OP2_INST_PRED_SETNE_INT                    = 0x45,
+	    SQ_OP2_INST_KILLE_INT                         = 0x46,
+	    SQ_OP2_INST_KILLGT_INT                        = 0x47,
+	    SQ_OP2_INST_KILLGE_INT                        = 0x48,
+	    SQ_OP2_INST_KILLNE_INT                        = 0x49,
+	    SQ_OP2_INST_PRED_SETE_PUSH_INT                = 0x4a,
+	    SQ_OP2_INST_PRED_SETGT_PUSH_INT               = 0x4b,
+	    SQ_OP2_INST_PRED_SETGE_PUSH_INT               = 0x4c,
+	    SQ_OP2_INST_PRED_SETNE_PUSH_INT               = 0x4d,
+	    SQ_OP2_INST_PRED_SETLT_PUSH_INT               = 0x4e,
+	    SQ_OP2_INST_PRED_SETLE_PUSH_INT               = 0x4f,
+	    SQ_OP2_INST_FLT_TO_INT                        = 0x50,
+	    SQ_OP2_INST_BFREV_INT                         = 0x51,
+	    SQ_OP2_INST_ADDC_UINT                         = 0x52,
+	    SQ_OP2_INST_SUBB_UINT                         = 0x53,
+	    SQ_OP2_INST_GROUP_BARRIER                     = 0x54,
+	    SQ_OP2_INST_GROUP_SEQ_BEGIN                   = 0x55,
+	    SQ_OP2_INST_GROUP_SEQ_END                     = 0x56,
+	    SQ_OP2_INST_SET_MODE                          = 0x57,
+	    SQ_OP2_INST_SET_CF_IDX0                       = 0x58,
+	    SQ_OP2_INST_SET_CF_IDX1                       = 0x59,
+	    SQ_OP2_INST_SET_LDS_SIZE                      = 0x5a,
+	    SQ_OP2_INST_EXP_IEEE                          = 0x81,
+	    SQ_OP2_INST_LOG_CLAMPED                       = 0x82,
+	    SQ_OP2_INST_LOG_IEEE                          = 0x83,
+	    SQ_OP2_INST_RECIP_CLAMPED                     = 0x84,
+	    SQ_OP2_INST_RECIP_FF                          = 0x85,
+	    SQ_OP2_INST_RECIP_IEEE                        = 0x86,
+	    SQ_OP2_INST_RECIPSQRT_CLAMPED                 = 0x87,
+	    SQ_OP2_INST_RECIPSQRT_FF                      = 0x88,
+	    SQ_OP2_INST_RECIPSQRT_IEEE                    = 0x89,
+	    SQ_OP2_INST_SQRT_IEEE                         = 0x8a,
+	    SQ_OP2_INST_SIN                               = 0x8d,
+	    SQ_OP2_INST_COS                               = 0x8e,
+	    SQ_OP2_INST_MULLO_INT                         = 0x8f,
+	    SQ_OP2_INST_MULHI_INT                         = 0x90,
+	    SQ_OP2_INST_MULLO_UINT                        = 0x91,
+	    SQ_OP2_INST_MULHI_UINT                        = 0x92,
+	    SQ_OP2_INST_RECIP_INT                         = 0x93,
+	    SQ_OP2_INST_RECIP_UINT                        = 0x94,
+	    SQ_OP2_INST_RECIP_64                          = 0x95,
+	    SQ_OP2_INST_RECIP_CLAMPED_64                  = 0x96,
+	    SQ_OP2_INST_RECIPSQRT_64                      = 0x97,
+	    SQ_OP2_INST_RECIPSQRT_CLAMPED_64              = 0x98,
+	    SQ_OP2_INST_SQRT_64                           = 0x99,
+	    SQ_OP2_INST_FLT_TO_UINT                       = 0x9a,
+	    SQ_OP2_INST_INT_TO_FLT                        = 0x9b,
+	    SQ_OP2_INST_UINT_TO_FLT                       = 0x9c,
+	    SQ_OP2_INST_BFM_INT                           = 0xa0,
+	    SQ_OP2_INST_FLT32_TO_FLT16                    = 0xa2,
+	    SQ_OP2_INST_FLT16_TO_FLT32                    = 0xa3,
+	    SQ_OP2_INST_UBYTE0_FLT                        = 0xa4,
+	    SQ_OP2_INST_UBYTE1_FLT                        = 0xa5,
+	    SQ_OP2_INST_UBYTE2_FLT                        = 0xa6,
+	    SQ_OP2_INST_UBYTE3_FLT                        = 0xa7,
+	    SQ_OP2_INST_BCNT_INT                          = 0xaa,
+	    SQ_OP2_INST_FFBH_UINT                         = 0xab,
+	    SQ_OP2_INST_FFBL_INT                          = 0xac,
+	    SQ_OP2_INST_FFBH_INT                          = 0xad,
+	    SQ_OP2_INST_FLT_TO_UINT4                      = 0xae,
+	    SQ_OP2_INST_DOT_IEEE                          = 0xaf,
+	    SQ_OP2_INST_FLT_TO_INT_RPI                    = 0xb0,
+	    SQ_OP2_INST_FLT_TO_INT_FLOOR                  = 0xb1,
+	    SQ_OP2_INST_MULHI_UINT24                      = 0xb2,
+	    SQ_OP2_INST_MBCNT_32HI_INT                    = 0xb3,
+	    SQ_OP2_INST_OFFSET_TO_FLT                     = 0xb4,
+	    SQ_OP2_INST_MUL_UINT24                        = 0xb5,
+	    SQ_OP2_INST_BCNT_ACCUM_PREV_INT               = 0xb6,
+	    SQ_OP2_INST_MBCNT_32LO_ACCUM_PREV_INT         = 0xb7,
+	    SQ_OP2_INST_SETE_64                           = 0xb8,
+	    SQ_OP2_INST_SETNE_64                          = 0xb9,
+	    SQ_OP2_INST_SETGT_64                          = 0xba,
+	    SQ_OP2_INST_SETGE_64                          = 0xbb,
+	    SQ_OP2_INST_MIN_64                            = 0xbc,
+	    SQ_OP2_INST_MAX_64                            = 0xbd,
+	    SQ_OP2_INST_DOT4                              = 0xbe,
+	    SQ_OP2_INST_DOT4_IEEE                         = 0xbf,
+	    SQ_OP2_INST_CUBE                              = 0xc0,
+	    SQ_OP2_INST_MAX4                              = 0xc1,
+	    SQ_OP2_INST_FREXP_64                          = 0xc4,
+	    SQ_OP2_INST_LDEXP_64                          = 0xc5,
+	    SQ_OP2_INST_FRACT_64                          = 0xc6,
+	    SQ_OP2_INST_PRED_SETGT_64                     = 0xc7,
+	    SQ_OP2_INST_PRED_SETE_64                      = 0xc8,
+	    SQ_OP2_INST_PRED_SETGE_64                     = 0xc9,
+	    SQ_OP2_INST_MUL_64                            = 0xca,
+	    SQ_OP2_INST_ADD_64                            = 0xcb,
+	    SQ_OP2_INST_MOVA_INT                          = 0xcc,
+	    SQ_OP2_INST_FLT64_TO_FLT32                    = 0xcd,
+	    SQ_OP2_INST_FLT32_TO_FLT64                    = 0xce,
+	    SQ_OP2_INST_SAD_ACCUM_PREV_UINT               = 0xcf,
+	    SQ_OP2_INST_DOT                               = 0xd0,
+	    SQ_OP2_INST_MUL_PREV                          = 0xd1,
+	    SQ_OP2_INST_MUL_IEEE_PREV                     = 0xd2,
+	    SQ_OP2_INST_ADD_PREV                          = 0xd3,
+	    SQ_OP2_INST_MULADD_PREV                       = 0xd4,
+	    SQ_OP2_INST_MULADD_IEEE_PREV                  = 0xd5,
+	    SQ_OP2_INST_INTERP_XY                         = 0xd6,
+	    SQ_OP2_INST_INTERP_ZW                         = 0xd7,
+	    SQ_OP2_INST_INTERP_X                          = 0xd8,
+	    SQ_OP2_INST_INTERP_Z                          = 0xd9,
+	    SQ_OP2_INST_STORE_FLAGS                       = 0xda,
+	    SQ_OP2_INST_LOAD_STORE_FLAGS                  = 0xdb,
+	    SQ_OP2_INST_INTERP_LOAD_P0                    = 0xe0,
+	    SQ_OP2_INST_INTERP_LOAD_P10                   = 0xe1,
+	    SQ_OP2_INST_INTERP_LOAD_P20                   = 0xe2,
+    SQ_CF_WORD1                                           = 0x00008dfc,
+	POP_COUNT_mask                                    = 0x07 << 0,
+	POP_COUNT_shift                                   = 0,
+	CF_CONST_mask                                     = 0x1f << 3,
+	CF_CONST_shift                                    = 3,
+	COND_mask                                         = 0x03 << 8,
+	COND_shift                                        = 8,
+	    SQ_CF_COND_ACTIVE                             = 0x00,
+	    SQ_CF_COND_FALSE                              = 0x01,
+	    SQ_CF_COND_BOOL                               = 0x02,
+	    SQ_CF_COND_NOT_BOOL                           = 0x03,
+	SQ_CF_WORD1__COUNT_mask                           = 0x3f << 10,
+	SQ_CF_WORD1__COUNT_shift                          = 10,
+/* 	VALID_PIXEL_MODE_bit                              = 1 << 20, */
+	SQ_CF_WORD1__CF_INST_mask                         = 0xff << 22,
+	SQ_CF_WORD1__CF_INST_shift                        = 22,
+	    SQ_CF_INST_NOP                                = 0x00,
+	    SQ_CF_INST_TC                                 = 0x01,
+	    SQ_CF_INST_VC                                 = 0x02,
+	    SQ_CF_INST_GDS                                = 0x03,
+	    SQ_CF_INST_LOOP_START                         = 0x04,
+	    SQ_CF_INST_LOOP_END                           = 0x05,
+	    SQ_CF_INST_LOOP_START_DX10                    = 0x06,
+	    SQ_CF_INST_LOOP_START_NO_AL                   = 0x07,
+	    SQ_CF_INST_LOOP_CONTINUE                      = 0x08,
+	    SQ_CF_INST_LOOP_BREAK                         = 0x09,
+	    SQ_CF_INST_JUMP                               = 0x0a,
+	    SQ_CF_INST_PUSH                               = 0x0b,
+	    SQ_CF_INST_ELSE                               = 0x0d,
+	    SQ_CF_INST_POP                                = 0x0e,
+	    SQ_CF_INST_CALL                               = 0x12,
+	    SQ_CF_INST_CALL_FS                            = 0x13,
+	    SQ_CF_INST_RETURN                             = 0x14,
+	    SQ_CF_INST_EMIT_VERTEX                        = 0x15,
+	    SQ_CF_INST_EMIT_CUT_VERTEX                    = 0x16,
+	    SQ_CF_INST_CUT_VERTEX                         = 0x17,
+	    SQ_CF_INST_KILL                               = 0x18,
+	    SQ_CF_INST_WAIT_ACK                           = 0x1a,
+	    SQ_CF_INST_TC_ACK                             = 0x1b,
+	    SQ_CF_INST_VC_ACK                             = 0x1c,
+	    SQ_CF_INST_JUMPTABLE                          = 0x1d,
+	    SQ_CF_INST_GLOBAL_WAVE_SYNC                   = 0x1e,
+	    SQ_CF_INST_HALT                               = 0x1f,
+	    SQ_CF_INST_END                                = 0x20,
+	    SQ_CF_INST_LDS_DEALLOC                        = 0x21,
+	    SQ_CF_INST_PUSH_WQM                           = 0x22,
+	    SQ_CF_INST_POP_WQM                            = 0x23,
+	    SQ_CF_INST_ELSE_WQM                           = 0x24,
+	    SQ_CF_INST_JUMP_ANY                           = 0x25,
+	    SQ_CF_INST_REACTIVATE                         = 0x26,
+	    SQ_CF_INST_REACTIVATE_WQM                     = 0x27,
+	    SQ_CF_INST_INTERRUPT                          = 0x28,
+	    SQ_CF_INST_INTERRUPT_AND_SLEEP                = 0x29,
+	    SQ_CF_INST_SET_PRIORITY                       = 0x2a,
+/* 	BARRIER_bit                                       = 1 << 31, */
+    SQ_VTX_WORD1_SEM                                      = 0x00008dfc,
+	SEMANTIC_ID_mask                                  = 0xff << 0,
+	SEMANTIC_ID_shift                                 = 0,
+    SQ_TEX_WORD0                                          = 0x00008dfc,
+	TEX_INST_mask                                     = 0x1f << 0,
+	TEX_INST_shift                                    = 0,
+	    SQ_TEX_INST_LD                                = 0x03,
+	    SQ_TEX_INST_GET_TEXTURE_RESINFO               = 0x04,
+	    SQ_TEX_INST_GET_NUMBER_OF_SAMPLES             = 0x05,
+	    SQ_TEX_INST_GET_LOD                           = 0x06,
+	    SQ_TEX_INST_GET_GRADIENTS_H                   = 0x07,
+	    SQ_TEX_INST_GET_GRADIENTS_V                   = 0x08,
+	    SQ_TEX_INST_SET_TEXTURE_OFFSETS               = 0x09,
+	    SQ_TEX_INST_KEEP_GRADIENTS                    = 0x0a,
+	    SQ_TEX_INST_SET_GRADIENTS_H                   = 0x0b,
+	    SQ_TEX_INST_SET_GRADIENTS_V                   = 0x0c,
+	    SQ_TEX_INST_PASS                              = 0x0d,
+	    SQ_TEX_INST_SAMPLE                            = 0x10,
+	    SQ_TEX_INST_SAMPLE_L                          = 0x11,
+	    SQ_TEX_INST_SAMPLE_LB                         = 0x12,
+	    SQ_TEX_INST_SAMPLE_LZ                         = 0x13,
+	    SQ_TEX_INST_SAMPLE_G                          = 0x14,
+	    SQ_TEX_INST_GATHER4                           = 0x15,
+	    SQ_TEX_INST_SAMPLE_G_LB                       = 0x16,
+	    SQ_TEX_INST_GATHER4_O                         = 0x17,
+	    SQ_TEX_INST_SAMPLE_C                          = 0x18,
+	    SQ_TEX_INST_SAMPLE_C_L                        = 0x19,
+	    SQ_TEX_INST_SAMPLE_C_LB                       = 0x1a,
+	    SQ_TEX_INST_SAMPLE_C_LZ                       = 0x1b,
+	    SQ_TEX_INST_SAMPLE_C_G                        = 0x1c,
+	    SQ_TEX_INST_GATHER4_C                         = 0x1d,
+	    SQ_TEX_INST_SAMPLE_C_G_LB                     = 0x1e,
+	    SQ_TEX_INST_GATHER4_C_O                       = 0x1f,
+	INST_MOD_mask                                     = 0x03 << 5,
+	INST_MOD_shift                                    = 5,
+/* 	FETCH_WHOLE_QUAD_bit                              = 1 << 7, */
+	RESOURCE_ID_mask                                  = 0xff << 8,
+	RESOURCE_ID_shift                                 = 8,
+	SQ_TEX_WORD0__SRC_GPR_mask                        = 0x7f << 16,
+	SQ_TEX_WORD0__SRC_GPR_shift                       = 16,
+/* 	SRC_REL_bit                                       = 1 << 23, */
+	SQ_TEX_WORD0__ALT_CONST_bit                       = 1 << 24,
+	RESOURCE_INDEX_MODE_mask                          = 0x03 << 25,
+	RESOURCE_INDEX_MODE_shift                         = 25,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+	SAMPLER_INDEX_MODE_mask                           = 0x03 << 27,
+	SAMPLER_INDEX_MODE_shift                          = 27,
+/* 	    SQ_CF_INDEX_NONE                              = 0x00, */
+/* 	    SQ_CF_INDEX_0                                 = 0x01, */
+/* 	    SQ_CF_INDEX_1                                 = 0x02, */
+/* 	    SQ_CF_INVALID                                 = 0x03, */
+    SQ_VTX_WORD1_GPR                                      = 0x00008dfc,
+	SQ_VTX_WORD1_GPR__DST_GPR_mask                    = 0x7f << 0,
+	SQ_VTX_WORD1_GPR__DST_GPR_shift                   = 0,
+	SQ_VTX_WORD1_GPR__DST_REL_bit                     = 1 << 7,
+    SQ_ALU_WORD1_LDS_IDX_OP                               = 0x00008dfc,
+/* 	SRC2_SEL_mask                                     = 0x1ff << 0, */
+/* 	SRC2_SEL_shift                                    = 0, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0, */
+/* 	    SQ_ALU_SRC_TIME_HI                            = 0xe3, */
+/* 	    SQ_ALU_SRC_TIME_LO                            = 0xe4, */
+/* 	    SQ_ALU_SRC_MASK_HI                            = 0xe5, */
+/* 	    SQ_ALU_SRC_MASK_LO                            = 0xe6, */
+/* 	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7, */
+/* 	    SQ_ALU_SRC_SIMD_ID                            = 0xe8, */
+/* 	    SQ_ALU_SRC_SE_ID                              = 0xe9, */
+/* 	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea, */
+/* 	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb, */
+/* 	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec, */
+/* 	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed, */
+/* 	    SQ_ALU_SRC_LOOP_IDX                           = 0xee, */
+/* 	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0, */
+/* 	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3, */
+/* 	    SQ_ALU_SRC_1_DBL_L                            = 0xf4, */
+/* 	    SQ_ALU_SRC_1_DBL_M                            = 0xf5, */
+/* 	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6, */
+/* 	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7, */
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+/* 	SRC2_REL_bit                                      = 1 << 9, */
+/* 	SRC2_CHAN_mask                                    = 0x03 << 10, */
+/* 	SRC2_CHAN_shift                                   = 10, */
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	IDX_OFFSET_1_bit                                  = 1 << 12,
+	SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_mask            = 0x1f << 13,
+	SQ_ALU_WORD1_LDS_IDX_OP__ALU_INST_shift           = 13,
+/* 	    SQ_OP3_INST_BFE_UINT                          = 0x04, */
+/* 	    SQ_OP3_INST_BFE_INT                           = 0x05, */
+/* 	    SQ_OP3_INST_BFI_INT                           = 0x06, */
+/* 	    SQ_OP3_INST_FMA                               = 0x07, */
+/* 	    SQ_OP3_INST_CNDNE_64                          = 0x09, */
+/* 	    SQ_OP3_INST_FMA_64                            = 0x0a, */
+/* 	    SQ_OP3_INST_LERP_UINT                         = 0x0b, */
+/* 	    SQ_OP3_INST_BIT_ALIGN_INT                     = 0x0c, */
+/* 	    SQ_OP3_INST_BYTE_ALIGN_INT                    = 0x0d, */
+/* 	    SQ_OP3_INST_SAD_ACCUM_UINT                    = 0x0e, */
+/* 	    SQ_OP3_INST_SAD_ACCUM_HI_UINT                 = 0x0f, */
+/* 	    SQ_OP3_INST_MULADD_UINT24                     = 0x10, */
+/* 	    SQ_OP3_INST_LDS_IDX_OP                        = 0x11, */
+/* 	    SQ_OP3_INST_MULADD                            = 0x14, */
+/* 	    SQ_OP3_INST_MULADD_M2                         = 0x15, */
+/* 	    SQ_OP3_INST_MULADD_M4                         = 0x16, */
+/* 	    SQ_OP3_INST_MULADD_D2                         = 0x17, */
+/* 	    SQ_OP3_INST_MULADD_IEEE                       = 0x18, */
+/* 	    SQ_OP3_INST_CNDE                              = 0x19, */
+/* 	    SQ_OP3_INST_CNDGT                             = 0x1a, */
+/* 	    SQ_OP3_INST_CNDGE                             = 0x1b, */
+/* 	    SQ_OP3_INST_CNDE_INT                          = 0x1c, */
+/* 	    SQ_OP3_INST_CNDGT_INT                         = 0x1d, */
+/* 	    SQ_OP3_INST_CNDGE_INT                         = 0x1e, */
+/* 	    SQ_OP3_INST_MUL_LIT                           = 0x1f, */
+/* 	BANK_SWIZZLE_mask                                 = 0x07 << 18, */
+/* 	BANK_SWIZZLE_shift                                = 18, */
+/* 	    SQ_ALU_VEC_012                                = 0x00, */
+/* 	    SQ_ALU_VEC_021                                = 0x01, */
+/* 	    SQ_ALU_VEC_120                                = 0x02, */
+/* 	    SQ_ALU_VEC_102                                = 0x03, */
+/* 	    SQ_ALU_VEC_201                                = 0x04, */
+/* 	    SQ_ALU_VEC_210                                = 0x05, */
+	LDS_OP_mask                                       = 0x3f << 21,
+	LDS_OP_shift                                      = 21,
+/* 	    SQ_DS_INST_ADD                                = 0x00, */
+/* 	    SQ_DS_INST_SUB                                = 0x01, */
+/* 	    SQ_DS_INST_RSUB                               = 0x02, */
+/* 	    SQ_DS_INST_INC                                = 0x03, */
+/* 	    SQ_DS_INST_DEC                                = 0x04, */
+/* 	    SQ_DS_INST_MIN_INT                            = 0x05, */
+/* 	    SQ_DS_INST_MAX_INT                            = 0x06, */
+/* 	    SQ_DS_INST_MIN_UINT                           = 0x07, */
+/* 	    SQ_DS_INST_MAX_UINT                           = 0x08, */
+/* 	    SQ_DS_INST_AND                                = 0x09, */
+/* 	    SQ_DS_INST_OR                                 = 0x0a, */
+/* 	    SQ_DS_INST_XOR                                = 0x0b, */
+/* 	    SQ_DS_INST_MSKOR                              = 0x0c, */
+/* 	    SQ_DS_INST_WRITE                              = 0x0d, */
+/* 	    SQ_DS_INST_WRITE_REL                          = 0x0e, */
+/* 	    SQ_DS_INST_WRITE2                             = 0x0f, */
+/* 	    SQ_DS_INST_CMP_STORE                          = 0x10, */
+/* 	    SQ_DS_INST_CMP_STORE_SPF                      = 0x11, */
+/* 	    SQ_DS_INST_BYTE_WRITE                         = 0x12, */
+/* 	    SQ_DS_INST_SHORT_WRITE                        = 0x13, */
+/* 	    SQ_DS_INST_ADD_RET                            = 0x20, */
+/* 	    SQ_DS_INST_SUB_RET                            = 0x21, */
+/* 	    SQ_DS_INST_RSUB_RET                           = 0x22, */
+/* 	    SQ_DS_INST_INC_RET                            = 0x23, */
+/* 	    SQ_DS_INST_DEC_RET                            = 0x24, */
+/* 	    SQ_DS_INST_MIN_INT_RET                        = 0x25, */
+/* 	    SQ_DS_INST_MAX_INT_RET                        = 0x26, */
+/* 	    SQ_DS_INST_MIN_UINT_RET                       = 0x27, */
+/* 	    SQ_DS_INST_MAX_UINT_RET                       = 0x28, */
+/* 	    SQ_DS_INST_AND_RET                            = 0x29, */
+/* 	    SQ_DS_INST_OR_RET                             = 0x2a, */
+/* 	    SQ_DS_INST_XOR_RET                            = 0x2b, */
+/* 	    SQ_DS_INST_MSKOR_RET                          = 0x2c, */
+/* 	    SQ_DS_INST_XCHG_RET                           = 0x2d, */
+/* 	    SQ_DS_INST_XCHG_REL_RET                       = 0x2e, */
+/* 	    SQ_DS_INST_XCHG2_RET                          = 0x2f, */
+/* 	    SQ_DS_INST_CMP_XCHG_RET                       = 0x30, */
+/* 	    SQ_DS_INST_CMP_XCHG_SPF_RET                   = 0x31, */
+/* 	    SQ_DS_INST_READ_RET                           = 0x32, */
+/* 	    SQ_DS_INST_READ_REL_RET                       = 0x33, */
+/* 	    SQ_DS_INST_READ2_RET                          = 0x34, */
+/* 	    SQ_DS_INST_READWRITE_RET                      = 0x35, */
+/* 	    SQ_DS_INST_BYTE_READ_RET                      = 0x36, */
+/* 	    SQ_DS_INST_UBYTE_READ_RET                     = 0x37, */
+/* 	    SQ_DS_INST_SHORT_READ_RET                     = 0x38, */
+/* 	    SQ_DS_INST_USHORT_READ_RET                    = 0x39, */
+/* 	    SQ_DS_INST_ATOMIC_ORDERED_ALLOC_RET           = 0x3f, */
+	IDX_OFFSET_0_bit                                  = 1 << 27,
+	IDX_OFFSET_2_bit                                  = 1 << 28,
+/* 	DST_CHAN_mask                                     = 0x03 << 29, */
+/* 	DST_CHAN_shift                                    = 29, */
+/* 	    CHAN_X                                        = 0x00, */
+/* 	    CHAN_Y                                        = 0x01, */
+/* 	    CHAN_Z                                        = 0x02, */
+/* 	    CHAN_W                                        = 0x03, */
+	IDX_OFFSET_3_bit                                  = 1 << 31,
+    SQ_CF_ENCODING_WORD1                                  = 0x00008dfc,
+	SQ_CF_ENCODING_WORD1__ENCODING_mask               = 0x03 << 28,
+	SQ_CF_ENCODING_WORD1__ENCODING_shift              = 28,
+	    SQ_CF_ENCODING_INST_CF                        = 0x00,
+	    SQ_CF_ENCODING_INST_ALLOC_EXPORT              = 0x01,
+	    SQ_CF_ENCODING_INST_ALU0                      = 0x02,
+	    SQ_CF_ENCODING_INST_ALU1                      = 0x03,
+    SQ_ALU_WORD0                                          = 0x00008dfc,
+/* 	SRC0_SEL_mask                                     = 0x1ff << 0, */
+/* 	SRC0_SEL_shift                                    = 0, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0, */
+/* 	    SQ_ALU_SRC_TIME_HI                            = 0xe3, */
+/* 	    SQ_ALU_SRC_TIME_LO                            = 0xe4, */
+/* 	    SQ_ALU_SRC_MASK_HI                            = 0xe5, */
+/* 	    SQ_ALU_SRC_MASK_LO                            = 0xe6, */
+/* 	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7, */
+/* 	    SQ_ALU_SRC_SIMD_ID                            = 0xe8, */
+/* 	    SQ_ALU_SRC_SE_ID                              = 0xe9, */
+/* 	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea, */
+/* 	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb, */
+/* 	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec, */
+/* 	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed, */
+/* 	    SQ_ALU_SRC_LOOP_IDX                           = 0xee, */
+/* 	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0, */
+/* 	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3, */
+/* 	    SQ_ALU_SRC_1_DBL_L                            = 0xf4, */
+/* 	    SQ_ALU_SRC_1_DBL_M                            = 0xf5, */
+/* 	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6, */
+/* 	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7, */
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+/* 	SRC0_REL_bit                                      = 1 << 9, */
+/* 	SRC0_CHAN_mask                                    = 0x03 << 10, */
+/* 	SRC0_CHAN_shift                                   = 10, */
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	SRC0_NEG_bit                                      = 1 << 12,
+/* 	SRC1_SEL_mask                                     = 0x1ff << 13, */
+/* 	SRC1_SEL_shift                                    = 13, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A                           = 0xdb, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B                           = 0xdc, */
+/* 	    SQ_ALU_SRC_LDS_OQ_A_POP                       = 0xdd, */
+/* 	    SQ_ALU_SRC_LDS_OQ_B_POP                       = 0xde, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_A                       = 0xdf, */
+/* 	    SQ_ALU_SRC_LDS_DIRECT_B                       = 0xe0, */
+/* 	    SQ_ALU_SRC_TIME_HI                            = 0xe3, */
+/* 	    SQ_ALU_SRC_TIME_LO                            = 0xe4, */
+/* 	    SQ_ALU_SRC_MASK_HI                            = 0xe5, */
+/* 	    SQ_ALU_SRC_MASK_LO                            = 0xe6, */
+/* 	    SQ_ALU_SRC_HW_WAVE_ID                         = 0xe7, */
+/* 	    SQ_ALU_SRC_SIMD_ID                            = 0xe8, */
+/* 	    SQ_ALU_SRC_SE_ID                              = 0xe9, */
+/* 	    SQ_ALU_SRC_HW_THREADGRP_ID                    = 0xea, */
+/* 	    SQ_ALU_SRC_WAVE_ID_IN_GRP                     = 0xeb, */
+/* 	    SQ_ALU_SRC_NUM_THREADGRP_WAVES                = 0xec, */
+/* 	    SQ_ALU_SRC_HW_ALU_ODD                         = 0xed, */
+/* 	    SQ_ALU_SRC_LOOP_IDX                           = 0xee, */
+/* 	    SQ_ALU_SRC_PARAM_BASE_ADDR                    = 0xf0, */
+/* 	    SQ_ALU_SRC_NEW_PRIM_MASK                      = 0xf1, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_HI                       = 0xf2, */
+/* 	    SQ_ALU_SRC_PRIM_MASK_LO                       = 0xf3, */
+/* 	    SQ_ALU_SRC_1_DBL_L                            = 0xf4, */
+/* 	    SQ_ALU_SRC_1_DBL_M                            = 0xf5, */
+/* 	    SQ_ALU_SRC_0_5_DBL_L                          = 0xf6, */
+/* 	    SQ_ALU_SRC_0_5_DBL_M                          = 0xf7, */
+/* 	    SQ_ALU_SRC_0                                  = 0xf8, */
+/* 	    SQ_ALU_SRC_1                                  = 0xf9, */
+/* 	    SQ_ALU_SRC_1_INT                              = 0xfa, */
+/* 	    SQ_ALU_SRC_M_1_INT                            = 0xfb, */
+/* 	    SQ_ALU_SRC_0_5                                = 0xfc, */
+/* 	    SQ_ALU_SRC_LITERAL                            = 0xfd, */
+/* 	    SQ_ALU_SRC_PV                                 = 0xfe, */
+/* 	    SQ_ALU_SRC_PS                                 = 0xff, */
+/* 	SRC1_REL_bit                                      = 1 << 22, */
+/* 	SRC1_CHAN_mask                                    = 0x03 << 23, */
+/* 	SRC1_CHAN_shift                                   = 23, */
+/* 	    SQ_CHAN_X                                     = 0x00, */
+/* 	    SQ_CHAN_Y                                     = 0x01, */
+/* 	    SQ_CHAN_Z                                     = 0x02, */
+/* 	    SQ_CHAN_W                                     = 0x03, */
+	SRC1_NEG_bit                                      = 1 << 25,
+/* 	INDEX_MODE_mask                                   = 0x07 << 26, */
+/* 	INDEX_MODE_shift                                  = 26, */
+/* 	    SQ_INDEX_AR_X                                 = 0x00, */
+/* 	    SQ_INDEX_LOOP                                 = 0x04, */
+/* 	    SQ_INDEX_GLOBAL                               = 0x05, */
+/* 	    SQ_INDEX_GLOBAL_AR_X                          = 0x06, */
+/* 	PRED_SEL_mask                                     = 0x03 << 29, */
+/* 	PRED_SEL_shift                                    = 29, */
+/* 	    SQ_PRED_SEL_OFF                               = 0x00, */
+/* 	    SQ_PRED_SEL_ZERO                              = 0x02, */
+/* 	    SQ_PRED_SEL_ONE                               = 0x03, */
+/* 	LAST_bit                                          = 1 << 31, */
+    SQ_MEM_RD_WORD1                                       = 0x00008dfc,
+	SQ_MEM_RD_WORD1__DST_GPR_mask                     = 0x7f << 0,
+	SQ_MEM_RD_WORD1__DST_GPR_shift                    = 0,
+	SQ_MEM_RD_WORD1__DST_REL_bit                      = 1 << 7,
+	SQ_MEM_RD_WORD1__DST_SEL_X_mask                   = 0x07 << 9,
+	SQ_MEM_RD_WORD1__DST_SEL_X_shift                  = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_RD_WORD1__DST_SEL_Y_mask                   = 0x07 << 12,
+	SQ_MEM_RD_WORD1__DST_SEL_Y_shift                  = 12,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_RD_WORD1__DST_SEL_Z_mask                   = 0x07 << 15,
+	SQ_MEM_RD_WORD1__DST_SEL_Z_shift                  = 15,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_RD_WORD1__DST_SEL_W_mask                   = 0x07 << 18,
+	SQ_MEM_RD_WORD1__DST_SEL_W_shift                  = 18,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+/* 	    SQ_SEL_MASK                                   = 0x07, */
+	SQ_MEM_RD_WORD1__DATA_FORMAT_mask                 = 0x3f << 22,
+	SQ_MEM_RD_WORD1__DATA_FORMAT_shift                = 22,
+	SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_mask              = 0x03 << 28,
+	SQ_MEM_RD_WORD1__NUM_FORMAT_ALL_shift             = 28,
+/* 	    SQ_NUM_FORMAT_NORM                            = 0x00, */
+/* 	    SQ_NUM_FORMAT_INT                             = 0x01, */
+/* 	    SQ_NUM_FORMAT_SCALED                          = 0x02, */
+	SQ_MEM_RD_WORD1__FORMAT_COMP_ALL_bit              = 1 << 30,
+	SQ_MEM_RD_WORD1__SRF_MODE_ALL_bit                 = 1 << 31,
+    SQ_LSTMP_RING_BASE                                    = 0x00008e10,
+    SQ_LSTMP_RING_SIZE                                    = 0x00008e14,
+    SQ_HSTMP_RING_BASE                                    = 0x00008e18,
+    SQ_HSTMP_RING_SIZE                                    = 0x00008e1c,
+    SQ_EX_ALLOC_TABLE_SLOTS                               = 0x00008e48,
+	PIX_SLOTS_mask                                    = 0x7f << 0,
+	PIX_SLOTS_shift                                   = 0,
+	POS_SLOTS_mask                                    = 0x7f << 8,
+	POS_SLOTS_shift                                   = 8,
+	SMX_SLOTS_mask                                    = 0x7f << 16,
+	SMX_SLOTS_shift                                   = 16,
+    SX_EXPORT_BUFFER_SIZES                                = 0x0000900c,
+	COLOR_BUFFER_SIZE_mask                            = 0xff << 0,
+	COLOR_BUFFER_SIZE_shift                           = 0,
+	POSITION_BUFFER_SIZE_mask                         = 0xff << 8,
+	POSITION_BUFFER_SIZE_shift                        = 8,
+	SMX_BUFFER_SIZE_mask                              = 0xff << 16,
+	SMX_BUFFER_SIZE_shift                             = 16,
+    SX_MEMORY_EXPORT_BASE                                 = 0x00009010,
+    SX_MEMORY_EXPORT_SIZE                                 = 0x00009014,
+    SPI_CONFIG_CNTL                                       = 0x00009100,
+	GPR_WRITE_PRIORITY_mask                           = 0x3ffff << 0,
+	GPR_WRITE_PRIORITY_shift                          = 0,
+    SPI_CONFIG_CNTL_1                                     = 0x0000913c,
+	VTX_DONE_DELAY_mask                               = 0x0f << 0,
+	VTX_DONE_DELAY_shift                              = 0,
+	    X_DELAY_14_CLKS                               = 0x00,
+	    X_DELAY_16_CLKS                               = 0x01,
+	    X_DELAY_18_CLKS                               = 0x02,
+	    X_DELAY_20_CLKS                               = 0x03,
+	    X_DELAY_22_CLKS                               = 0x04,
+	    X_DELAY_24_CLKS                               = 0x05,
+	    X_DELAY_26_CLKS                               = 0x06,
+	    X_DELAY_28_CLKS                               = 0x07,
+	    X_DELAY_30_CLKS                               = 0x08,
+	    X_DELAY_32_CLKS                               = 0x09,
+	    X_DELAY_34_CLKS                               = 0x0a,
+	    X_DELAY_4_CLKS                                = 0x0b,
+	    X_DELAY_6_CLKS                                = 0x0c,
+	    X_DELAY_8_CLKS                                = 0x0d,
+	    X_DELAY_10_CLKS                               = 0x0e,
+	    X_DELAY_12_CLKS                               = 0x0f,
+	INTERP_ONE_PRIM_PER_ROW_bit                       = 1 << 4,
+	BC_OPTIMIZE_DISABLE_bit                           = 1 << 5,
+	PC_LIMIT_ENABLE_bit                               = 1 << 6,
+	PC_LIMIT_STRICT_bit                               = 1 << 7,
+	PC_LIMIT_SIZE_mask                                = 0xffff << 16,
+	PC_LIMIT_SIZE_shift                               = 16,
+    TD_CNTL                                               = 0x00009494,
+	SYNC_PHASE_SH_mask                                = 0x03 << 0,
+	SYNC_PHASE_SH_shift                               = 0,
+	PAD_STALL_EN_bit                                  = 1 << 8,
+	EXTEND_LDS_STALL_mask                             = 0x03 << 9,
+	EXTEND_LDS_STALL_shift                            = 9,
+	    X_0                                           = 0x00,
+	    EXTEND_LDS_STALL__X_1                         = 0x01,
+	    X_2                                           = 0x02,
+	    X_3                                           = 0x03,
+	GATHER4_FLOAT_MODE_bit                            = 1 << 16,
+	LD_FLOAT_MODE_bit                                 = 1 << 18,
+	GATHER4_DX9_MODE_bit                              = 1 << 19,
+    TD_STATUS                                             = 0x00009498,
+	BUSY_bit                                          = 1 << 31,
+    DB_SUBTILE_CONTROL                                    = 0x00009858,
+	MSAA1_X_mask                                      = 0x03 << 0,
+	MSAA1_X_shift                                     = 0,
+	MSAA1_Y_mask                                      = 0x03 << 2,
+	MSAA1_Y_shift                                     = 2,
+	MSAA2_X_mask                                      = 0x03 << 4,
+	MSAA2_X_shift                                     = 4,
+	MSAA2_Y_mask                                      = 0x03 << 6,
+	MSAA2_Y_shift                                     = 6,
+	MSAA4_X_mask                                      = 0x03 << 8,
+	MSAA4_X_shift                                     = 8,
+	MSAA4_Y_mask                                      = 0x03 << 10,
+	MSAA4_Y_shift                                     = 10,
+	MSAA8_X_mask                                      = 0x03 << 12,
+	MSAA8_X_shift                                     = 12,
+	MSAA8_Y_mask                                      = 0x03 << 14,
+	MSAA8_Y_shift                                     = 14,
+	MSAA16_X_mask                                     = 0x03 << 16,
+	MSAA16_X_shift                                    = 16,
+	MSAA16_Y_mask                                     = 0x03 << 18,
+	MSAA16_Y_shift                                    = 18,
+    DB_ZPASS_COUNT_LOW                                    = 0x00009870,
+    DB_ZPASS_COUNT_HI                                     = 0x00009874,
+	COUNT_HI_mask                                     = 0x7fffffff << 0,
+	COUNT_HI_shift                                    = 0,
+    TD_PS_BORDER_COLOR_INDEX                              = 0x0000a400,
+	INDEX_mask                                        = 0x1f << 0,
+	INDEX_shift                                       = 0,
+    TD_PS_BORDER_COLOR_RED                                = 0x0000a404,
+    TD_PS_BORDER_COLOR_GREEN                              = 0x0000a408,
+    TD_PS_BORDER_COLOR_BLUE                               = 0x0000a40c,
+    TD_PS_BORDER_COLOR_ALPHA                              = 0x0000a410,
+    TD_VS_BORDER_COLOR_INDEX                              = 0x0000a414,
+/* 	INDEX_mask                                        = 0x1f << 0, */
+/* 	INDEX_shift                                       = 0, */
+    TD_VS_BORDER_COLOR_RED                                = 0x0000a418,
+    TD_VS_BORDER_COLOR_GREEN                              = 0x0000a41c,
+    TD_VS_BORDER_COLOR_BLUE                               = 0x0000a420,
+    TD_VS_BORDER_COLOR_ALPHA                              = 0x0000a424,
+    TD_GS_BORDER_COLOR_INDEX                              = 0x0000a428,
+/* 	INDEX_mask                                        = 0x1f << 0, */
+/* 	INDEX_shift                                       = 0, */
+    TD_GS_BORDER_COLOR_RED                                = 0x0000a42c,
+    TD_GS_BORDER_COLOR_GREEN                              = 0x0000a430,
+    TD_GS_BORDER_COLOR_BLUE                               = 0x0000a434,
+    TD_GS_BORDER_COLOR_ALPHA                              = 0x0000a438,
+    TD_HS_BORDER_COLOR_INDEX                              = 0x0000a43c,
+/* 	INDEX_mask                                        = 0x1f << 0, */
+/* 	INDEX_shift                                       = 0, */
+    TD_HS_BORDER_COLOR_RED                                = 0x0000a440,
+    TD_HS_BORDER_COLOR_GREEN                              = 0x0000a444,
+    TD_HS_BORDER_COLOR_BLUE                               = 0x0000a448,
+    TD_HS_BORDER_COLOR_ALPHA                              = 0x0000a44c,
+    TD_LS_BORDER_COLOR_INDEX                              = 0x0000a450,
+/* 	INDEX_mask                                        = 0x1f << 0, */
+/* 	INDEX_shift                                       = 0, */
+    TD_LS_BORDER_COLOR_RED                                = 0x0000a454,
+    TD_LS_BORDER_COLOR_GREEN                              = 0x0000a458,
+    TD_LS_BORDER_COLOR_BLUE                               = 0x0000a45c,
+    TD_LS_BORDER_COLOR_ALPHA                              = 0x0000a460,
+    TD_CS_BORDER_COLOR_INDEX                              = 0x0000a464,
+/* 	INDEX_mask                                        = 0x1f << 0, */
+/* 	INDEX_shift                                       = 0, */
+    TD_CS_BORDER_COLOR_RED                                = 0x0000a468,
+    TD_CS_BORDER_COLOR_GREEN                              = 0x0000a46c,
+    TD_CS_BORDER_COLOR_BLUE                               = 0x0000a470,
+    TD_CS_BORDER_COLOR_ALPHA                              = 0x0000a474,
+    DB_RENDER_CONTROL                                     = 0x00028000,
+	DEPTH_CLEAR_ENABLE_bit                            = 1 << 0,
+	STENCIL_CLEAR_ENABLE_bit                          = 1 << 1,
+	DEPTH_COPY_bit                                    = 1 << 2,
+	STENCIL_COPY_bit                                  = 1 << 3,
+	RESUMMARIZE_ENABLE_bit                            = 1 << 4,
+	STENCIL_COMPRESS_DISABLE_bit                      = 1 << 5,
+	DEPTH_COMPRESS_DISABLE_bit                        = 1 << 6,
+	COPY_CENTROID_bit                                 = 1 << 7,
+	COPY_SAMPLE_mask                                  = 0x0f << 8,
+	COPY_SAMPLE_shift                                 = 8,
+    DB_COUNT_CONTROL                                      = 0x00028004,
+	ZPASS_INCREMENT_DISABLE_bit                       = 1 << 0,
+	PERFECT_ZPASS_COUNTS_bit                          = 1 << 1,
+	SAMPLE_RATE_mask                                  = 0x07 << 4,
+	SAMPLE_RATE_shift                                 = 4,
+    DB_DEPTH_VIEW                                         = 0x00028008,
+	SLICE_START_mask                                  = 0x7ff << 0,
+	SLICE_START_shift                                 = 0,
+	SLICE_MAX_mask                                    = 0x7ff << 13,
+	SLICE_MAX_shift                                   = 13,
+	Z_READ_ONLY_bit                                   = 1 << 24,
+	STENCIL_READ_ONLY_bit                             = 1 << 25,
+    DB_RENDER_OVERRIDE                                    = 0x0002800c,
+	FORCE_HIZ_ENABLE_mask                             = 0x03 << 0,
+	FORCE_HIZ_ENABLE_shift                            = 0,
+	    FORCE_OFF                                     = 0x00,
+	    FORCE_ENABLE                                  = 0x01,
+	    FORCE_DISABLE                                 = 0x02,
+	    FORCE_RESERVED                                = 0x03,
+	FORCE_HIS_ENABLE0_mask                            = 0x03 << 2,
+	FORCE_HIS_ENABLE0_shift                           = 2,
+/* 	    FORCE_OFF                                     = 0x00, */
+/* 	    FORCE_ENABLE                                  = 0x01, */
+/* 	    FORCE_DISABLE                                 = 0x02, */
+/* 	    FORCE_RESERVED                                = 0x03, */
+	FORCE_HIS_ENABLE1_mask                            = 0x03 << 4,
+	FORCE_HIS_ENABLE1_shift                           = 4,
+/* 	    FORCE_OFF                                     = 0x00, */
+/* 	    FORCE_ENABLE                                  = 0x01, */
+/* 	    FORCE_DISABLE                                 = 0x02, */
+/* 	    FORCE_RESERVED                                = 0x03, */
+	FORCE_SHADER_Z_ORDER_bit                          = 1 << 6,
+	FAST_Z_DISABLE_bit                                = 1 << 7,
+	FAST_STENCIL_DISABLE_bit                          = 1 << 8,
+	NOOP_CULL_DISABLE_bit                             = 1 << 9,
+	FORCE_COLOR_KILL_bit                              = 1 << 10,
+	FORCE_Z_READ_bit                                  = 1 << 11,
+	FORCE_STENCIL_READ_bit                            = 1 << 12,
+	FORCE_FULL_Z_RANGE_mask                           = 0x03 << 13,
+	FORCE_FULL_Z_RANGE_shift                          = 13,
+/* 	    FORCE_OFF                                     = 0x00, */
+/* 	    FORCE_ENABLE                                  = 0x01, */
+/* 	    FORCE_DISABLE                                 = 0x02, */
+/* 	    FORCE_RESERVED                                = 0x03, */
+	FORCE_QC_SMASK_CONFLICT_bit                       = 1 << 15,
+	DISABLE_VIEWPORT_CLAMP_bit                        = 1 << 16,
+	IGNORE_SC_ZRANGE_bit                              = 1 << 17,
+	DISABLE_FULLY_COVERED_bit                         = 1 << 18,
+	FORCE_Z_LIMIT_SUMM_mask                           = 0x03 << 19,
+	FORCE_Z_LIMIT_SUMM_shift                          = 19,
+	    FORCE_SUMM_OFF                                = 0x00,
+	    FORCE_SUMM_MINZ                               = 0x01,
+	    FORCE_SUMM_MAXZ                               = 0x02,
+	    FORCE_SUMM_BOTH                               = 0x03,
+	MAX_TILES_IN_DTT_mask                             = 0x1f << 21,
+	MAX_TILES_IN_DTT_shift                            = 21,
+	DISABLE_TILE_RATE_TILES_bit                       = 1 << 26,
+	FORCE_Z_DIRTY_bit                                 = 1 << 27,
+	FORCE_STENCIL_DIRTY_bit                           = 1 << 28,
+	FORCE_Z_VALID_bit                                 = 1 << 29,
+	FORCE_STENCIL_VALID_bit                           = 1 << 30,
+	PRESERVE_COMPRESSION_bit                          = 1 << 31,
+    DB_RENDER_OVERRIDE2                                   = 0x00028010,
+	PARTIAL_SQUAD_LAUNCH_CONTROL_mask                 = 0x03 << 0,
+	PARTIAL_SQUAD_LAUNCH_CONTROL_shift                = 0,
+	    PSLC_AUTO                                     = 0x00,
+	    PSLC_ON_HANG_ONLY                             = 0x01,
+	    PSLC_ASAP                                     = 0x02,
+	    PSLC_COUNTDOWN                                = 0x03,
+	PARTIAL_SQUAD_LAUNCH_COUNTDOWN_mask               = 0x07 << 2,
+	PARTIAL_SQUAD_LAUNCH_COUNTDOWN_shift              = 2,
+	DISABLE_ZMASK_EXPCLEAR_OPTIMIZATIO_bit            = 1 << 5,
+	DISABLE_SMEM_EXPCLEAR_OPTIMIZATION_bit            = 1 << 6,
+	DISABLE_COLOR_ON_VALIDATION_bit                   = 1 << 7,
+	DECOMPRESS_Z_ON_FLUSH_bit                         = 1 << 8,
+    DB_HTILE_DATA_BASE                                    = 0x00028014,
+    DB_STENCIL_CLEAR                                      = 0x00028028,
+	DB_STENCIL_CLEAR__CLEAR_mask                      = 0xff << 0,
+	DB_STENCIL_CLEAR__CLEAR_shift                     = 0,
+    DB_DEPTH_CLEAR                                        = 0x0002802c,
+    PA_SC_SCREEN_SCISSOR_TL                               = 0x00028030,
+	PA_SC_SCREEN_SCISSOR_TL__TL_X_mask                = 0xffff << 0,
+	PA_SC_SCREEN_SCISSOR_TL__TL_X_shift               = 0,
+	PA_SC_SCREEN_SCISSOR_TL__TL_Y_mask                = 0xffff << 16,
+	PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift               = 16,
+    PA_SC_SCREEN_SCISSOR_BR                               = 0x00028034,
+	PA_SC_SCREEN_SCISSOR_BR__BR_X_mask                = 0xffff << 0,
+	PA_SC_SCREEN_SCISSOR_BR__BR_X_shift               = 0,
+	PA_SC_SCREEN_SCISSOR_BR__BR_Y_mask                = 0xffff << 16,
+	PA_SC_SCREEN_SCISSOR_BR__BR_Y_shift               = 16,
+    DB_DEPTH_INFO                                         = 0x0002803c,
+	ADDR5_SWIZZLE_MASK_mask                           = 0x0f << 0,
+	ADDR5_SWIZZLE_MASK_shift                          = 0,
+    DB_Z_INFO                                             = 0x00028040,
+	DB_Z_INFO__FORMAT_mask                            = 0x03 << 0,
+	DB_Z_INFO__FORMAT_shift                           = 0,
+	    Z_INVALID                                     = 0x00,
+	    Z_16                                          = 0x01,
+	    Z_24                                          = 0x02,
+	    Z_32_FLOAT                                    = 0x03,
+	DB_Z_INFO__NUM_SAMPLES_mask                       = 0x03 << 2,
+	DB_Z_INFO__NUM_SAMPLES_shift                      = 2,
+	DB_Z_INFO__ARRAY_MODE_mask                        = 0x0f << 4,
+	DB_Z_INFO__ARRAY_MODE_shift                       = 4,
+	    ARRAY_1D_TILED_THIN1                          = 0x02,
+	    ARRAY_2D_TILED_THIN1                          = 0x04,
+	DB_Z_INFO__TILE_SPLIT_mask                        = 0x07 << 8,
+	DB_Z_INFO__TILE_SPLIT_shift                       = 8,
+	    ADDR_SURF_TILE_SPLIT_64B                      = 0x00,
+	    ADDR_SURF_TILE_SPLIT_128B                     = 0x01,
+	    ADDR_SURF_TILE_SPLIT_256B                     = 0x02,
+	    ADDR_SURF_TILE_SPLIT_512B                     = 0x03,
+	    ADDR_SURF_TILE_SPLIT_1KB                      = 0x04,
+	    ADDR_SURF_TILE_SPLIT_2KB                      = 0x05,
+	    ADDR_SURF_TILE_SPLIT_4KB                      = 0x06,
+	DB_Z_INFO__NUM_BANKS_mask                         = 0x03 << 12,
+	DB_Z_INFO__NUM_BANKS_shift                        = 12,
+	    ADDR_SURF_2_BANK                              = 0x00,
+	    ADDR_SURF_4_BANK                              = 0x01,
+	    ADDR_SURF_8_BANK                              = 0x02,
+	    ADDR_SURF_16_BANK                             = 0x03,
+	DB_Z_INFO__BANK_WIDTH_mask                        = 0x03 << 16,
+	DB_Z_INFO__BANK_WIDTH_shift                       = 16,
+	    ADDR_SURF_BANK_WIDTH_1                        = 0x00,
+	    ADDR_SURF_BANK_WIDTH_2                        = 0x01,
+	    ADDR_SURF_BANK_WIDTH_4                        = 0x02,
+	    ADDR_SURF_BANK_WIDTH_8                        = 0x03,
+	DB_Z_INFO__BANK_HEIGHT_mask                       = 0x03 << 20,
+	DB_Z_INFO__BANK_HEIGHT_shift                      = 20,
+	    ADDR_SURF_BANK_HEIGHT_1                       = 0x00,
+	    ADDR_SURF_BANK_HEIGHT_2                       = 0x01,
+	    ADDR_SURF_BANK_HEIGHT_4                       = 0x02,
+	    ADDR_SURF_BANK_HEIGHT_8                       = 0x03,
+	DB_Z_INFO__MACRO_TILE_ASPECT_mask                 = 0x03 << 24,
+	DB_Z_INFO__MACRO_TILE_ASPECT_shift                = 24,
+	    ADDR_SURF_MACRO_ASPECT_1                      = 0x00,
+	    ADDR_SURF_MACRO_ASPECT_2                      = 0x01,
+	    ADDR_SURF_MACRO_ASPECT_4                      = 0x02,
+	    ADDR_SURF_MACRO_ASPECT_8                      = 0x03,
+	ALLOW_EXPCLEAR_bit                                = 1 << 27,
+	READ_SIZE_bit                                     = 1 << 28,
+	TILE_SURFACE_ENABLE_bit                           = 1 << 29,
+	ZRANGE_PRECISION_bit                              = 1 << 31,
+    DB_STENCIL_INFO                                       = 0x00028044,
+	DB_STENCIL_INFO__FORMAT_bit                       = 1 << 0,
+	DB_STENCIL_INFO__TILE_SPLIT_mask                  = 0x07 << 8,
+	DB_STENCIL_INFO__TILE_SPLIT_shift                 = 8,
+/* 	    ADDR_SURF_TILE_SPLIT_64B                      = 0x00, */
+/* 	    ADDR_SURF_TILE_SPLIT_128B                     = 0x01, */
+/* 	    ADDR_SURF_TILE_SPLIT_256B                     = 0x02, */
+/* 	    ADDR_SURF_TILE_SPLIT_512B                     = 0x03, */
+/* 	    ADDR_SURF_TILE_SPLIT_1KB                      = 0x04, */
+/* 	    ADDR_SURF_TILE_SPLIT_2KB                      = 0x05, */
+/* 	    ADDR_SURF_TILE_SPLIT_4KB                      = 0x06, */
+/* 	ALLOW_EXPCLEAR_bit                                = 1 << 27, */
+	TILE_STENCIL_DISABLE_bit                          = 1 << 29,
+    DB_Z_READ_BASE                                        = 0x00028048,
+    DB_STENCIL_READ_BASE                                  = 0x0002804c,
+    DB_Z_WRITE_BASE                                       = 0x00028050,
+    DB_STENCIL_WRITE_BASE                                 = 0x00028054,
+    DB_DEPTH_SIZE                                         = 0x00028058,
+	PITCH_TILE_MAX_mask                               = 0x7ff << 0,
+	PITCH_TILE_MAX_shift                              = 0,
+	HEIGHT_TILE_MAX_mask                              = 0x7ff << 11,
+	HEIGHT_TILE_MAX_shift                             = 11,
+    DB_DEPTH_SLICE                                        = 0x0002805c,
+	SLICE_TILE_MAX_mask                               = 0x3fffff << 0,
+	SLICE_TILE_MAX_shift                              = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_PS_0                         = 0x00028140,
+	SQ_ALU_CONST_BUFFER_SIZE_PS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_PS_0__DATA_shift         = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_VS_0                         = 0x00028180,
+	SQ_ALU_CONST_BUFFER_SIZE_VS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_VS_0__DATA_shift         = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_GS_0                         = 0x000281c0,
+	SQ_ALU_CONST_BUFFER_SIZE_GS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_GS_0__DATA_shift         = 0,
+    PA_SC_WINDOW_OFFSET                                   = 0x00028200,
+	WINDOW_X_OFFSET_mask                              = 0xffff << 0,
+	WINDOW_X_OFFSET_shift                             = 0,
+	WINDOW_Y_OFFSET_mask                              = 0xffff << 16,
+	WINDOW_Y_OFFSET_shift                             = 16,
+    PA_SC_WINDOW_SCISSOR_TL                               = 0x00028204,
+	PA_SC_WINDOW_SCISSOR_TL__TL_X_mask                = 0x7fff << 0,
+	PA_SC_WINDOW_SCISSOR_TL__TL_X_shift               = 0,
+	PA_SC_WINDOW_SCISSOR_TL__TL_Y_mask                = 0x7fff << 16,
+	PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift               = 16,
+	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31,
+    PA_SC_WINDOW_SCISSOR_BR                               = 0x00028208,
+	PA_SC_WINDOW_SCISSOR_BR__BR_X_mask                = 0x7fff << 0,
+	PA_SC_WINDOW_SCISSOR_BR__BR_X_shift               = 0,
+	PA_SC_WINDOW_SCISSOR_BR__BR_Y_mask                = 0x7fff << 16,
+	PA_SC_WINDOW_SCISSOR_BR__BR_Y_shift               = 16,
+    PA_SC_CLIPRECT_RULE                                   = 0x0002820c,
+	CLIP_RULE_mask                                    = 0xffff << 0,
+	CLIP_RULE_shift                                   = 0,
+    PA_SC_CLIPRECT_0_TL                                   = 0x00028210,
+	PA_SC_CLIPRECT_0_TL_num                           = 4,
+	PA_SC_CLIPRECT_0_TL_offset                        = 8,
+	PA_SC_CLIPRECT_0_TL__TL_X_mask                    = 0x7fff << 0,
+	PA_SC_CLIPRECT_0_TL__TL_X_shift                   = 0,
+	PA_SC_CLIPRECT_0_TL__TL_Y_mask                    = 0x7fff << 16,
+	PA_SC_CLIPRECT_0_TL__TL_Y_shift                   = 16,
+    PA_SC_CLIPRECT_0_BR                                   = 0x00028214,
+	PA_SC_CLIPRECT_0_BR_num                           = 4,
+	PA_SC_CLIPRECT_0_BR_offset                        = 8,
+	PA_SC_CLIPRECT_0_BR__BR_X_mask                    = 0x7fff << 0,
+	PA_SC_CLIPRECT_0_BR__BR_X_shift                   = 0,
+	PA_SC_CLIPRECT_0_BR__BR_Y_mask                    = 0x7fff << 16,
+	PA_SC_CLIPRECT_0_BR__BR_Y_shift                   = 16,
+    PA_SC_EDGERULE                                        = 0x00028230,
+	ER_TRI_mask                                       = 0x0f << 0,
+	ER_TRI_shift                                      = 0,
+	ER_POINT_mask                                     = 0x0f << 4,
+	ER_POINT_shift                                    = 4,
+	ER_RECT_mask                                      = 0x0f << 8,
+	ER_RECT_shift                                     = 8,
+	ER_LINE_LR_mask                                   = 0x3f << 12,
+	ER_LINE_LR_shift                                  = 12,
+	ER_LINE_RL_mask                                   = 0x3f << 18,
+	ER_LINE_RL_shift                                  = 18,
+	ER_LINE_TB_mask                                   = 0x0f << 24,
+	ER_LINE_TB_shift                                  = 24,
+	ER_LINE_BT_mask                                   = 0x0f << 28,
+	ER_LINE_BT_shift                                  = 28,
+    PA_SU_HARDWARE_SCREEN_OFFSET                          = 0x00028234,
+	HW_SCREEN_OFFSET_X_mask                           = 0x1f << 0,
+	HW_SCREEN_OFFSET_X_shift                          = 0,
+	HW_SCREEN_OFFSET_Y_mask                           = 0x1f << 8,
+	HW_SCREEN_OFFSET_Y_shift                          = 8,
+    CB_TARGET_MASK                                        = 0x00028238,
+	TARGET0_ENABLE_mask                               = 0x0f << 0,
+	TARGET0_ENABLE_shift                              = 0,
+	TARGET1_ENABLE_mask                               = 0x0f << 4,
+	TARGET1_ENABLE_shift                              = 4,
+	TARGET2_ENABLE_mask                               = 0x0f << 8,
+	TARGET2_ENABLE_shift                              = 8,
+	TARGET3_ENABLE_mask                               = 0x0f << 12,
+	TARGET3_ENABLE_shift                              = 12,
+	TARGET4_ENABLE_mask                               = 0x0f << 16,
+	TARGET4_ENABLE_shift                              = 16,
+	TARGET5_ENABLE_mask                               = 0x0f << 20,
+	TARGET5_ENABLE_shift                              = 20,
+	TARGET6_ENABLE_mask                               = 0x0f << 24,
+	TARGET6_ENABLE_shift                              = 24,
+	TARGET7_ENABLE_mask                               = 0x0f << 28,
+	TARGET7_ENABLE_shift                              = 28,
+    CB_SHADER_MASK                                        = 0x0002823c,
+	OUTPUT0_ENABLE_mask                               = 0x0f << 0,
+	OUTPUT0_ENABLE_shift                              = 0,
+	OUTPUT1_ENABLE_mask                               = 0x0f << 4,
+	OUTPUT1_ENABLE_shift                              = 4,
+	OUTPUT2_ENABLE_mask                               = 0x0f << 8,
+	OUTPUT2_ENABLE_shift                              = 8,
+	OUTPUT3_ENABLE_mask                               = 0x0f << 12,
+	OUTPUT3_ENABLE_shift                              = 12,
+	OUTPUT4_ENABLE_mask                               = 0x0f << 16,
+	OUTPUT4_ENABLE_shift                              = 16,
+	OUTPUT5_ENABLE_mask                               = 0x0f << 20,
+	OUTPUT5_ENABLE_shift                              = 20,
+	OUTPUT6_ENABLE_mask                               = 0x0f << 24,
+	OUTPUT6_ENABLE_shift                              = 24,
+	OUTPUT7_ENABLE_mask                               = 0x0f << 28,
+	OUTPUT7_ENABLE_shift                              = 28,
+    PA_SC_GENERIC_SCISSOR_TL                              = 0x00028240,
+	PA_SC_GENERIC_SCISSOR_TL__TL_X_mask               = 0x7fff << 0,
+	PA_SC_GENERIC_SCISSOR_TL__TL_X_shift              = 0,
+	PA_SC_GENERIC_SCISSOR_TL__TL_Y_mask               = 0x7fff << 16,
+	PA_SC_GENERIC_SCISSOR_TL__TL_Y_shift              = 16,
+/* 	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31, */
+    PA_SC_GENERIC_SCISSOR_BR                              = 0x00028244,
+	PA_SC_GENERIC_SCISSOR_BR__BR_X_mask               = 0x7fff << 0,
+	PA_SC_GENERIC_SCISSOR_BR__BR_X_shift              = 0,
+	PA_SC_GENERIC_SCISSOR_BR__BR_Y_mask               = 0x7fff << 16,
+	PA_SC_GENERIC_SCISSOR_BR__BR_Y_shift              = 16,
+    PA_SC_VPORT_SCISSOR_0_TL                              = 0x00028250,
+	PA_SC_VPORT_SCISSOR_0_TL_num                      = 16,
+	PA_SC_VPORT_SCISSOR_0_TL_offset                   = 8,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_X_mask               = 0x7fff << 0,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_X_shift              = 0,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_Y_mask               = 0x7fff << 16,
+	PA_SC_VPORT_SCISSOR_0_TL__TL_Y_shift              = 16,
+/* 	WINDOW_OFFSET_DISABLE_bit                         = 1 << 31, */
+    PA_SC_VPORT_SCISSOR_0_BR                              = 0x00028254,
+	PA_SC_VPORT_SCISSOR_0_BR_num                      = 16,
+	PA_SC_VPORT_SCISSOR_0_BR_offset                   = 8,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_X_mask               = 0x7fff << 0,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_X_shift              = 0,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_Y_mask               = 0x7fff << 16,
+	PA_SC_VPORT_SCISSOR_0_BR__BR_Y_shift              = 16,
+    PA_SC_VPORT_ZMIN_0                                    = 0x000282d0,
+	PA_SC_VPORT_ZMIN_0_num                            = 16,
+	PA_SC_VPORT_ZMIN_0_offset                         = 8,
+    PA_SC_VPORT_ZMAX_0                                    = 0x000282d4,
+	PA_SC_VPORT_ZMAX_0_num                            = 16,
+	PA_SC_VPORT_ZMAX_0_offset                         = 8,
+    SX_MISC                                               = 0x00028350,
+	MULTIPASS_bit                                     = 1 << 0,
+    SX_SURFACE_SYNC                                       = 0x00028354,
+	SURFACE_SYNC_MASK_mask                            = 0x3ff << 0,
+	SURFACE_SYNC_MASK_shift                           = 0,
+    SX_SCATTER_EXPORT_BASE                                = 0x00028358,
+    SX_SCATTER_EXPORT_SIZE                                = 0x0002835c,
+    SQ_VTX_SEMANTIC_0                                     = 0x00028380,
+	SQ_VTX_SEMANTIC_0_num                             = 32,
+/* 	SEMANTIC_ID_mask                                  = 0xff << 0, */
+/* 	SEMANTIC_ID_shift                                 = 0, */
+    VGT_MAX_VTX_INDX                                      = 0x00028400,
+    VGT_MIN_VTX_INDX                                      = 0x00028404,
+    VGT_INDX_OFFSET                                       = 0x00028408,
+    VGT_MULTI_PRIM_IB_RESET_INDX                          = 0x0002840c,
+    SX_ALPHA_TEST_CONTROL                                 = 0x00028410,
+	ALPHA_FUNC_mask                                   = 0x07 << 0,
+	ALPHA_FUNC_shift                                  = 0,
+	    REF_NEVER                                     = 0x00,
+	    REF_LESS                                      = 0x01,
+	    REF_EQUAL                                     = 0x02,
+	    REF_LEQUAL                                    = 0x03,
+	    REF_GREATER                                   = 0x04,
+	    REF_NOTEQUAL                                  = 0x05,
+	    REF_GEQUAL                                    = 0x06,
+	    REF_ALWAYS                                    = 0x07,
+	ALPHA_TEST_ENABLE_bit                             = 1 << 3,
+	ALPHA_TEST_BYPASS_bit                             = 1 << 8,
+    CB_BLEND_RED                                          = 0x00028414,
+    CB_BLEND_GREEN                                        = 0x00028418,
+    CB_BLEND_BLUE                                         = 0x0002841c,
+    CB_BLEND_ALPHA                                        = 0x00028420,
+    DB_STENCILREFMASK                                     = 0x00028430,
+	STENCILREF_mask                                   = 0xff << 0,
+	STENCILREF_shift                                  = 0,
+	STENCILMASK_mask                                  = 0xff << 8,
+	STENCILMASK_shift                                 = 8,
+	STENCILWRITEMASK_mask                             = 0xff << 16,
+	STENCILWRITEMASK_shift                            = 16,
+    DB_STENCILREFMASK_BF                                  = 0x00028434,
+	STENCILREF_BF_mask                                = 0xff << 0,
+	STENCILREF_BF_shift                               = 0,
+	STENCILMASK_BF_mask                               = 0xff << 8,
+	STENCILMASK_BF_shift                              = 8,
+	STENCILWRITEMASK_BF_mask                          = 0xff << 16,
+	STENCILWRITEMASK_BF_shift                         = 16,
+    SX_ALPHA_REF                                          = 0x00028438,
+    PA_CL_VPORT_XSCALE_0                                  = 0x0002843c,
+	PA_CL_VPORT_XSCALE_0_num                          = 16,
+	PA_CL_VPORT_XSCALE_0_offset                       = 24,
+    PA_CL_VPORT_XOFFSET_0                                 = 0x00028440,
+	PA_CL_VPORT_XOFFSET_0_num                         = 16,
+	PA_CL_VPORT_XOFFSET_0_offset                      = 24,
+    PA_CL_VPORT_YSCALE_0                                  = 0x00028444,
+	PA_CL_VPORT_YSCALE_0_num                          = 16,
+	PA_CL_VPORT_YSCALE_0_offset                       = 24,
+    PA_CL_VPORT_YOFFSET_0                                 = 0x00028448,
+	PA_CL_VPORT_YOFFSET_0_num                         = 16,
+	PA_CL_VPORT_YOFFSET_0_offset                      = 24,
+    PA_CL_VPORT_ZSCALE_0                                  = 0x0002844c,
+	PA_CL_VPORT_ZSCALE_0_num                          = 16,
+	PA_CL_VPORT_ZSCALE_0_offset                       = 24,
+    PA_CL_VPORT_ZOFFSET_0                                 = 0x00028450,
+	PA_CL_VPORT_ZOFFSET_0_num                         = 16,
+	PA_CL_VPORT_ZOFFSET_0_offset                      = 24,
+    PA_CL_UCP_0_X                                         = 0x000285bc,
+	PA_CL_UCP_0_X_num                                 = 6,
+	PA_CL_UCP_0_X_offset                              = 16,
+    PA_CL_UCP_0_Y                                         = 0x000285c0,
+	PA_CL_UCP_0_Y_num                                 = 6,
+	PA_CL_UCP_0_Y_offset                              = 16,
+    PA_CL_UCP_0_Z                                         = 0x000285c4,
+	PA_CL_UCP_0_Z_num                                 = 6,
+	PA_CL_UCP_0_Z_offset                              = 16,
+    PA_CL_UCP_0_W                                         = 0x000285c8,
+	PA_CL_UCP_0_W_num                                 = 6,
+	PA_CL_UCP_0_W_offset                              = 16,
+    SPI_VS_OUT_ID_0                                       = 0x0002861c,
+	SPI_VS_OUT_ID_0_num                               = 10,
+	SEMANTIC_0_mask                                   = 0xff << 0,
+	SEMANTIC_0_shift                                  = 0,
+	SEMANTIC_1_mask                                   = 0xff << 8,
+	SEMANTIC_1_shift                                  = 8,
+	SEMANTIC_2_mask                                   = 0xff << 16,
+	SEMANTIC_2_shift                                  = 16,
+	SEMANTIC_3_mask                                   = 0xff << 24,
+	SEMANTIC_3_shift                                  = 24,
+    SPI_PS_INPUT_CNTL_0                                   = 0x00028644,
+	SPI_PS_INPUT_CNTL_0_num                           = 32,
+	SEMANTIC_mask                                     = 0xff << 0,
+	SEMANTIC_shift                                    = 0,
+	DEFAULT_VAL_mask                                  = 0x03 << 8,
+	DEFAULT_VAL_shift                                 = 8,
+	    X_0_0F                                        = 0x00,
+	FLAT_SHADE_bit                                    = 1 << 10,
+	CYL_WRAP_mask                                     = 0x0f << 13,
+	CYL_WRAP_shift                                    = 13,
+	PT_SPRITE_TEX_bit                                 = 1 << 17,
+    SPI_VS_OUT_CONFIG                                     = 0x000286c4,
+	VS_PER_COMPONENT_bit                              = 1 << 0,
+	VS_EXPORT_COUNT_mask                              = 0x1f << 1,
+	VS_EXPORT_COUNT_shift                             = 1,
+	VS_HALF_PACK_bit                                  = 1 << 6,
+	VS_EXPORTS_FOG_bit                                = 1 << 8,
+	VS_OUT_FOG_VEC_ADDR_mask                          = 0x1f << 9,
+	VS_OUT_FOG_VEC_ADDR_shift                         = 9,
+    SPI_PS_IN_CONTROL_0                                   = 0x000286cc,
+	NUM_INTERP_mask                                   = 0x3f << 0,
+	NUM_INTERP_shift                                  = 0,
+	POSITION_ENA_bit                                  = 1 << 8,
+	POSITION_CENTROID_bit                             = 1 << 9,
+	POSITION_ADDR_mask                                = 0x1f << 10,
+	POSITION_ADDR_shift                               = 10,
+	PARAM_GEN_mask                                    = 0x0f << 15,
+	PARAM_GEN_shift                                   = 15,
+	PERSP_GRADIENT_ENA_bit                            = 1 << 28,
+	LINEAR_GRADIENT_ENA_bit                           = 1 << 29,
+	POSITION_SAMPLE_bit                               = 1 << 30,
+    SPI_PS_IN_CONTROL_1                                   = 0x000286d0,
+	FRONT_FACE_ENA_bit                                = 1 << 8,
+	FRONT_FACE_ALL_BITS_bit                           = 1 << 11,
+	FRONT_FACE_ADDR_mask                              = 0x1f << 12,
+	FRONT_FACE_ADDR_shift                             = 12,
+	FOG_ADDR_mask                                     = 0x7f << 17,
+	FOG_ADDR_shift                                    = 17,
+	FIXED_PT_POSITION_ENA_bit                         = 1 << 24,
+	FIXED_PT_POSITION_ADDR_mask                       = 0x1f << 25,
+	FIXED_PT_POSITION_ADDR_shift                      = 25,
+	POSITION_ULC_bit                                  = 1 << 30,
+    SPI_INTERP_CONTROL_0                                  = 0x000286d4,
+	FLAT_SHADE_ENA_bit                                = 1 << 0,
+	PNT_SPRITE_ENA_bit                                = 1 << 1,
+	PNT_SPRITE_OVRD_X_mask                            = 0x07 << 2,
+	PNT_SPRITE_OVRD_X_shift                           = 2,
+	    SPI_PNT_SPRITE_SEL_0                          = 0x00,
+	    SPI_PNT_SPRITE_SEL_1                          = 0x01,
+	    SPI_PNT_SPRITE_SEL_S                          = 0x02,
+	    SPI_PNT_SPRITE_SEL_T                          = 0x03,
+	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04,
+	PNT_SPRITE_OVRD_Y_mask                            = 0x07 << 5,
+	PNT_SPRITE_OVRD_Y_shift                           = 5,
+/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+	PNT_SPRITE_OVRD_Z_mask                            = 0x07 << 8,
+	PNT_SPRITE_OVRD_Z_shift                           = 8,
+/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+	PNT_SPRITE_OVRD_W_mask                            = 0x07 << 11,
+	PNT_SPRITE_OVRD_W_shift                           = 11,
+/* 	    SPI_PNT_SPRITE_SEL_0                          = 0x00, */
+/* 	    SPI_PNT_SPRITE_SEL_1                          = 0x01, */
+/* 	    SPI_PNT_SPRITE_SEL_S                          = 0x02, */
+/* 	    SPI_PNT_SPRITE_SEL_T                          = 0x03, */
+/* 	    SPI_PNT_SPRITE_SEL_NONE                       = 0x04, */
+	PNT_SPRITE_TOP_1_bit                              = 1 << 14,
+    SPI_INPUT_Z                                           = 0x000286d8,
+	PROVIDE_Z_TO_SPI_bit                              = 1 << 0,
+    SPI_FOG_CNTL                                          = 0x000286dc,
+	PASS_FOG_THROUGH_PS_bit                           = 1 << 0,
+    SPI_BARYC_CNTL                                        = 0x000286e0,
+	PERSP_CENTER_ENA_mask                             = 0x03 << 0,
+	PERSP_CENTER_ENA_shift                            = 0,
+	    X_OFF                                         = 0x00,
+	    PERSP_CENTER_ENA__X_ON_AT_CENTER              = 0x01,
+	    PERSP_CENTER_ENA__X_ON_AT_CENTROID            = 0x02,
+	PERSP_CENTROID_ENA_mask                           = 0x03 << 4,
+	PERSP_CENTROID_ENA_shift                          = 4,
+/* 	    X_OFF                                         = 0x00, */
+	    PERSP_CENTROID_ENA__X_ON_AT_CENTROID          = 0x01,
+	    PERSP_CENTROID_ENA__X_ON_AT_CENTER            = 0x02,
+	PERSP_SAMPLE_ENA_mask                             = 0x03 << 8,
+	PERSP_SAMPLE_ENA_shift                            = 8,
+/* 	    X_OFF                                         = 0x00, */
+	PERSP_PULL_MODEL_ENA_mask                         = 0x03 << 12,
+	PERSP_PULL_MODEL_ENA_shift                        = 12,
+/* 	    X_OFF                                         = 0x00, */
+	LINEAR_CENTER_ENA_mask                            = 0x03 << 16,
+	LINEAR_CENTER_ENA_shift                           = 16,
+/* 	    X_OFF                                         = 0x00, */
+	    LINEAR_CENTER_ENA__X_ON_AT_CENTER             = 0x01,
+	    LINEAR_CENTER_ENA__X_ON_AT_CENTROID           = 0x02,
+	LINEAR_CENTROID_ENA_mask                          = 0x03 << 20,
+	LINEAR_CENTROID_ENA_shift                         = 20,
+/* 	    X_OFF                                         = 0x00, */
+	    LINEAR_CENTROID_ENA__X_ON_AT_CENTROID         = 0x01,
+	    LINEAR_CENTROID_ENA__X_ON_AT_CENTER           = 0x02,
+	LINEAR_SAMPLE_ENA_mask                            = 0x03 << 24,
+	LINEAR_SAMPLE_ENA_shift                           = 24,
+/* 	    X_OFF                                         = 0x00, */
+    SPI_PS_IN_CONTROL_2                                   = 0x000286e4,
+	LINE_STIPPLE_TEX_ADDR_mask                        = 0xff << 0,
+	LINE_STIPPLE_TEX_ADDR_shift                       = 0,
+	LINE_STIPPLE_TEX_ENA_bit                          = 1 << 8,
+    SPI_GPR_MGMT                                          = 0x000286f8,
+	SPI_GPR_MGMT__NUM_PS_GPRS_mask                    = 0x1f << 0,
+	SPI_GPR_MGMT__NUM_PS_GPRS_shift                   = 0,
+	SPI_GPR_MGMT__NUM_VS_GPRS_mask                    = 0x1f << 5,
+	SPI_GPR_MGMT__NUM_VS_GPRS_shift                   = 5,
+	NUM_GS_GPRS_mask                                  = 0x1f << 10,
+	NUM_GS_GPRS_shift                                 = 10,
+	NUM_ES_GPRS_mask                                  = 0x1f << 15,
+	NUM_ES_GPRS_shift                                 = 15,
+	NUM_HS_GPRS_mask                                  = 0x1f << 20,
+	NUM_HS_GPRS_shift                                 = 20,
+	NUM_LS_GPRS_mask                                  = 0x1f << 25,
+	NUM_LS_GPRS_shift                                 = 25,
+    SPI_LDS_MGMT                                          = 0x000286fc,
+	NUM_PS_LDS_mask                                   = 0xff << 0,
+	NUM_PS_LDS_shift                                  = 0,
+	NUM_LS_LDS_mask                                   = 0xff << 8,
+	NUM_LS_LDS_shift                                  = 8,
+    SPI_STACK_MGMT                                        = 0x00028700,
+	NUM_PS_STACK_mask                                 = 0x1f << 0,
+	NUM_PS_STACK_shift                                = 0,
+	NUM_VS_STACK_mask                                 = 0x1f << 5,
+	NUM_VS_STACK_shift                                = 5,
+	NUM_GS_STACK_mask                                 = 0x1f << 10,
+	NUM_GS_STACK_shift                                = 10,
+	NUM_ES_STACK_mask                                 = 0x1f << 15,
+	NUM_ES_STACK_shift                                = 15,
+	NUM_HS_STACK_mask                                 = 0x1f << 20,
+	NUM_HS_STACK_shift                                = 20,
+	NUM_LS_STACK_mask                                 = 0x1f << 25,
+	NUM_LS_STACK_shift                                = 25,
+    SPI_WAVE_MGMT_1                                       = 0x00028704,
+	NUM_PS_WAVES_mask                                 = 0x1f << 0,
+	NUM_PS_WAVES_shift                                = 0,
+	NUM_VS_WAVES_mask                                 = 0x1f << 5,
+	NUM_VS_WAVES_shift                                = 5,
+	NUM_GS_WAVES_mask                                 = 0x1f << 10,
+	NUM_GS_WAVES_shift                                = 10,
+	NUM_ES_WAVES_mask                                 = 0x1f << 15,
+	NUM_ES_WAVES_shift                                = 15,
+	NUM_HS_WAVES_mask                                 = 0x1f << 20,
+	NUM_HS_WAVES_shift                                = 20,
+	NUM_LS_WAVES_mask                                 = 0x1f << 25,
+	NUM_LS_WAVES_shift                                = 25,
+    SPI_WAVE_MGMT_2                                       = 0x00028708,
+	NUM_CS_WAVES_ONE_RING_mask                        = 0x1f << 0,
+	NUM_CS_WAVES_ONE_RING_shift                       = 0,
+	NUM_CS_WAVES_MULTI_RING_mask                      = 0x1f << 5,
+	NUM_CS_WAVES_MULTI_RING_shift                     = 5,
+    CB_BLEND0_CONTROL                                     = 0x00028780,
+	CB_BLEND0_CONTROL_num                             = 8,
+	COLOR_SRCBLEND_mask                               = 0x1f << 0,
+	COLOR_SRCBLEND_shift                              = 0,
+	    BLEND_ZERO                                    = 0x00,
+	    BLEND_ONE                                     = 0x01,
+	    BLEND_SRC_COLOR                               = 0x02,
+	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03,
+	    BLEND_SRC_ALPHA                               = 0x04,
+	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05,
+	    BLEND_DST_ALPHA                               = 0x06,
+	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07,
+	    BLEND_DST_COLOR                               = 0x08,
+	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09,
+	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a,
+	    BLEND_BOTH_SRC_ALPHA                          = 0x0b,
+	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c,
+	    BLEND_CONSTANT_COLOR                          = 0x0d,
+	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e,
+	    BLEND_SRC1_COLOR                              = 0x0f,
+	    BLEND_INV_SRC1_COLOR                          = 0x10,
+	    BLEND_SRC1_ALPHA                              = 0x11,
+	    BLEND_INV_SRC1_ALPHA                          = 0x12,
+	    BLEND_CONSTANT_ALPHA                          = 0x13,
+	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14,
+	COLOR_COMB_FCN_mask                               = 0x07 << 5,
+	COLOR_COMB_FCN_shift                              = 5,
+	    COMB_DST_PLUS_SRC                             = 0x00,
+	    COMB_SRC_MINUS_DST                            = 0x01,
+	    COMB_MIN_DST_SRC                              = 0x02,
+	    COMB_MAX_DST_SRC                              = 0x03,
+	    COMB_DST_MINUS_SRC                            = 0x04,
+	COLOR_DESTBLEND_mask                              = 0x1f << 8,
+	COLOR_DESTBLEND_shift                             = 8,
+/* 	    BLEND_ZERO                                    = 0x00, */
+/* 	    BLEND_ONE                                     = 0x01, */
+/* 	    BLEND_SRC_COLOR                               = 0x02, */
+/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/* 	    BLEND_SRC_ALPHA                               = 0x04, */
+/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/* 	    BLEND_DST_ALPHA                               = 0x06, */
+/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/* 	    BLEND_DST_COLOR                               = 0x08, */
+/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/* 	    BLEND_SRC1_COLOR                              = 0x0f, */
+/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */
+/* 	    BLEND_SRC1_ALPHA                              = 0x11, */
+/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+	ALPHA_SRCBLEND_mask                               = 0x1f << 16,
+	ALPHA_SRCBLEND_shift                              = 16,
+/* 	    BLEND_ZERO                                    = 0x00, */
+/* 	    BLEND_ONE                                     = 0x01, */
+/* 	    BLEND_SRC_COLOR                               = 0x02, */
+/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/* 	    BLEND_SRC_ALPHA                               = 0x04, */
+/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/* 	    BLEND_DST_ALPHA                               = 0x06, */
+/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/* 	    BLEND_DST_COLOR                               = 0x08, */
+/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/* 	    BLEND_SRC1_COLOR                              = 0x0f, */
+/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */
+/* 	    BLEND_SRC1_ALPHA                              = 0x11, */
+/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+	ALPHA_COMB_FCN_mask                               = 0x07 << 21,
+	ALPHA_COMB_FCN_shift                              = 21,
+/* 	    COMB_DST_PLUS_SRC                             = 0x00, */
+/* 	    COMB_SRC_MINUS_DST                            = 0x01, */
+/* 	    COMB_MIN_DST_SRC                              = 0x02, */
+/* 	    COMB_MAX_DST_SRC                              = 0x03, */
+/* 	    COMB_DST_MINUS_SRC                            = 0x04, */
+	ALPHA_DESTBLEND_mask                              = 0x1f << 24,
+	ALPHA_DESTBLEND_shift                             = 24,
+/* 	    BLEND_ZERO                                    = 0x00, */
+/* 	    BLEND_ONE                                     = 0x01, */
+/* 	    BLEND_SRC_COLOR                               = 0x02, */
+/* 	    BLEND_ONE_MINUS_SRC_COLOR                     = 0x03, */
+/* 	    BLEND_SRC_ALPHA                               = 0x04, */
+/* 	    BLEND_ONE_MINUS_SRC_ALPHA                     = 0x05, */
+/* 	    BLEND_DST_ALPHA                               = 0x06, */
+/* 	    BLEND_ONE_MINUS_DST_ALPHA                     = 0x07, */
+/* 	    BLEND_DST_COLOR                               = 0x08, */
+/* 	    BLEND_ONE_MINUS_DST_COLOR                     = 0x09, */
+/* 	    BLEND_SRC_ALPHA_SATURATE                      = 0x0a, */
+/* 	    BLEND_BOTH_SRC_ALPHA                          = 0x0b, */
+/* 	    BLEND_BOTH_INV_SRC_ALPHA                      = 0x0c, */
+/* 	    BLEND_CONSTANT_COLOR                          = 0x0d, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_COLOR                = 0x0e, */
+/* 	    BLEND_SRC1_COLOR                              = 0x0f, */
+/* 	    BLEND_INV_SRC1_COLOR                          = 0x10, */
+/* 	    BLEND_SRC1_ALPHA                              = 0x11, */
+/* 	    BLEND_INV_SRC1_ALPHA                          = 0x12, */
+/* 	    BLEND_CONSTANT_ALPHA                          = 0x13, */
+/* 	    BLEND_ONE_MINUS_CONSTANT_ALPHA                = 0x14, */
+	SEPARATE_ALPHA_BLEND_bit                          = 1 << 29,
+	CB_BLEND0_CONTROL__ENABLE_bit                     = 1 << 30,
+    PA_CL_POINT_X_RAD                                     = 0x000287d4,
+    PA_CL_POINT_Y_RAD                                     = 0x000287d8,
+    PA_CL_POINT_SIZE                                      = 0x000287dc,
+    PA_CL_POINT_CULL_RAD                                  = 0x000287e0,
+    VGT_DMA_BASE_HI                                       = 0x000287e4,
+	VGT_DMA_BASE_HI__BASE_ADDR_mask                   = 0xff << 0,
+	VGT_DMA_BASE_HI__BASE_ADDR_shift                  = 0,
+    VGT_DMA_BASE                                          = 0x000287e8,
+    VGT_DRAW_INITIATOR                                    = 0x000287f0,
+	SOURCE_SELECT_mask                                = 0x03 << 0,
+	SOURCE_SELECT_shift                               = 0,
+	    DI_SRC_SEL_DMA                                = 0x00,
+	    DI_SRC_SEL_IMMEDIATE                          = 0x01,
+	    DI_SRC_SEL_AUTO_INDEX                         = 0x02,
+	    DI_SRC_SEL_RESERVED                           = 0x03,
+	MAJOR_MODE_mask                                   = 0x03 << 2,
+	MAJOR_MODE_shift                                  = 2,
+	    DI_MAJOR_MODE_0                               = 0x00,
+	    DI_MAJOR_MODE_1                               = 0x01,
+	NOT_EOP_bit                                       = 1 << 5,
+	USE_OPAQUE_bit                                    = 1 << 6,
+    VGT_IMMED_DATA                                        = 0x000287f4,
+    VGT_EVENT_ADDRESS_REG                                 = 0x000287f8,
+	ADDRESS_LOW_mask                                  = 0xfffffff << 0,
+	ADDRESS_LOW_shift                                 = 0,
+    DB_DEPTH_CONTROL                                      = 0x00028800,
+	STENCIL_ENABLE_bit                                = 1 << 0,
+	Z_ENABLE_bit                                      = 1 << 1,
+	Z_WRITE_ENABLE_bit                                = 1 << 2,
+	ZFUNC_mask                                        = 0x07 << 4,
+	ZFUNC_shift                                       = 4,
+	    FRAG_NEVER                                    = 0x00,
+	    FRAG_LESS                                     = 0x01,
+	    FRAG_EQUAL                                    = 0x02,
+	    FRAG_LEQUAL                                   = 0x03,
+	    FRAG_GREATER                                  = 0x04,
+	    FRAG_NOTEQUAL                                 = 0x05,
+	    FRAG_GEQUAL                                   = 0x06,
+	    FRAG_ALWAYS                                   = 0x07,
+	BACKFACE_ENABLE_bit                               = 1 << 7,
+	STENCILFUNC_mask                                  = 0x07 << 8,
+	STENCILFUNC_shift                                 = 8,
+/* 	    REF_NEVER                                     = 0x00, */
+/* 	    REF_LESS                                      = 0x01, */
+/* 	    REF_EQUAL                                     = 0x02, */
+/* 	    REF_LEQUAL                                    = 0x03, */
+/* 	    REF_GREATER                                   = 0x04, */
+/* 	    REF_NOTEQUAL                                  = 0x05, */
+/* 	    REF_GEQUAL                                    = 0x06, */
+/* 	    REF_ALWAYS                                    = 0x07, */
+	STENCILFAIL_mask                                  = 0x07 << 11,
+	STENCILFAIL_shift                                 = 11,
+	    STENCIL_KEEP                                  = 0x00,
+	    STENCIL_ZERO                                  = 0x01,
+	    STENCIL_REPLACE                               = 0x02,
+	    STENCIL_INCR_CLAMP                            = 0x03,
+	    STENCIL_DECR_CLAMP                            = 0x04,
+	    STENCIL_INVERT                                = 0x05,
+	    STENCIL_INCR_WRAP                             = 0x06,
+	    STENCIL_DECR_WRAP                             = 0x07,
+	STENCILZPASS_mask                                 = 0x07 << 14,
+	STENCILZPASS_shift                                = 14,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILZFAIL_mask                                 = 0x07 << 17,
+	STENCILZFAIL_shift                                = 17,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILFUNC_BF_mask                               = 0x07 << 20,
+	STENCILFUNC_BF_shift                              = 20,
+/* 	    REF_NEVER                                     = 0x00, */
+/* 	    REF_LESS                                      = 0x01, */
+/* 	    REF_EQUAL                                     = 0x02, */
+/* 	    REF_LEQUAL                                    = 0x03, */
+/* 	    REF_GREATER                                   = 0x04, */
+/* 	    REF_NOTEQUAL                                  = 0x05, */
+/* 	    REF_GEQUAL                                    = 0x06, */
+/* 	    REF_ALWAYS                                    = 0x07, */
+	STENCILFAIL_BF_mask                               = 0x07 << 23,
+	STENCILFAIL_BF_shift                              = 23,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILZPASS_BF_mask                              = 0x07 << 26,
+	STENCILZPASS_BF_shift                             = 26,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+	STENCILZFAIL_BF_mask                              = 0x07 << 29,
+	STENCILZFAIL_BF_shift                             = 29,
+/* 	    STENCIL_KEEP                                  = 0x00, */
+/* 	    STENCIL_ZERO                                  = 0x01, */
+/* 	    STENCIL_REPLACE                               = 0x02, */
+/* 	    STENCIL_INCR_CLAMP                            = 0x03, */
+/* 	    STENCIL_DECR_CLAMP                            = 0x04, */
+/* 	    STENCIL_INVERT                                = 0x05, */
+/* 	    STENCIL_INCR_WRAP                             = 0x06, */
+/* 	    STENCIL_DECR_WRAP                             = 0x07, */
+    DB_EQAA                                               = 0x00028804,
+    CB_COLOR_CONTROL                                      = 0x00028808,
+	DEGAMMA_ENABLE_bit                                = 1 << 3,
+	CB_COLOR_CONTROL__MODE_mask                       = 0x07 << 4,
+	CB_COLOR_CONTROL__MODE_shift                      = 4,
+	    CB_DISABLE                                    = 0x00,
+	    CB_NORMAL                                     = 0x01,
+	    CB_ELIMINATE_FAST_CLEAR                       = 0x02,
+	    CB_RESOLVE                                    = 0x03,
+	    CB_DECOMPRESS                                 = 0x04,
+	    CB_FMASK_DECOMPRESS                           = 0x05,
+	ROP3_mask                                         = 0xff << 16,
+	ROP3_shift                                        = 16,
+	    X_0X00                                        = 0x00,
+	    X_0X05                                        = 0x05,
+	    X_0X0A                                        = 0x0a,
+	    X_0X0F                                        = 0x0f,
+	    X_0X11                                        = 0x11,
+	    X_0X22                                        = 0x22,
+	    X_0X33                                        = 0x33,
+	    X_0X44                                        = 0x44,
+	    X_0X50                                        = 0x50,
+	    X_0X55                                        = 0x55,
+	    X_0X5A                                        = 0x5a,
+	    X_0X5F                                        = 0x5f,
+	    X_0X66                                        = 0x66,
+	    X_0X77                                        = 0x77,
+	    X_0X88                                        = 0x88,
+	    X_0X99                                        = 0x99,
+	    X_0XA0                                        = 0xa0,
+	    X_0XA5                                        = 0xa5,
+	    X_0XAA                                        = 0xaa,
+	    X_0XAF                                        = 0xaf,
+	    X_0XBB                                        = 0xbb,
+	    X_0XCC                                        = 0xcc,
+	    X_0XDD                                        = 0xdd,
+	    X_0XEE                                        = 0xee,
+	    X_0XF0                                        = 0xf0,
+	    X_0XF5                                        = 0xf5,
+	    X_0XFA                                        = 0xfa,
+	    X_0XFF                                        = 0xff,
+    DB_SHADER_CONTROL                                     = 0x0002880c,
+	Z_EXPORT_ENABLE_bit                               = 1 << 0,
+	STENCIL_REF_EXPORT_ENABLE_bit                     = 1 << 1,
+	Z_ORDER_mask                                      = 0x03 << 4,
+	Z_ORDER_shift                                     = 4,
+	    LATE_Z                                        = 0x00,
+	    EARLY_Z_THEN_LATE_Z                           = 0x01,
+	    RE_Z                                          = 0x02,
+	    EARLY_Z_THEN_RE_Z                             = 0x03,
+	KILL_ENABLE_bit                                   = 1 << 6,
+	COVERAGE_TO_MASK_ENABLE_bit                       = 1 << 7,
+	MASK_EXPORT_ENABLE_bit                            = 1 << 8,
+	DUAL_EXPORT_ENABLE_bit                            = 1 << 9,
+	EXEC_ON_HIER_FAIL_bit                             = 1 << 10,
+	EXEC_ON_NOOP_bit                                  = 1 << 11,
+	ALPHA_TO_MASK_DISABLE_bit                         = 1 << 12,
+	DB_SOURCE_FORMAT_mask                             = 0x03 << 13,
+	DB_SOURCE_FORMAT_shift                            = 13,
+	    EXPORT_DB_FULL                                = 0x00,
+	    EXPORT_DB_FOUR16                              = 0x01,
+	    EXPORT_DB_TWO                                 = 0x02,
+	DEPTH_BEFORE_SHADER_bit                           = 1 << 15,
+	CONSERVATIVE_Z_EXPORT_mask                        = 0x03 << 16,
+	CONSERVATIVE_Z_EXPORT_shift                       = 16,
+	    EXPORT_ANY_Z                                  = 0x00,
+	    EXPORT_LESS_THAN_Z                            = 0x01,
+	    EXPORT_GREATER_THAN_Z                         = 0x02,
+	    EXPORT_RESERVED                               = 0x03,
+    PA_CL_CLIP_CNTL                                       = 0x00028810,
+	UCP_ENA_0_bit                                     = 1 << 0,
+	UCP_ENA_1_bit                                     = 1 << 1,
+	UCP_ENA_2_bit                                     = 1 << 2,
+	UCP_ENA_3_bit                                     = 1 << 3,
+	UCP_ENA_4_bit                                     = 1 << 4,
+	UCP_ENA_5_bit                                     = 1 << 5,
+	PS_UCP_Y_SCALE_NEG_bit                            = 1 << 13,
+	PS_UCP_MODE_mask                                  = 0x03 << 14,
+	PS_UCP_MODE_shift                                 = 14,
+	CLIP_DISABLE_bit                                  = 1 << 16,
+	UCP_CULL_ONLY_ENA_bit                             = 1 << 17,
+	BOUNDARY_EDGE_FLAG_ENA_bit                        = 1 << 18,
+	DX_CLIP_SPACE_DEF_bit                             = 1 << 19,
+	DIS_CLIP_ERR_DETECT_bit                           = 1 << 20,
+	VTX_KILL_OR_bit                                   = 1 << 21,
+	DX_RASTERIZATION_KILL_bit                         = 1 << 22,
+	DX_LINEAR_ATTR_CLIP_ENA_bit                       = 1 << 24,
+	VTE_VPORT_PROVOKE_DISABLE_bit                     = 1 << 25,
+	ZCLIP_NEAR_DISABLE_bit                            = 1 << 26,
+	ZCLIP_FAR_DISABLE_bit                             = 1 << 27,
+    PA_SU_SC_MODE_CNTL                                    = 0x00028814,
+	CULL_FRONT_bit                                    = 1 << 0,
+	CULL_BACK_bit                                     = 1 << 1,
+	FACE_bit                                          = 1 << 2,
+	POLY_MODE_mask                                    = 0x03 << 3,
+	POLY_MODE_shift                                   = 3,
+	    X_DISABLE_POLY_MODE                           = 0x00,
+	    X_DUAL_MODE                                   = 0x01,
+	POLYMODE_FRONT_PTYPE_mask                         = 0x07 << 5,
+	POLYMODE_FRONT_PTYPE_shift                        = 5,
+	    X_DRAW_POINTS                                 = 0x00,
+	    X_DRAW_LINES                                  = 0x01,
+	    X_DRAW_TRIANGLES                              = 0x02,
+	POLYMODE_BACK_PTYPE_mask                          = 0x07 << 8,
+	POLYMODE_BACK_PTYPE_shift                         = 8,
+/* 	    X_DRAW_POINTS                                 = 0x00, */
+/* 	    X_DRAW_LINES                                  = 0x01, */
+/* 	    X_DRAW_TRIANGLES                              = 0x02, */
+	POLY_OFFSET_FRONT_ENABLE_bit                      = 1 << 11,
+	POLY_OFFSET_BACK_ENABLE_bit                       = 1 << 12,
+	POLY_OFFSET_PARA_ENABLE_bit                       = 1 << 13,
+	VTX_WINDOW_OFFSET_ENABLE_bit                      = 1 << 16,
+	PROVOKING_VTX_LAST_bit                            = 1 << 19,
+	PERSP_CORR_DIS_bit                                = 1 << 20,
+	MULTI_PRIM_IB_ENA_bit                             = 1 << 21,
+    PA_CL_VTE_CNTL                                        = 0x00028818,
+	VPORT_X_SCALE_ENA_bit                             = 1 << 0,
+	VPORT_X_OFFSET_ENA_bit                            = 1 << 1,
+	VPORT_Y_SCALE_ENA_bit                             = 1 << 2,
+	VPORT_Y_OFFSET_ENA_bit                            = 1 << 3,
+	VPORT_Z_SCALE_ENA_bit                             = 1 << 4,
+	VPORT_Z_OFFSET_ENA_bit                            = 1 << 5,
+	VTX_XY_FMT_bit                                    = 1 << 8,
+	VTX_Z_FMT_bit                                     = 1 << 9,
+	VTX_W0_FMT_bit                                    = 1 << 10,
+    PA_CL_VS_OUT_CNTL                                     = 0x0002881c,
+	CLIP_DIST_ENA_0_bit                               = 1 << 0,
+	CLIP_DIST_ENA_1_bit                               = 1 << 1,
+	CLIP_DIST_ENA_2_bit                               = 1 << 2,
+	CLIP_DIST_ENA_3_bit                               = 1 << 3,
+	CLIP_DIST_ENA_4_bit                               = 1 << 4,
+	CLIP_DIST_ENA_5_bit                               = 1 << 5,
+	CLIP_DIST_ENA_6_bit                               = 1 << 6,
+	CLIP_DIST_ENA_7_bit                               = 1 << 7,
+	CULL_DIST_ENA_0_bit                               = 1 << 8,
+	CULL_DIST_ENA_1_bit                               = 1 << 9,
+	CULL_DIST_ENA_2_bit                               = 1 << 10,
+	CULL_DIST_ENA_3_bit                               = 1 << 11,
+	CULL_DIST_ENA_4_bit                               = 1 << 12,
+	CULL_DIST_ENA_5_bit                               = 1 << 13,
+	CULL_DIST_ENA_6_bit                               = 1 << 14,
+	CULL_DIST_ENA_7_bit                               = 1 << 15,
+	USE_VTX_POINT_SIZE_bit                            = 1 << 16,
+	USE_VTX_EDGE_FLAG_bit                             = 1 << 17,
+	USE_VTX_RENDER_TARGET_INDX_bit                    = 1 << 18,
+	USE_VTX_VIEWPORT_INDX_bit                         = 1 << 19,
+	USE_VTX_KILL_FLAG_bit                             = 1 << 20,
+	VS_OUT_MISC_VEC_ENA_bit                           = 1 << 21,
+	VS_OUT_CCDIST0_VEC_ENA_bit                        = 1 << 22,
+	VS_OUT_CCDIST1_VEC_ENA_bit                        = 1 << 23,
+    PA_CL_NANINF_CNTL                                     = 0x00028820,
+	VTE_XY_INF_DISCARD_bit                            = 1 << 0,
+	VTE_Z_INF_DISCARD_bit                             = 1 << 1,
+	VTE_W_INF_DISCARD_bit                             = 1 << 2,
+	VTE_0XNANINF_IS_0_bit                             = 1 << 3,
+	VTE_XY_NAN_RETAIN_bit                             = 1 << 4,
+	VTE_Z_NAN_RETAIN_bit                              = 1 << 5,
+	VTE_W_NAN_RETAIN_bit                              = 1 << 6,
+	VTE_W_RECIP_NAN_IS_0_bit                          = 1 << 7,
+	VS_XY_NAN_TO_INF_bit                              = 1 << 8,
+	VS_XY_INF_RETAIN_bit                              = 1 << 9,
+	VS_Z_NAN_TO_INF_bit                               = 1 << 10,
+	VS_Z_INF_RETAIN_bit                               = 1 << 11,
+	VS_W_NAN_TO_INF_bit                               = 1 << 12,
+	VS_W_INF_RETAIN_bit                               = 1 << 13,
+	VS_CLIP_DIST_INF_DISCARD_bit                      = 1 << 14,
+	VTE_NO_OUTPUT_NEG_0_bit                           = 1 << 20,
+    PA_SU_LINE_STIPPLE_CNTL                               = 0x00028824,
+	LINE_STIPPLE_RESET_mask                           = 0x03 << 0,
+	LINE_STIPPLE_RESET_shift                          = 0,
+	EXPAND_FULL_LENGTH_bit                            = 1 << 2,
+	FRACTIONAL_ACCUM_bit                              = 1 << 3,
+	DIAMOND_ADJUST_bit                                = 1 << 4,
+    PA_SU_LINE_STIPPLE_SCALE                              = 0x00028828,
+    PA_SU_PRIM_FILTER_CNTL                                = 0x0002882c,
+	TRIANGLE_FILTER_DISABLE_bit                       = 1 << 0,
+	LINE_FILTER_DISABLE_bit                           = 1 << 1,
+	POINT_FILTER_DISABLE_bit                          = 1 << 2,
+	RECTANGLE_FILTER_DISABLE_bit                      = 1 << 3,
+	TRIANGLE_EXPAND_ENA_bit                           = 1 << 4,
+	LINE_EXPAND_ENA_bit                               = 1 << 5,
+	POINT_EXPAND_ENA_bit                              = 1 << 6,
+	RECTANGLE_EXPAND_ENA_bit                          = 1 << 7,
+	PRIM_EXPAND_CONSTANT_mask                         = 0xff << 8,
+	PRIM_EXPAND_CONSTANT_shift                        = 8,
+    SQ_LSTMP_RING_ITEMSIZE                                = 0x00028830,
+	ITEMSIZE_mask                                     = 0x7fff << 0,
+	ITEMSIZE_shift                                    = 0,
+    SQ_HSTMP_RING_ITEMSIZE                                = 0x00028834,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_PGM_START_PS                                       = 0x00028840,
+    SQ_PGM_RESOURCES_PS                                   = 0x00028844,
+	NUM_GPRS_mask                                     = 0xff << 0,
+	NUM_GPRS_shift                                    = 0,
+	STACK_SIZE_mask                                   = 0xff << 8,
+	STACK_SIZE_shift                                  = 8,
+	DX10_CLAMP_bit                                    = 1 << 21,
+	UNCACHED_FIRST_INST_bit                           = 1 << 28,
+	CLAMP_CONSTS_bit                                  = 1 << 31,
+    SQ_PGM_RESOURCES_2_PS                                 = 0x00028848,
+	SINGLE_ROUND_mask                                 = 0x03 << 0,
+	SINGLE_ROUND_shift                                = 0,
+	    SQ_ROUND_NEAREST_EVEN                         = 0x00,
+	    SQ_ROUND_PLUS_INFINITY                        = 0x01,
+	    SQ_ROUND_MINUS_INFINITY                       = 0x02,
+	    SQ_ROUND_TO_ZERO                              = 0x03,
+	DOUBLE_ROUND_mask                                 = 0x03 << 2,
+	DOUBLE_ROUND_shift                                = 2,
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4,
+	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5,
+	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6,
+	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7,
+	SINGLE_IEEE_MODE_bit                              = 1 << 8,
+	DOUBLE_IEEE_MODE_bit                              = 1 << 9,
+    SQ_PGM_EXPORTS_PS                                     = 0x0002884c,
+	EXPORT_MODE_mask                                  = 0x1f << 0,
+	EXPORT_MODE_shift                                 = 0,
+    SQ_PGM_START_VS                                       = 0x0002885c,
+    SQ_PGM_RESOURCES_VS                                   = 0x00028860,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+	USE_LS_CONSTS_bit                                 = 1 << 16,
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_RESOURCES_2_VS                                 = 0x00028864,
+/* 	SINGLE_ROUND_mask                                 = 0x03 << 0, */
+/* 	SINGLE_ROUND_shift                                = 0, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	DOUBLE_ROUND_mask                                 = 0x03 << 2, */
+/* 	DOUBLE_ROUND_shift                                = 2, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4, */
+/* 	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5, */
+/* 	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6, */
+/* 	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7, */
+/* 	SINGLE_IEEE_MODE_bit                              = 1 << 8, */
+/* 	DOUBLE_IEEE_MODE_bit                              = 1 << 9, */
+    SQ_PGM_START_GS                                       = 0x00028874,
+    SQ_PGM_RESOURCES_GS                                   = 0x00028878,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_RESOURCES_2_GS                                 = 0x0002887c,
+/* 	SINGLE_ROUND_mask                                 = 0x03 << 0, */
+/* 	SINGLE_ROUND_shift                                = 0, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	DOUBLE_ROUND_mask                                 = 0x03 << 2, */
+/* 	DOUBLE_ROUND_shift                                = 2, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4, */
+/* 	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5, */
+/* 	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6, */
+/* 	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7, */
+/* 	SINGLE_IEEE_MODE_bit                              = 1 << 8, */
+/* 	DOUBLE_IEEE_MODE_bit                              = 1 << 9, */
+    SQ_PGM_START_ES                                       = 0x0002888c,
+    SQ_PGM_RESOURCES_ES                                   = 0x00028890,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+/* 	USE_LS_CONSTS_bit                                 = 1 << 16, */
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_RESOURCES_2_ES                                 = 0x00028894,
+/* 	SINGLE_ROUND_mask                                 = 0x03 << 0, */
+/* 	SINGLE_ROUND_shift                                = 0, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	DOUBLE_ROUND_mask                                 = 0x03 << 2, */
+/* 	DOUBLE_ROUND_shift                                = 2, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4, */
+/* 	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5, */
+/* 	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6, */
+/* 	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7, */
+/* 	SINGLE_IEEE_MODE_bit                              = 1 << 8, */
+/* 	DOUBLE_IEEE_MODE_bit                              = 1 << 9, */
+    SQ_PGM_START_FS                                       = 0x000288a4,
+    SQ_PGM_RESOURCES_FS                                   = 0x000288a8,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+    SQ_PGM_START_HS                                       = 0x000288b8,
+    SQ_PGM_RESOURCES_HS                                   = 0x000288bc,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_RESOURCES_2_HS                                 = 0x000288c0,
+/* 	SINGLE_ROUND_mask                                 = 0x03 << 0, */
+/* 	SINGLE_ROUND_shift                                = 0, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	DOUBLE_ROUND_mask                                 = 0x03 << 2, */
+/* 	DOUBLE_ROUND_shift                                = 2, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4, */
+/* 	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5, */
+/* 	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6, */
+/* 	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7, */
+/* 	SINGLE_IEEE_MODE_bit                              = 1 << 8, */
+/* 	DOUBLE_IEEE_MODE_bit                              = 1 << 9, */
+    SQ_PGM_START_LS                                       = 0x000288d0,
+    SQ_PGM_RESOURCES_LS                                   = 0x000288d4,
+/* 	NUM_GPRS_mask                                     = 0xff << 0, */
+/* 	NUM_GPRS_shift                                    = 0, */
+/* 	STACK_SIZE_mask                                   = 0xff << 8, */
+/* 	STACK_SIZE_shift                                  = 8, */
+	USE_VS_CONSTS_bit                                 = 1 << 16,
+/* 	DX10_CLAMP_bit                                    = 1 << 21, */
+/* 	UNCACHED_FIRST_INST_bit                           = 1 << 28, */
+    SQ_PGM_RESOURCES_2_LS                                 = 0x000288d8,
+/* 	SINGLE_ROUND_mask                                 = 0x03 << 0, */
+/* 	SINGLE_ROUND_shift                                = 0, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	DOUBLE_ROUND_mask                                 = 0x03 << 2, */
+/* 	DOUBLE_ROUND_shift                                = 2, */
+/* 	    SQ_ROUND_NEAREST_EVEN                         = 0x00, */
+/* 	    SQ_ROUND_PLUS_INFINITY                        = 0x01, */
+/* 	    SQ_ROUND_MINUS_INFINITY                       = 0x02, */
+/* 	    SQ_ROUND_TO_ZERO                              = 0x03, */
+/* 	ALLOW_SINGLE_DENORM_IN_bit                        = 1 << 4, */
+/* 	ALLOW_SINGLE_DENORM_OUT_bit                       = 1 << 5, */
+/* 	ALLOW_DOUBLE_DENORM_IN_bit                        = 1 << 6, */
+/* 	ALLOW_DOUBLE_DENORM_OUT_bit                       = 1 << 7, */
+/* 	SINGLE_IEEE_MODE_bit                              = 1 << 8, */
+/* 	DOUBLE_IEEE_MODE_bit                              = 1 << 9, */
+    SQ_VTX_SEMANTIC_CLEAR                                 = 0x000288f0,
+    SQ_ESGS_RING_ITEMSIZE                                 = 0x00028900,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GSVS_RING_ITEMSIZE                                 = 0x00028904,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_ESTMP_RING_ITEMSIZE                                = 0x00028908,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GSTMP_RING_ITEMSIZE                                = 0x0002890c,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_VSTMP_RING_ITEMSIZE                                = 0x00028910,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_PSTMP_RING_ITEMSIZE                                = 0x00028914,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GS_VERT_ITEMSIZE                                   = 0x0002891c,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GS_VERT_ITEMSIZE_1                                 = 0x00028920,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GS_VERT_ITEMSIZE_2                                 = 0x00028924,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GS_VERT_ITEMSIZE_3                                 = 0x00028928,
+/* 	ITEMSIZE_mask                                     = 0x7fff << 0, */
+/* 	ITEMSIZE_shift                                    = 0, */
+    SQ_GSVS_RING_OFFSET_1                                 = 0x0002892c,
+	SQ_GSVS_RING_OFFSET_1__OFFSET_mask                = 0x7fff << 0,
+	SQ_GSVS_RING_OFFSET_1__OFFSET_shift               = 0,
+    SQ_GSVS_RING_OFFSET_2                                 = 0x00028930,
+	SQ_GSVS_RING_OFFSET_2__OFFSET_mask                = 0x7fff << 0,
+	SQ_GSVS_RING_OFFSET_2__OFFSET_shift               = 0,
+    SQ_GSVS_RING_OFFSET_3                                 = 0x00028934,
+	SQ_GSVS_RING_OFFSET_3__OFFSET_mask                = 0x7fff << 0,
+	SQ_GSVS_RING_OFFSET_3__OFFSET_shift               = 0,
+    SQ_ALU_CONST_CACHE_PS_0                               = 0x00028940,
+	SQ_ALU_CONST_CACHE_PS_0_num                       = 16,
+    SQ_ALU_CONST_CACHE_VS_0                               = 0x00028980,
+	SQ_ALU_CONST_CACHE_VS_0_num                       = 16,
+    SQ_ALU_CONST_CACHE_GS_0                               = 0x000289c0,
+	SQ_ALU_CONST_CACHE_GS_0_num                       = 16,
+    PA_SU_POINT_SIZE                                      = 0x00028a00,
+	HEIGHT_mask                                       = 0xffff << 0,
+	HEIGHT_shift                                      = 0,
+	PA_SU_POINT_SIZE__WIDTH_mask                      = 0xffff << 16,
+	PA_SU_POINT_SIZE__WIDTH_shift                     = 16,
+    PA_SU_POINT_MINMAX                                    = 0x00028a04,
+	MIN_SIZE_mask                                     = 0xffff << 0,
+	MIN_SIZE_shift                                    = 0,
+	PA_SU_POINT_MINMAX__MAX_SIZE_mask                 = 0xffff << 16,
+	PA_SU_POINT_MINMAX__MAX_SIZE_shift                = 16,
+    PA_SU_LINE_CNTL                                       = 0x00028a08,
+	PA_SU_LINE_CNTL__WIDTH_mask                       = 0xffff << 0,
+	PA_SU_LINE_CNTL__WIDTH_shift                      = 0,
+    PA_SC_LINE_STIPPLE                                    = 0x00028a0c,
+	LINE_PATTERN_mask                                 = 0xffff << 0,
+	LINE_PATTERN_shift                                = 0,
+	REPEAT_COUNT_mask                                 = 0xff << 16,
+	REPEAT_COUNT_shift                                = 16,
+	PATTERN_BIT_ORDER_bit                             = 1 << 28,
+	AUTO_RESET_CNTL_mask                              = 0x03 << 29,
+	AUTO_RESET_CNTL_shift                             = 29,
+    VGT_OUTPUT_PATH_CNTL                                  = 0x00028a10,
+	PATH_SELECT_mask                                  = 0x07 << 0,
+	PATH_SELECT_shift                                 = 0,
+	    VGT_OUTPATH_VTX_REUSE                         = 0x00,
+	    VGT_OUTPATH_TESS_EN                           = 0x01,
+	    VGT_OUTPATH_PASSTHRU                          = 0x02,
+	    VGT_OUTPATH_GS_BLOCK                          = 0x03,
+	    VGT_OUTPATH_HS_BLOCK                          = 0x04,
+    VGT_HOS_CNTL                                          = 0x00028a14,
+	TESS_MODE_mask                                    = 0x03 << 0,
+	TESS_MODE_shift                                   = 0,
+    VGT_HOS_MAX_TESS_LEVEL                                = 0x00028a18,
+    VGT_HOS_MIN_TESS_LEVEL                                = 0x00028a1c,
+    VGT_HOS_REUSE_DEPTH                                   = 0x00028a20,
+	REUSE_DEPTH_mask                                  = 0xff << 0,
+	REUSE_DEPTH_shift                                 = 0,
+    VGT_GROUP_PRIM_TYPE                                   = 0x00028a24,
+	VGT_GROUP_PRIM_TYPE__PRIM_TYPE_mask               = 0x1f << 0,
+	VGT_GROUP_PRIM_TYPE__PRIM_TYPE_shift              = 0,
+	    VGT_GRP_3D_POINT                              = 0x00,
+	    VGT_GRP_3D_LINE                               = 0x01,
+	    VGT_GRP_3D_TRI                                = 0x02,
+	    VGT_GRP_3D_RECT                               = 0x03,
+	    VGT_GRP_3D_QUAD                               = 0x04,
+	    VGT_GRP_2D_COPY_RECT_V0                       = 0x05,
+	    VGT_GRP_2D_COPY_RECT_V1                       = 0x06,
+	    VGT_GRP_2D_COPY_RECT_V2                       = 0x07,
+	    VGT_GRP_2D_COPY_RECT_V3                       = 0x08,
+	    VGT_GRP_2D_FILL_RECT                          = 0x09,
+	    VGT_GRP_2D_LINE                               = 0x0a,
+	    VGT_GRP_2D_TRI                                = 0x0b,
+	    VGT_GRP_PRIM_INDEX_LINE                       = 0x0c,
+	    VGT_GRP_PRIM_INDEX_TRI                        = 0x0d,
+	    VGT_GRP_PRIM_INDEX_QUAD                       = 0x0e,
+	    VGT_GRP_3D_LINE_ADJ                           = 0x0f,
+	    VGT_GRP_3D_TRI_ADJ                            = 0x10,
+	    VGT_GRP_3D_PATCH                              = 0x11,
+	RETAIN_ORDER_bit                                  = 1 << 14,
+	RETAIN_QUADS_bit                                  = 1 << 15,
+	PRIM_ORDER_mask                                   = 0x07 << 16,
+	PRIM_ORDER_shift                                  = 16,
+	    VGT_GRP_LIST                                  = 0x00,
+	    VGT_GRP_STRIP                                 = 0x01,
+	    VGT_GRP_FAN                                   = 0x02,
+	    VGT_GRP_LOOP                                  = 0x03,
+	    VGT_GRP_POLYGON                               = 0x04,
+    VGT_GROUP_FIRST_DECR                                  = 0x00028a28,
+	FIRST_DECR_mask                                   = 0x0f << 0,
+	FIRST_DECR_shift                                  = 0,
+    VGT_GROUP_DECR                                        = 0x00028a2c,
+	DECR_mask                                         = 0x0f << 0,
+	DECR_shift                                        = 0,
+    VGT_GROUP_VECT_0_CNTL                                 = 0x00028a30,
+	COMP_X_EN_bit                                     = 1 << 0,
+	COMP_Y_EN_bit                                     = 1 << 1,
+	COMP_Z_EN_bit                                     = 1 << 2,
+	COMP_W_EN_bit                                     = 1 << 3,
+	VGT_GROUP_VECT_0_CNTL__STRIDE_mask                = 0xff << 8,
+	VGT_GROUP_VECT_0_CNTL__STRIDE_shift               = 8,
+	SHIFT_mask                                        = 0xff << 16,
+	SHIFT_shift                                       = 16,
+    VGT_GROUP_VECT_1_CNTL                                 = 0x00028a34,
+/* 	COMP_X_EN_bit                                     = 1 << 0, */
+/* 	COMP_Y_EN_bit                                     = 1 << 1, */
+/* 	COMP_Z_EN_bit                                     = 1 << 2, */
+/* 	COMP_W_EN_bit                                     = 1 << 3, */
+	VGT_GROUP_VECT_1_CNTL__STRIDE_mask                = 0xff << 8,
+	VGT_GROUP_VECT_1_CNTL__STRIDE_shift               = 8,
+/* 	SHIFT_mask                                        = 0xff << 16, */
+/* 	SHIFT_shift                                       = 16, */
+    VGT_GROUP_VECT_0_FMT_CNTL                             = 0x00028a38,
+	X_CONV_mask                                       = 0x0f << 0,
+	X_CONV_shift                                      = 0,
+	    VGT_GRP_INDEX_16                              = 0x00,
+	    VGT_GRP_INDEX_32                              = 0x01,
+	    VGT_GRP_UINT_16                               = 0x02,
+	    VGT_GRP_UINT_32                               = 0x03,
+	    VGT_GRP_SINT_16                               = 0x04,
+	    VGT_GRP_SINT_32                               = 0x05,
+	    VGT_GRP_FLOAT_32                              = 0x06,
+	    VGT_GRP_AUTO_PRIM                             = 0x07,
+	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08,
+	X_OFFSET_mask                                     = 0x0f << 4,
+	X_OFFSET_shift                                    = 4,
+	Y_CONV_mask                                       = 0x0f << 8,
+	Y_CONV_shift                                      = 8,
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+	Y_OFFSET_mask                                     = 0x0f << 12,
+	Y_OFFSET_shift                                    = 12,
+	Z_CONV_mask                                       = 0x0f << 16,
+	Z_CONV_shift                                      = 16,
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+	Z_OFFSET_mask                                     = 0x0f << 20,
+	Z_OFFSET_shift                                    = 20,
+	W_CONV_mask                                       = 0x0f << 24,
+	W_CONV_shift                                      = 24,
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+	W_OFFSET_mask                                     = 0x0f << 28,
+	W_OFFSET_shift                                    = 28,
+    VGT_GROUP_VECT_1_FMT_CNTL                             = 0x00028a3c,
+/* 	X_CONV_mask                                       = 0x0f << 0, */
+/* 	X_CONV_shift                                      = 0, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	X_OFFSET_mask                                     = 0x0f << 4, */
+/* 	X_OFFSET_shift                                    = 4, */
+/* 	Y_CONV_mask                                       = 0x0f << 8, */
+/* 	Y_CONV_shift                                      = 8, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	Y_OFFSET_mask                                     = 0x0f << 12, */
+/* 	Y_OFFSET_shift                                    = 12, */
+/* 	Z_CONV_mask                                       = 0x0f << 16, */
+/* 	Z_CONV_shift                                      = 16, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	Z_OFFSET_mask                                     = 0x0f << 20, */
+/* 	Z_OFFSET_shift                                    = 20, */
+/* 	W_CONV_mask                                       = 0x0f << 24, */
+/* 	W_CONV_shift                                      = 24, */
+/* 	    VGT_GRP_INDEX_16                              = 0x00, */
+/* 	    VGT_GRP_INDEX_32                              = 0x01, */
+/* 	    VGT_GRP_UINT_16                               = 0x02, */
+/* 	    VGT_GRP_UINT_32                               = 0x03, */
+/* 	    VGT_GRP_SINT_16                               = 0x04, */
+/* 	    VGT_GRP_SINT_32                               = 0x05, */
+/* 	    VGT_GRP_FLOAT_32                              = 0x06, */
+/* 	    VGT_GRP_AUTO_PRIM                             = 0x07, */
+/* 	    VGT_GRP_FIX_1_23_TO_FLOAT                     = 0x08, */
+/* 	W_OFFSET_mask                                     = 0x0f << 28, */
+/* 	W_OFFSET_shift                                    = 28, */
+    VGT_GS_MODE                                           = 0x00028a40,
+	VGT_GS_MODE__MODE_mask                            = 0x03 << 0,
+	VGT_GS_MODE__MODE_shift                           = 0,
+	    GS_OFF                                        = 0x00,
+	    GS_SCENARIO_A                                 = 0x01,
+	    GS_SCENARIO_B                                 = 0x02,
+	    GS_SCENARIO_G                                 = 0x03,
+	    GS_SCENARIO_C                                 = 0x04,
+	    SPRITE_EN                                     = 0x05,
+	ES_PASSTHRU_bit                                   = 1 << 2,
+	CUT_MODE_mask                                     = 0x03 << 3,
+	CUT_MODE_shift                                    = 3,
+	    GS_CUT_1024                                   = 0x00,
+	    GS_CUT_512                                    = 0x01,
+	    GS_CUT_256                                    = 0x02,
+	    GS_CUT_128                                    = 0x03,
+	MODE_HI_bit                                       = 1 << 8,
+    PA_SC_MODE_CNTL_0                                     = 0x00028a48,
+	MSAA_ENABLE_bit                                   = 1 << 0,
+	VPORT_SCISSOR_ENABLE_bit                          = 1 << 1,
+	LINE_STIPPLE_ENABLE_bit                           = 1 << 2,
+    VGT_ENHANCE                                           = 0x00028a50,
+    VGT_GS_PER_ES                                         = 0x00028a54,
+	GS_PER_ES_mask                                    = 0x7ff << 0,
+	GS_PER_ES_shift                                   = 0,
+    VGT_ES_PER_GS                                         = 0x00028a58,
+	ES_PER_GS_mask                                    = 0x7ff << 0,
+	ES_PER_GS_shift                                   = 0,
+    VGT_GS_PER_VS                                         = 0x00028a5c,
+	GS_PER_VS_mask                                    = 0x0f << 0,
+	GS_PER_VS_shift                                   = 0,
+    VGT_GS_OUT_PRIM_TYPE                                  = 0x00028a6c,
+	OUTPRIM_TYPE_mask                                 = 0x3f << 0,
+	OUTPRIM_TYPE_shift                                = 0,
+	    POINTLIST                                     = 0x00,
+	    LINESTRIP                                     = 0x01,
+	    TRISTRIP                                      = 0x02,
+    VGT_DMA_SIZE                                          = 0x00028a74,
+    VGT_DMA_MAX_SIZE                                      = 0x00028a78,
+    VGT_DMA_INDEX_TYPE                                    = 0x00028a7c,
+/* 	INDEX_TYPE_mask                                   = 0x03 << 0, */
+/* 	INDEX_TYPE_shift                                  = 0, */
+	    VGT_INDEX_16                                  = 0x00,
+	    VGT_INDEX_32                                  = 0x01,
+	SWAP_MODE_mask                                    = 0x03 << 2,
+	SWAP_MODE_shift                                   = 2,
+	    VGT_DMA_SWAP_NONE                             = 0x00,
+	    VGT_DMA_SWAP_16_BIT                           = 0x01,
+	    VGT_DMA_SWAP_32_BIT                           = 0x02,
+	    VGT_DMA_SWAP_WORD                             = 0x03,
+    VGT_PRIMITIVEID_EN                                    = 0x00028a84,
+	PRIMITIVEID_EN_bit                                = 1 << 0,
+    VGT_DMA_NUM_INSTANCES                                 = 0x00028a88,
+    VGT_EVENT_INITIATOR                                   = 0x00028a90,
+	EVENT_TYPE_mask                                   = 0x3f << 0,
+	EVENT_TYPE_shift                                  = 0,
+	    SAMPLE_STREAMOUTSTATS1                        = 0x01,
+	    SAMPLE_STREAMOUTSTATS2                        = 0x02,
+	    SAMPLE_STREAMOUTSTATS3                        = 0x03,
+	    CACHE_FLUSH_TS                                = 0x04,
+	    CONTEXT_DONE                                  = 0x05,
+	    CACHE_FLUSH                                   = 0x06,
+	    CS_PARTIAL_FLUSH                              = 0x07,
+	    VGT_STREAMOUT_SYNC                            = 0x08,
+	    RST_PIX_CNT                                   = 0x0d,
+	    VS_PARTIAL_FLUSH                              = 0x0f,
+	    PS_PARTIAL_FLUSH                              = 0x10,
+	    FLUSH_HS_OUTPUT                               = 0x11,
+	    FLUSH_LS_OUTPUT                               = 0x12,
+	    CACHE_FLUSH_AND_INV_TS_EVENT                  = 0x14,
+	    ZPASS_DONE                                    = 0x15,
+	    CACHE_FLUSH_AND_INV_EVENT                     = 0x16,
+	    PERFCOUNTER_START                             = 0x17,
+	    PERFCOUNTER_STOP                              = 0x18,
+	    PIPELINESTAT_START                            = 0x19,
+	    PIPELINESTAT_STOP                             = 0x1a,
+	    PERFCOUNTER_SAMPLE                            = 0x1b,
+	    FLUSH_ES_OUTPUT                               = 0x1c,
+	    FLUSH_GS_OUTPUT                               = 0x1d,
+	    SAMPLE_PIPELINESTAT                           = 0x1e,
+	    SO_VGTSTREAMOUT_FLUSH                         = 0x1f,
+	    SAMPLE_STREAMOUTSTATS                         = 0x20,
+	    RESET_VTX_CNT                                 = 0x21,
+	    BLOCK_CONTEXT_DONE                            = 0x22,
+	    CS_CONTEXT_DONE                               = 0x23,
+	    VGT_FLUSH                                     = 0x24,
+	    SQ_NON_EVENT                                  = 0x26,
+	    SC_SEND_DB_VPZ                                = 0x27,
+	    BOTTOM_OF_PIPE_TS                             = 0x28,
+	    FLUSH_SX_TS                                   = 0x29,
+	    DB_CACHE_FLUSH_AND_INV                        = 0x2a,
+	    FLUSH_AND_INV_DB_DATA_TS                      = 0x2b,
+	    FLUSH_AND_INV_DB_META                         = 0x2c,
+	    FLUSH_AND_INV_CB_DATA_TS                      = 0x2d,
+	    FLUSH_AND_INV_CB_META                         = 0x2e,
+	    CS_DONE                                       = 0x2f,
+	    PS_DONE                                       = 0x30,
+	    FLUSH_AND_INV_CB_PIXEL_DATA                   = 0x31,
+	    SX_CB_RAT_ACK_REQUEST                         = 0x32,
+	ADDRESS_HI_mask                                   = 0x1ff << 18,
+	ADDRESS_HI_shift                                  = 18,
+	EXTENDED_EVENT_bit                                = 1 << 27,
+    VGT_MULTI_PRIM_IB_RESET_EN                            = 0x00028a94,
+	RESET_EN_bit                                      = 1 << 0,
+    VGT_INSTANCE_STEP_RATE_0                              = 0x00028aa0,
+    VGT_INSTANCE_STEP_RATE_1                              = 0x00028aa4,
+    VGT_REUSE_OFF                                         = 0x00028ab4,
+	REUSE_OFF_bit                                     = 1 << 0,
+    VGT_VTX_CNT_EN                                        = 0x00028ab8,
+	VTX_CNT_EN_bit                                    = 1 << 0,
+    DB_HTILE_SURFACE                                      = 0x00028abc,
+	HTILE_WIDTH_bit                                   = 1 << 0,
+	HTILE_HEIGHT_bit                                  = 1 << 1,
+	LINEAR_bit                                        = 1 << 2,
+	FULL_CACHE_bit                                    = 1 << 3,
+	HTILE_USES_PRELOAD_WIN_bit                        = 1 << 4,
+	PRELOAD_bit                                       = 1 << 5,
+	PREFETCH_WIDTH_mask                               = 0x3f << 6,
+	PREFETCH_WIDTH_shift                              = 6,
+	PREFETCH_HEIGHT_mask                              = 0x3f << 12,
+	PREFETCH_HEIGHT_shift                             = 12,
+    DB_SRESULTS_COMPARE_STATE0                            = 0x00028ac0,
+	COMPAREFUNC0_mask                                 = 0x07 << 0,
+	COMPAREFUNC0_shift                                = 0,
+/* 	    REF_NEVER                                     = 0x00, */
+/* 	    REF_LESS                                      = 0x01, */
+/* 	    REF_EQUAL                                     = 0x02, */
+/* 	    REF_LEQUAL                                    = 0x03, */
+/* 	    REF_GREATER                                   = 0x04, */
+/* 	    REF_NOTEQUAL                                  = 0x05, */
+/* 	    REF_GEQUAL                                    = 0x06, */
+/* 	    REF_ALWAYS                                    = 0x07, */
+	COMPAREVALUE0_mask                                = 0xff << 4,
+	COMPAREVALUE0_shift                               = 4,
+	COMPAREMASK0_mask                                 = 0xff << 12,
+	COMPAREMASK0_shift                                = 12,
+	ENABLE0_bit                                       = 1 << 24,
+    DB_SRESULTS_COMPARE_STATE1                            = 0x00028ac4,
+	COMPAREFUNC1_mask                                 = 0x07 << 0,
+	COMPAREFUNC1_shift                                = 0,
+/* 	    REF_NEVER                                     = 0x00, */
+/* 	    REF_LESS                                      = 0x01, */
+/* 	    REF_EQUAL                                     = 0x02, */
+/* 	    REF_LEQUAL                                    = 0x03, */
+/* 	    REF_GREATER                                   = 0x04, */
+/* 	    REF_NOTEQUAL                                  = 0x05, */
+/* 	    REF_GEQUAL                                    = 0x06, */
+/* 	    REF_ALWAYS                                    = 0x07, */
+	COMPAREVALUE1_mask                                = 0xff << 4,
+	COMPAREVALUE1_shift                               = 4,
+	COMPAREMASK1_mask                                 = 0xff << 12,
+	COMPAREMASK1_shift                                = 12,
+	ENABLE1_bit                                       = 1 << 24,
+    DB_PRELOAD_CONTROL                                    = 0x00028ac8,
+	START_X_mask                                      = 0xff << 0,
+	START_X_shift                                     = 0,
+	START_Y_mask                                      = 0xff << 8,
+	START_Y_shift                                     = 8,
+	MAX_X_mask                                        = 0xff << 16,
+	MAX_X_shift                                       = 16,
+	MAX_Y_mask                                        = 0xff << 24,
+	MAX_Y_shift                                       = 24,
+    VGT_STRMOUT_BUFFER_SIZE_0                             = 0x00028ad0,
+    VGT_STRMOUT_VTX_STRIDE_0                              = 0x00028ad4,
+	VGT_STRMOUT_VTX_STRIDE_0__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_0__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_0                             = 0x00028ad8,
+    VGT_STRMOUT_BUFFER_OFFSET_0                           = 0x00028adc,
+    VGT_STRMOUT_BUFFER_SIZE_1                             = 0x00028ae0,
+    VGT_STRMOUT_VTX_STRIDE_1                              = 0x00028ae4,
+	VGT_STRMOUT_VTX_STRIDE_1__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_1__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_1                             = 0x00028ae8,
+    VGT_STRMOUT_BUFFER_OFFSET_1                           = 0x00028aec,
+    VGT_STRMOUT_BUFFER_SIZE_2                             = 0x00028af0,
+    VGT_STRMOUT_VTX_STRIDE_2                              = 0x00028af4,
+	VGT_STRMOUT_VTX_STRIDE_2__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_2__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_2                             = 0x00028af8,
+    VGT_STRMOUT_BUFFER_OFFSET_2                           = 0x00028afc,
+    VGT_STRMOUT_BUFFER_SIZE_3                             = 0x00028b00,
+    VGT_STRMOUT_VTX_STRIDE_3                              = 0x00028b04,
+	VGT_STRMOUT_VTX_STRIDE_3__STRIDE_mask             = 0x3ff << 0,
+	VGT_STRMOUT_VTX_STRIDE_3__STRIDE_shift            = 0,
+    VGT_STRMOUT_BUFFER_BASE_3                             = 0x00028b08,
+    VGT_STRMOUT_BUFFER_OFFSET_3                           = 0x00028b0c,
+    VGT_STRMOUT_BASE_OFFSET_0                             = 0x00028b10,
+    VGT_STRMOUT_BASE_OFFSET_1                             = 0x00028b14,
+    VGT_STRMOUT_BASE_OFFSET_2                             = 0x00028b18,
+    VGT_STRMOUT_BASE_OFFSET_3                             = 0x00028b1c,
+    VGT_STRMOUT_DRAW_OPAQUE_OFFSET                        = 0x00028b28,
+    VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE            = 0x00028b2c,
+    VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE                 = 0x00028b30,
+	VERTEX_STRIDE_mask                                = 0x1ff << 0,
+	VERTEX_STRIDE_shift                               = 0,
+    VGT_GS_MAX_VERT_OUT                                   = 0x00028b38,
+	MAX_VERT_OUT_mask                                 = 0x7ff << 0,
+	MAX_VERT_OUT_shift                                = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_0                          = 0x00028b44,
+	VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_0__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_1                          = 0x00028b48,
+	VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_1__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_2                          = 0x00028b4c,
+	VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_2__BASE_OFFSET_shift   = 0,
+    VGT_STRMOUT_BASE_OFFSET_HI_3                          = 0x00028b50,
+	VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_mask    = 0x3f << 0,
+	VGT_STRMOUT_BASE_OFFSET_HI_3__BASE_OFFSET_shift   = 0,
+    VGT_SHADER_STAGES_EN                                  = 0x00028b54,
+	LS_EN_mask                                        = 0x03 << 0,
+	LS_EN_shift                                       = 0,
+	    LS_STAGE_OFF                                  = 0x00,
+	    LS_STAGE_ON                                   = 0x01,
+	    CS_STAGE_ON                                   = 0x02,
+	HS_EN_bit                                         = 1 << 2,
+	ES_EN_mask                                        = 0x03 << 3,
+	ES_EN_shift                                       = 3,
+	    ES_STAGE_OFF                                  = 0x00,
+	    ES_STAGE_DS                                   = 0x01,
+	    ES_STAGE_REAL                                 = 0x02,
+	GS_EN_bit                                         = 1 << 5,
+	VS_EN_mask                                        = 0x03 << 6,
+	VS_EN_shift                                       = 6,
+	    VS_STAGE_REAL                                 = 0x00,
+	    VS_STAGE_DS                                   = 0x01,
+	    VS_STAGE_COPY_SHADER                          = 0x02,
+	DYNAMIC_HS_bit                                    = 1 << 8,
+    VGT_LS_HS_CONFIG                                      = 0x00028b58,
+	NUM_PATCHES_mask                                  = 0xff << 0,
+	NUM_PATCHES_shift                                 = 0,
+	HS_NUM_INPUT_CP_mask                              = 0x3f << 8,
+	HS_NUM_INPUT_CP_shift                             = 8,
+	HS_NUM_OUTPUT_CP_mask                             = 0x3f << 14,
+	HS_NUM_OUTPUT_CP_shift                            = 14,
+    DB_ALPHA_TO_MASK                                      = 0x00028b70,
+	ALPHA_TO_MASK_ENABLE_bit                          = 1 << 0,
+	ALPHA_TO_MASK_OFFSET0_mask                        = 0x03 << 8,
+	ALPHA_TO_MASK_OFFSET0_shift                       = 8,
+	ALPHA_TO_MASK_OFFSET1_mask                        = 0x03 << 10,
+	ALPHA_TO_MASK_OFFSET1_shift                       = 10,
+	ALPHA_TO_MASK_OFFSET2_mask                        = 0x03 << 12,
+	ALPHA_TO_MASK_OFFSET2_shift                       = 12,
+	ALPHA_TO_MASK_OFFSET3_mask                        = 0x03 << 14,
+	ALPHA_TO_MASK_OFFSET3_shift                       = 14,
+	OFFSET_ROUND_bit                                  = 1 << 16,
+    PA_SU_POLY_OFFSET_DB_FMT_CNTL                         = 0x00028b78,
+	POLY_OFFSET_NEG_NUM_DB_BITS_mask                  = 0xff << 0,
+	POLY_OFFSET_NEG_NUM_DB_BITS_shift                 = 0,
+	POLY_OFFSET_DB_IS_FLOAT_FMT_bit                   = 1 << 8,
+    PA_SU_POLY_OFFSET_CLAMP                               = 0x00028b7c,
+    PA_SU_POLY_OFFSET_FRONT_SCALE                         = 0x00028b80,
+    PA_SU_POLY_OFFSET_FRONT_OFFSET                        = 0x00028b84,
+    PA_SU_POLY_OFFSET_BACK_SCALE                          = 0x00028b88,
+    PA_SU_POLY_OFFSET_BACK_OFFSET                         = 0x00028b8c,
+    VGT_GS_INSTANCE_CNT                                   = 0x00028b90,
+	VGT_GS_INSTANCE_CNT__ENABLE_bit                   = 1 << 0,
+	CNT_mask                                          = 0x7f << 2,
+	CNT_shift                                         = 2,
+    VGT_STRMOUT_CONFIG                                    = 0x00028b94,
+	STREAMOUT_0_EN_bit                                = 1 << 0,
+	STREAMOUT_1_EN_bit                                = 1 << 1,
+	STREAMOUT_2_EN_bit                                = 1 << 2,
+	STREAMOUT_3_EN_bit                                = 1 << 3,
+	RAST_STREAM_mask                                  = 0x07 << 4,
+	RAST_STREAM_shift                                 = 4,
+    VGT_STRMOUT_BUFFER_CONFIG                             = 0x00028b98,
+	STREAM_0_BUFFER_EN_mask                           = 0x0f << 0,
+	STREAM_0_BUFFER_EN_shift                          = 0,
+	STREAM_1_BUFFER_EN_mask                           = 0x0f << 4,
+	STREAM_1_BUFFER_EN_shift                          = 4,
+	STREAM_2_BUFFER_EN_mask                           = 0x0f << 8,
+	STREAM_2_BUFFER_EN_shift                          = 8,
+	STREAM_3_BUFFER_EN_mask                           = 0x0f << 12,
+	STREAM_3_BUFFER_EN_shift                          = 12,
+    CB_IMMED0_BASE                                        = 0x00028b9c,
+	CB_IMMED0_BASE_num                                = 12,
+    PA_SC_CENTROID_PRIORITY_0                             = 0x00028bd4,
+	DISTANCE_0_mask                                   = 0x0f << 0,
+	DISTANCE_0_shift                                  = 0,
+	DISTANCE_1_mask                                   = 0x0f << 4,
+	DISTANCE_1_shift                                  = 4,
+	DISTANCE_2_mask                                   = 0x0f << 8,
+	DISTANCE_2_shift                                  = 8,
+	DISTANCE_3_mask                                   = 0x0f << 12,
+	DISTANCE_3_shift                                  = 12,
+	DISTANCE_4_mask                                   = 0x0f << 16,
+	DISTANCE_4_shift                                  = 16,
+	DISTANCE_5_mask                                   = 0x0f << 20,
+	DISTANCE_5_shift                                  = 20,
+	DISTANCE_6_mask                                   = 0x0f << 24,
+	DISTANCE_6_shift                                  = 24,
+	DISTANCE_7_mask                                   = 0x0f << 28,
+	DISTANCE_7_shift                                  = 28,
+    PA_SC_CENTROID_PRIORITY_1                             = 0x00028bd8,
+	DISTANCE_8_mask                                   = 0x0f << 0,
+	DISTANCE_8_shift                                  = 0,
+	DISTANCE_9_mask                                   = 0x0f << 4,
+	DISTANCE_9_shift                                  = 4,
+	DISTANCE_10_mask                                  = 0x0f << 8,
+	DISTANCE_10_shift                                 = 8,
+	DISTANCE_11_mask                                  = 0x0f << 12,
+	DISTANCE_11_shift                                 = 12,
+	DISTANCE_12_mask                                  = 0x0f << 16,
+	DISTANCE_12_shift                                 = 16,
+	DISTANCE_13_mask                                  = 0x0f << 20,
+	DISTANCE_13_shift                                 = 20,
+	DISTANCE_14_mask                                  = 0x0f << 24,
+	DISTANCE_14_shift                                 = 24,
+	DISTANCE_15_mask                                  = 0x0f << 28,
+	DISTANCE_15_shift                                 = 28,
+    PA_SC_LINE_CNTL                                       = 0x00028bdc,
+	EXPAND_LINE_WIDTH_bit                             = 1 << 9,
+	LAST_PIXEL_bit                                    = 1 << 10,
+	PERPENDICULAR_ENDCAP_ENA_bit                      = 1 << 11,
+	DX10_DIAMOND_TEST_ENA_bit                         = 1 << 12,
+    PA_SC_AA_CONFIG                                       = 0x00028be0,
+	MSAA_NUM_SAMPLES_mask                             = 0x07 << 0,
+	MSAA_NUM_SAMPLES_shift                            = 0,
+	AA_MASK_CENTROID_DTMN_bit                         = 1 << 4,
+	MAX_SAMPLE_DIST_mask                              = 0x0f << 13,
+	MAX_SAMPLE_DIST_shift                             = 13,
+	MSAA_EXPOSED_SAMPLES_mask                         = 0x07 << 20,
+	MSAA_EXPOSED_SAMPLES_shift                        = 20,
+	DETAIL_TO_EXPOSED_MODE_mask                       = 0x03 << 24,
+	DETAIL_TO_EXPOSED_MODE_shift                      = 24,
+    PA_SU_VTX_CNTL                                        = 0x00028be4,
+	PIX_CENTER_bit                                    = 1 << 0,
+	PA_SU_VTX_CNTL__ROUND_MODE_mask                   = 0x03 << 1,
+	PA_SU_VTX_CNTL__ROUND_MODE_shift                  = 1,
+	    X_TRUNCATE                                    = 0x00,
+	    X_ROUND                                       = 0x01,
+	    X_ROUND_TO_EVEN                               = 0x02,
+	    X_ROUND_TO_ODD                                = 0x03,
+	QUANT_MODE_mask                                   = 0x07 << 3,
+	QUANT_MODE_shift                                  = 3,
+	    X_1_16TH                                      = 0x00,
+	    X_1_8TH                                       = 0x01,
+	    X_1_4TH                                       = 0x02,
+	    X_1_2                                         = 0x03,
+	    QUANT_MODE__X_1                               = 0x04,
+	    X_1_256TH                                     = 0x05,
+	    X_1_1024TH                                    = 0x06,
+	    X_1_4096TH                                    = 0x07,
+    PA_CL_GB_VERT_CLIP_ADJ                                = 0x00028be8,
+    PA_CL_GB_HORZ_CLIP_ADJ                                = 0x00028bf0,
+    PA_CL_GB_HORZ_DISC_ADJ                                = 0x00028bf4,
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0                     = 0x00028bf8,
+	S0_X_mask                                         = 0x0f << 0,
+	S0_X_shift                                        = 0,
+	S0_Y_mask                                         = 0x0f << 4,
+	S0_Y_shift                                        = 4,
+	S1_X_mask                                         = 0x0f << 8,
+	S1_X_shift                                        = 8,
+	S1_Y_mask                                         = 0x0f << 12,
+	S1_Y_shift                                        = 12,
+	S2_X_mask                                         = 0x0f << 16,
+	S2_X_shift                                        = 16,
+	S2_Y_mask                                         = 0x0f << 20,
+	S2_Y_shift                                        = 20,
+	S3_X_mask                                         = 0x0f << 24,
+	S3_X_shift                                        = 24,
+	S3_Y_mask                                         = 0x0f << 28,
+	S3_Y_shift                                        = 28,
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1                     = 0x00028bfc,
+	S4_X_mask                                         = 0x0f << 0,
+	S4_X_shift                                        = 0,
+	S4_Y_mask                                         = 0x0f << 4,
+	S4_Y_shift                                        = 4,
+	S5_X_mask                                         = 0x0f << 8,
+	S5_X_shift                                        = 8,
+	S5_Y_mask                                         = 0x0f << 12,
+	S5_Y_shift                                        = 12,
+	S6_X_mask                                         = 0x0f << 16,
+	S6_X_shift                                        = 16,
+	S6_Y_mask                                         = 0x0f << 20,
+	S6_Y_shift                                        = 20,
+	S7_X_mask                                         = 0x0f << 24,
+	S7_X_shift                                        = 24,
+	S7_Y_mask                                         = 0x0f << 28,
+	S7_Y_shift                                        = 28,
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_2                     = 0x00028c00,
+	S8_X_mask                                         = 0x0f << 0,
+	S8_X_shift                                        = 0,
+	S8_Y_mask                                         = 0x0f << 4,
+	S8_Y_shift                                        = 4,
+	S9_X_mask                                         = 0x0f << 8,
+	S9_X_shift                                        = 8,
+	S9_Y_mask                                         = 0x0f << 12,
+	S9_Y_shift                                        = 12,
+	S10_X_mask                                        = 0x0f << 16,
+	S10_X_shift                                       = 16,
+	S10_Y_mask                                        = 0x0f << 20,
+	S10_Y_shift                                       = 20,
+	S11_X_mask                                        = 0x0f << 24,
+	S11_X_shift                                       = 24,
+	S11_Y_mask                                        = 0x0f << 28,
+	S11_Y_shift                                       = 28,
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_3                     = 0x00028c04,
+	S12_X_mask                                        = 0x0f << 0,
+	S12_X_shift                                       = 0,
+	S12_Y_mask                                        = 0x0f << 4,
+	S12_Y_shift                                       = 4,
+	S13_X_mask                                        = 0x0f << 8,
+	S13_X_shift                                       = 8,
+	S13_Y_mask                                        = 0x0f << 12,
+	S13_Y_shift                                       = 12,
+	S14_X_mask                                        = 0x0f << 16,
+	S14_X_shift                                       = 16,
+	S14_Y_mask                                        = 0x0f << 20,
+	S14_Y_shift                                       = 20,
+	S15_X_mask                                        = 0x0f << 24,
+	S15_X_shift                                       = 24,
+	S15_Y_mask                                        = 0x0f << 28,
+	S15_Y_shift                                       = 28,
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0                     = 0x00028c08,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1                     = 0x00028c0c,
+/* 	S4_X_mask                                         = 0x0f << 0, */
+/* 	S4_X_shift                                        = 0, */
+/* 	S4_Y_mask                                         = 0x0f << 4, */
+/* 	S4_Y_shift                                        = 4, */
+/* 	S5_X_mask                                         = 0x0f << 8, */
+/* 	S5_X_shift                                        = 8, */
+/* 	S5_Y_mask                                         = 0x0f << 12, */
+/* 	S5_Y_shift                                        = 12, */
+/* 	S6_X_mask                                         = 0x0f << 16, */
+/* 	S6_X_shift                                        = 16, */
+/* 	S6_Y_mask                                         = 0x0f << 20, */
+/* 	S6_Y_shift                                        = 20, */
+/* 	S7_X_mask                                         = 0x0f << 24, */
+/* 	S7_X_shift                                        = 24, */
+/* 	S7_Y_mask                                         = 0x0f << 28, */
+/* 	S7_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_2                     = 0x00028c10,
+/* 	S8_X_mask                                         = 0x0f << 0, */
+/* 	S8_X_shift                                        = 0, */
+/* 	S8_Y_mask                                         = 0x0f << 4, */
+/* 	S8_Y_shift                                        = 4, */
+/* 	S9_X_mask                                         = 0x0f << 8, */
+/* 	S9_X_shift                                        = 8, */
+/* 	S9_Y_mask                                         = 0x0f << 12, */
+/* 	S9_Y_shift                                        = 12, */
+/* 	S10_X_mask                                        = 0x0f << 16, */
+/* 	S10_X_shift                                       = 16, */
+/* 	S10_Y_mask                                        = 0x0f << 20, */
+/* 	S10_Y_shift                                       = 20, */
+/* 	S11_X_mask                                        = 0x0f << 24, */
+/* 	S11_X_shift                                       = 24, */
+/* 	S11_Y_mask                                        = 0x0f << 28, */
+/* 	S11_Y_shift                                       = 28, */
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_3                     = 0x00028c14,
+/* 	S12_X_mask                                        = 0x0f << 0, */
+/* 	S12_X_shift                                       = 0, */
+/* 	S12_Y_mask                                        = 0x0f << 4, */
+/* 	S12_Y_shift                                       = 4, */
+/* 	S13_X_mask                                        = 0x0f << 8, */
+/* 	S13_X_shift                                       = 8, */
+/* 	S13_Y_mask                                        = 0x0f << 12, */
+/* 	S13_Y_shift                                       = 12, */
+/* 	S14_X_mask                                        = 0x0f << 16, */
+/* 	S14_X_shift                                       = 16, */
+/* 	S14_Y_mask                                        = 0x0f << 20, */
+/* 	S14_Y_shift                                       = 20, */
+/* 	S15_X_mask                                        = 0x0f << 24, */
+/* 	S15_X_shift                                       = 24, */
+/* 	S15_Y_mask                                        = 0x0f << 28, */
+/* 	S15_Y_shift                                       = 28, */
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0                     = 0x00028c18,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1                     = 0x00028c1c,
+/* 	S4_X_mask                                         = 0x0f << 0, */
+/* 	S4_X_shift                                        = 0, */
+/* 	S4_Y_mask                                         = 0x0f << 4, */
+/* 	S4_Y_shift                                        = 4, */
+/* 	S5_X_mask                                         = 0x0f << 8, */
+/* 	S5_X_shift                                        = 8, */
+/* 	S5_Y_mask                                         = 0x0f << 12, */
+/* 	S5_Y_shift                                        = 12, */
+/* 	S6_X_mask                                         = 0x0f << 16, */
+/* 	S6_X_shift                                        = 16, */
+/* 	S6_Y_mask                                         = 0x0f << 20, */
+/* 	S6_Y_shift                                        = 20, */
+/* 	S7_X_mask                                         = 0x0f << 24, */
+/* 	S7_X_shift                                        = 24, */
+/* 	S7_Y_mask                                         = 0x0f << 28, */
+/* 	S7_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_2                     = 0x00028c20,
+/* 	S8_X_mask                                         = 0x0f << 0, */
+/* 	S8_X_shift                                        = 0, */
+/* 	S8_Y_mask                                         = 0x0f << 4, */
+/* 	S8_Y_shift                                        = 4, */
+/* 	S9_X_mask                                         = 0x0f << 8, */
+/* 	S9_X_shift                                        = 8, */
+/* 	S9_Y_mask                                         = 0x0f << 12, */
+/* 	S9_Y_shift                                        = 12, */
+/* 	S10_X_mask                                        = 0x0f << 16, */
+/* 	S10_X_shift                                       = 16, */
+/* 	S10_Y_mask                                        = 0x0f << 20, */
+/* 	S10_Y_shift                                       = 20, */
+/* 	S11_X_mask                                        = 0x0f << 24, */
+/* 	S11_X_shift                                       = 24, */
+/* 	S11_Y_mask                                        = 0x0f << 28, */
+/* 	S11_Y_shift                                       = 28, */
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_3                     = 0x00028c24,
+/* 	S12_X_mask                                        = 0x0f << 0, */
+/* 	S12_X_shift                                       = 0, */
+/* 	S12_Y_mask                                        = 0x0f << 4, */
+/* 	S12_Y_shift                                       = 4, */
+/* 	S13_X_mask                                        = 0x0f << 8, */
+/* 	S13_X_shift                                       = 8, */
+/* 	S13_Y_mask                                        = 0x0f << 12, */
+/* 	S13_Y_shift                                       = 12, */
+/* 	S14_X_mask                                        = 0x0f << 16, */
+/* 	S14_X_shift                                       = 16, */
+/* 	S14_Y_mask                                        = 0x0f << 20, */
+/* 	S14_Y_shift                                       = 20, */
+/* 	S15_X_mask                                        = 0x0f << 24, */
+/* 	S15_X_shift                                       = 24, */
+/* 	S15_Y_mask                                        = 0x0f << 28, */
+/* 	S15_Y_shift                                       = 28, */
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0                     = 0x00028c28,
+/* 	S0_X_mask                                         = 0x0f << 0, */
+/* 	S0_X_shift                                        = 0, */
+/* 	S0_Y_mask                                         = 0x0f << 4, */
+/* 	S0_Y_shift                                        = 4, */
+/* 	S1_X_mask                                         = 0x0f << 8, */
+/* 	S1_X_shift                                        = 8, */
+/* 	S1_Y_mask                                         = 0x0f << 12, */
+/* 	S1_Y_shift                                        = 12, */
+/* 	S2_X_mask                                         = 0x0f << 16, */
+/* 	S2_X_shift                                        = 16, */
+/* 	S2_Y_mask                                         = 0x0f << 20, */
+/* 	S2_Y_shift                                        = 20, */
+/* 	S3_X_mask                                         = 0x0f << 24, */
+/* 	S3_X_shift                                        = 24, */
+/* 	S3_Y_mask                                         = 0x0f << 28, */
+/* 	S3_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1                     = 0x00028c2c,
+/* 	S4_X_mask                                         = 0x0f << 0, */
+/* 	S4_X_shift                                        = 0, */
+/* 	S4_Y_mask                                         = 0x0f << 4, */
+/* 	S4_Y_shift                                        = 4, */
+/* 	S5_X_mask                                         = 0x0f << 8, */
+/* 	S5_X_shift                                        = 8, */
+/* 	S5_Y_mask                                         = 0x0f << 12, */
+/* 	S5_Y_shift                                        = 12, */
+/* 	S6_X_mask                                         = 0x0f << 16, */
+/* 	S6_X_shift                                        = 16, */
+/* 	S6_Y_mask                                         = 0x0f << 20, */
+/* 	S6_Y_shift                                        = 20, */
+/* 	S7_X_mask                                         = 0x0f << 24, */
+/* 	S7_X_shift                                        = 24, */
+/* 	S7_Y_mask                                         = 0x0f << 28, */
+/* 	S7_Y_shift                                        = 28, */
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_2                     = 0x00028c30,
+/* 	S8_X_mask                                         = 0x0f << 0, */
+/* 	S8_X_shift                                        = 0, */
+/* 	S8_Y_mask                                         = 0x0f << 4, */
+/* 	S8_Y_shift                                        = 4, */
+/* 	S9_X_mask                                         = 0x0f << 8, */
+/* 	S9_X_shift                                        = 8, */
+/* 	S9_Y_mask                                         = 0x0f << 12, */
+/* 	S9_Y_shift                                        = 12, */
+/* 	S10_X_mask                                        = 0x0f << 16, */
+/* 	S10_X_shift                                       = 16, */
+/* 	S10_Y_mask                                        = 0x0f << 20, */
+/* 	S10_Y_shift                                       = 20, */
+/* 	S11_X_mask                                        = 0x0f << 24, */
+/* 	S11_X_shift                                       = 24, */
+/* 	S11_Y_mask                                        = 0x0f << 28, */
+/* 	S11_Y_shift                                       = 28, */
+    PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_3                     = 0x00028c34,
+/* 	S12_X_mask                                        = 0x0f << 0, */
+/* 	S12_X_shift                                       = 0, */
+/* 	S12_Y_mask                                        = 0x0f << 4, */
+/* 	S12_Y_shift                                       = 4, */
+/* 	S13_X_mask                                        = 0x0f << 8, */
+/* 	S13_X_shift                                       = 8, */
+/* 	S13_Y_mask                                        = 0x0f << 12, */
+/* 	S13_Y_shift                                       = 12, */
+/* 	S14_X_mask                                        = 0x0f << 16, */
+/* 	S14_X_shift                                       = 16, */
+/* 	S14_Y_mask                                        = 0x0f << 20, */
+/* 	S14_Y_shift                                       = 20, */
+/* 	S15_X_mask                                        = 0x0f << 24, */
+/* 	S15_X_shift                                       = 24, */
+/* 	S15_Y_mask                                        = 0x0f << 28, */
+/* 	S15_Y_shift                                       = 28, */
+    PA_SC_AA_MASK_X0Y0_X1Y0                               = 0x00028c38,
+	AA_MASK_X0Y0_mask                                 = 0xffff << 0,
+	AA_MASK_X0Y0_shift                                = 0,
+	AA_MASK_X1Y0_mask                                 = 0xffff << 16,
+	AA_MASK_X1Y0_shift                                = 16,
+    PA_SC_AA_MASK_X0Y1_X1Y1                               = 0x00028c3c,
+	AA_MASK_X0Y1_mask                                 = 0xffff << 0,
+	AA_MASK_X0Y1_shift                                = 0,
+	AA_MASK_X1Y1_mask                                 = 0xffff << 16,
+	AA_MASK_X1Y1_shift                                = 16,
+    VGT_VERTEX_REUSE_BLOCK_CNTL                           = 0x00028c58,
+	VTX_REUSE_DEPTH_mask                              = 0xff << 0,
+	VTX_REUSE_DEPTH_shift                             = 0,
+    VGT_OUT_DEALLOC_CNTL                                  = 0x00028c5c,
+	DEALLOC_DIST_mask                                 = 0x7f << 0,
+	DEALLOC_DIST_shift                                = 0,
+    CB_COLOR0_BASE                                        = 0x00028c60,
+	CB_COLOR0_BASE_num                                = 12,
+	CB_COLOR0_BASE_offset                             = 51,
+    CB_COLOR0_PITCH                                       = 0x00028c64,
+	CB_COLOR0_PITCH_num                               = 12,
+	CB_COLOR0_PITCH_offset                            = 51,
+	CB_COLOR0_PITCH__TILE_MAX_mask                    = 0x7ff << 0,
+	CB_COLOR0_PITCH__TILE_MAX_shift                   = 0,
+    CB_COLOR0_SLICE                                       = 0x00028c68,
+	CB_COLOR0_SLICE_num                               = 12,
+	CB_COLOR0_SLICE_offset                            = 51,
+	CB_COLOR0_SLICE__TILE_MAX_mask                    = 0x3fffff << 0,
+	CB_COLOR0_SLICE__TILE_MAX_shift                   = 0,
+    CB_COLOR0_VIEW                                        = 0x00028c6c,
+	CB_COLOR0_VIEW_num                                = 12,
+	CB_COLOR0_VIEW_offset                             = 51,
+/* 	SLICE_START_mask                                  = 0x7ff << 0, */
+/* 	SLICE_START_shift                                 = 0, */
+/* 	SLICE_MAX_mask                                    = 0x7ff << 13, */
+/* 	SLICE_MAX_shift                                   = 13, */
+    CB_COLOR0_INFO                                        = 0x00028c70,
+	CB_COLOR0_INFO_num                                = 12,
+	CB_COLOR0_INFO_offset                             = 51,
+	ENDIAN_mask                                       = 0x03 << 0,
+	ENDIAN_shift                                      = 0,
+	    ENDIAN_NONE                                   = 0x00,
+	    ENDIAN_8IN16                                  = 0x01,
+	    ENDIAN_8IN32                                  = 0x02,
+	    ENDIAN_8IN64                                  = 0x03,
+	CB_COLOR0_INFO__FORMAT_mask                       = 0x3f << 2,
+	CB_COLOR0_INFO__FORMAT_shift                      = 2,
+	    COLOR_INVALID                                 = 0x00,
+	    COLOR_8                                       = 0x01,
+	    COLOR_16                                      = 0x05,
+	    COLOR_16_FLOAT                                = 0x06,
+	    COLOR_8_8                                     = 0x07,
+	    COLOR_5_6_5                                   = 0x08,
+	    COLOR_1_5_5_5                                 = 0x0a,
+	    COLOR_4_4_4_4                                 = 0x0b,
+	    COLOR_5_5_5_1                                 = 0x0c,
+	    COLOR_32                                      = 0x0d,
+	    COLOR_32_FLOAT                                = 0x0e,
+	    COLOR_16_16                                   = 0x0f,
+	    COLOR_16_16_FLOAT                             = 0x10,
+	    COLOR_8_24                                    = 0x11,
+	    COLOR_24_8                                    = 0x13,
+	    COLOR_10_11_11                                = 0x15,
+	    COLOR_10_11_11_FLOAT                          = 0x16,
+	    COLOR_2_10_10_10                              = 0x19,
+	    COLOR_8_8_8_8                                 = 0x1a,
+	    COLOR_10_10_10_2                              = 0x1b,
+	    COLOR_X24_8_32_FLOAT                          = 0x1c,
+	    COLOR_32_32                                   = 0x1d,
+	    COLOR_32_32_FLOAT                             = 0x1e,
+	    COLOR_16_16_16_16                             = 0x1f,
+	    COLOR_16_16_16_16_FLOAT                       = 0x20,
+	    COLOR_32_32_32_32                             = 0x22,
+	    COLOR_32_32_32_32_FLOAT                       = 0x23,
+	CB_COLOR0_INFO__ARRAY_MODE_mask                   = 0x0f << 8,
+	CB_COLOR0_INFO__ARRAY_MODE_shift                  = 8,
+	    ARRAY_LINEAR_GENERAL                          = 0x00,
+	    ARRAY_LINEAR_ALIGNED                          = 0x01,
+/* 	    ARRAY_1D_TILED_THIN1                          = 0x02, */
+/* 	    ARRAY_2D_TILED_THIN1                          = 0x04, */
+	NUMBER_TYPE_mask                                  = 0x07 << 12,
+	NUMBER_TYPE_shift                                 = 12,
+	    NUMBER_UNORM                                  = 0x00,
+	    NUMBER_SNORM                                  = 0x01,
+	    NUMBER_UINT                                   = 0x04,
+	    NUMBER_SINT                                   = 0x05,
+	    NUMBER_SRGB                                   = 0x06,
+	    NUMBER_FLOAT                                  = 0x07,
+	COMP_SWAP_mask                                    = 0x03 << 15,
+	COMP_SWAP_shift                                   = 15,
+	    SWAP_STD                                      = 0x00,
+	    SWAP_ALT                                      = 0x01,
+	    SWAP_STD_REV                                  = 0x02,
+	    SWAP_ALT_REV                                  = 0x03,
+	FAST_CLEAR_bit                                    = 1 << 17,
+	COMPRESSION_bit                                   = 1 << 18,
+	BLEND_CLAMP_bit                                   = 1 << 19,
+	BLEND_BYPASS_bit                                  = 1 << 20,
+	SIMPLE_FLOAT_bit                                  = 1 << 21,
+	CB_COLOR0_INFO__ROUND_MODE_bit                    = 1 << 22,
+	TILE_COMPACT_bit                                  = 1 << 23,
+	SOURCE_FORMAT_mask                                = 0x03 << 24,
+	SOURCE_FORMAT_shift                               = 24,
+	    EXPORT_4C_32BPC                               = 0x00,
+	    EXPORT_4C_16BPC                               = 0x01,
+	    EXPORT_2C_32BPC_GR                            = 0x02,
+	    EXPORT_2C_32BPC_AR                            = 0x03,
+	RAT_bit                                           = 1 << 26,
+	RESOURCE_TYPE_mask                                = 0x07 << 27,
+	RESOURCE_TYPE_shift                               = 27,
+	    BUFFER                                        = 0x00,
+	    TEXTURE1D                                     = 0x01,
+	    TEXTURE1DARRAY                                = 0x02,
+	    TEXTURE2D                                     = 0x03,
+	    TEXTURE2DARRAY                                = 0x04,
+	    TEXTURE3D                                     = 0x05,
+	    STRUCTUREDBUFFER                              = 0x06,
+	SOURCE_NUMBER_TYPE_mask                           = 0x03 << 30,
+	SOURCE_NUMBER_TYPE_shift                          = 30,
+	    EXPORT_FLOAT                                  = 0x00,
+	    EXPORT_INT                                    = 0x01,
+	    EXPORT_UNORM                                  = 0x02,
+	    EXPORT_SNORM                                  = 0x03,
+    CB_COLOR0_ATTRIB                                      = 0x00028c74,
+	CB_COLOR0_ATTRIB_num                              = 12,
+	CB_COLOR0_ATTRIB_offset                           = 51,
+	IGNORE_SHADER_ENGINE_TILING_bit                   = 1 << 3,
+	CB_COLOR0_ATTRIB__NON_DISP_TILING_ORDER_bit       = 1 << 4,
+	CB_COLOR0_ATTRIB__TILE_SPLIT_mask                 = 0x0f << 5,
+	CB_COLOR0_ATTRIB__TILE_SPLIT_shift                = 5,
+/* 	    ADDR_SURF_TILE_SPLIT_64B                      = 0x00, */
+/* 	    ADDR_SURF_TILE_SPLIT_128B                     = 0x01, */
+/* 	    ADDR_SURF_TILE_SPLIT_256B                     = 0x02, */
+/* 	    ADDR_SURF_TILE_SPLIT_512B                     = 0x03, */
+/* 	    ADDR_SURF_TILE_SPLIT_1KB                      = 0x04, */
+/* 	    ADDR_SURF_TILE_SPLIT_2KB                      = 0x05, */
+/* 	    ADDR_SURF_TILE_SPLIT_4KB                      = 0x06, */
+	CB_COLOR0_ATTRIB__NUM_BANKS_mask                  = 0x03 << 10,
+	CB_COLOR0_ATTRIB__NUM_BANKS_shift                 = 10,
+/* 	    ADDR_SURF_2_BANK                              = 0x00, */
+/* 	    ADDR_SURF_4_BANK                              = 0x01, */
+/* 	    ADDR_SURF_8_BANK                              = 0x02, */
+/* 	    ADDR_SURF_16_BANK                             = 0x03, */
+	CB_COLOR0_ATTRIB__BANK_WIDTH_mask                 = 0x03 << 13,
+	CB_COLOR0_ATTRIB__BANK_WIDTH_shift                = 13,
+/* 	    ADDR_SURF_BANK_WIDTH_1                        = 0x00, */
+/* 	    ADDR_SURF_BANK_WIDTH_2                        = 0x01, */
+/* 	    ADDR_SURF_BANK_WIDTH_4                        = 0x02, */
+/* 	    ADDR_SURF_BANK_WIDTH_8                        = 0x03, */
+	CB_COLOR0_ATTRIB__BANK_HEIGHT_mask                = 0x03 << 16,
+	CB_COLOR0_ATTRIB__BANK_HEIGHT_shift               = 16,
+/* 	    ADDR_SURF_BANK_HEIGHT_1                       = 0x00, */
+/* 	    ADDR_SURF_BANK_HEIGHT_2                       = 0x01, */
+/* 	    ADDR_SURF_BANK_HEIGHT_4                       = 0x02, */
+/* 	    ADDR_SURF_BANK_HEIGHT_8                       = 0x03, */
+	CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_mask          = 0x03 << 19,
+	CB_COLOR0_ATTRIB__MACRO_TILE_ASPECT_shift         = 19,
+/* 	    ADDR_SURF_MACRO_ASPECT_1                      = 0x00, */
+/* 	    ADDR_SURF_MACRO_ASPECT_2                      = 0x01, */
+/* 	    ADDR_SURF_MACRO_ASPECT_4                      = 0x02, */
+/* 	    ADDR_SURF_MACRO_ASPECT_8                      = 0x03, */
+	FMASK_BANK_HEIGHT_mask                            = 0x03 << 22,
+	FMASK_BANK_HEIGHT_shift                           = 22,
+/* 	    ADDR_SURF_BANK_HEIGHT_1                       = 0x00, */
+/* 	    ADDR_SURF_BANK_HEIGHT_2                       = 0x01, */
+/* 	    ADDR_SURF_BANK_HEIGHT_4                       = 0x02, */
+/* 	    ADDR_SURF_BANK_HEIGHT_8                       = 0x03, */
+	CB_COLOR0_ATTRIB__NUM_SAMPLES_mask                = 0x07 << 24,
+	CB_COLOR0_ATTRIB__NUM_SAMPLES_shift               = 24,
+	NUM_FRAGMENTS_mask                                = 0x03 << 27,
+	NUM_FRAGMENTS_shift                               = 27,
+	FORCE_DST_ALPHA_1_bit                             = 1 << 31,
+    CB_COLOR0_DIM                                         = 0x00028c78,
+	CB_COLOR0_DIM_num                                 = 12,
+	CB_COLOR0_DIM_offset                              = 51,
+	WIDTH_MAX_mask                                    = 0xffff << 0,
+	WIDTH_MAX_shift                                   = 0,
+	HEIGHT_MAX_mask                                   = 0xffff << 16,
+	HEIGHT_MAX_shift                                  = 16,
+    CB_COLOR0_CMASK                                       = 0x00028c7c,
+	CB_COLOR0_CMASK_num                               = 8,
+	CB_COLOR0_CMASK_offset                            = 60,
+    CB_COLOR0_CMASK_SLICE                                 = 0x00028c80,
+	CB_COLOR0_CMASK_SLICE_num                         = 8,
+	CB_COLOR0_CMASK_SLICE_offset                      = 60,
+	CB_COLOR0_CMASK_SLICE__TILE_MAX_mask              = 0x3fff << 0,
+	CB_COLOR0_CMASK_SLICE__TILE_MAX_shift             = 0,
+    CB_COLOR0_FMASK                                       = 0x00028c84,
+	CB_COLOR0_FMASK_num                               = 8,
+	CB_COLOR0_FMASK_offset                            = 60,
+    CB_COLOR0_FMASK_SLICE                                 = 0x00028c88,
+	CB_COLOR0_FMASK_SLICE_num                         = 8,
+	CB_COLOR0_FMASK_SLICE_offset                      = 60,
+	CB_COLOR0_FMASK_SLICE__TILE_MAX_mask              = 0x3fffff << 0,
+	CB_COLOR0_FMASK_SLICE__TILE_MAX_shift             = 0,
+    CB_COLOR0_CLEAR_WORD0                                 = 0x00028c8c,
+	CB_COLOR0_CLEAR_WORD0_num                         = 8,
+	CB_COLOR0_CLEAR_WORD0_offset                      = 60,
+    CB_COLOR0_CLEAR_WORD1                                 = 0x00028c90,
+	CB_COLOR0_CLEAR_WORD1_num                         = 8,
+	CB_COLOR0_CLEAR_WORD1_offset                      = 60,
+    CB_COLOR0_CLEAR_WORD2                                 = 0x00028c94,
+	CB_COLOR0_CLEAR_WORD2_num                         = 8,
+	CB_COLOR0_CLEAR_WORD2_offset                      = 60,
+    CB_COLOR0_CLEAR_WORD3                                 = 0x00028c98,
+	CB_COLOR0_CLEAR_WORD3_num                         = 8,
+	CB_COLOR0_CLEAR_WORD3_offset                      = 60,
+    SQ_ALU_CONST_CACHE_HS_0                               = 0x00028f00,
+	SQ_ALU_CONST_CACHE_HS_0_num                       = 16,
+    SQ_ALU_CONST_CACHE_LS_0                               = 0x00028f40,
+	SQ_ALU_CONST_CACHE_LS_0_num                       = 16,
+    SQ_ALU_CONST_BUFFER_SIZE_HS_0                         = 0x00028f80,
+	SQ_ALU_CONST_BUFFER_SIZE_HS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_HS_0__DATA_shift         = 0,
+    SQ_ALU_CONST_BUFFER_SIZE_LS_0                         = 0x00028fc0,
+	SQ_ALU_CONST_BUFFER_SIZE_LS_0_num                 = 16,
+	SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_mask          = 0x1ff << 0,
+	SQ_ALU_CONST_BUFFER_SIZE_LS_0__DATA_shift         = 0,
+    SQ_VTX_CONSTANT_WORD0_0                               = 0x00030000,
+    SQ_TEX_RESOURCE_WORD0_0                               = 0x00030000,
+	DIM_mask                                          = 0x07 << 0,
+	DIM_shift                                         = 0,
+	    SQ_TEX_DIM_1D                                 = 0x00,
+	    SQ_TEX_DIM_2D                                 = 0x01,
+	    SQ_TEX_DIM_3D                                 = 0x02,
+	    SQ_TEX_DIM_CUBEMAP                            = 0x03,
+	    SQ_TEX_DIM_1D_ARRAY                           = 0x04,
+	    SQ_TEX_DIM_2D_ARRAY                           = 0x05,
+	    SQ_TEX_DIM_2D_MSAA                            = 0x06,
+	    SQ_TEX_DIM_2D_ARRAY_MSAA                      = 0x07,
+/* 	IGNORE_SHADER_ENGINE_TILING_bit                   = 1 << 3, */
+	SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_mask= 0x03 << 4,
+	SQ_TEX_RESOURCE_WORD0_0__NON_DISP_TILING_ORDER_shift= 4,
+	PITCH_mask                                        = 0xfff << 6,
+	PITCH_shift                                       = 6,
+	TEX_WIDTH_mask                                    = 0x3fff << 18,
+	TEX_WIDTH_shift                                   = 18,
+    SQ_VTX_CONSTANT_WORD1_0                               = 0x00030004,
+    SQ_TEX_RESOURCE_WORD1_0                               = 0x00030004,
+	TEX_HEIGHT_mask                                   = 0x3fff << 0,
+	TEX_HEIGHT_shift                                  = 0,
+	TEX_DEPTH_mask                                    = 0x1fff << 14,
+	TEX_DEPTH_shift                                   = 14,
+	SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_mask          = 0x0f << 28,
+	SQ_TEX_RESOURCE_WORD1_0__ARRAY_MODE_shift         = 28,
+    SQ_VTX_CONSTANT_WORD2_0                               = 0x00030008,
+	BASE_ADDRESS_HI_mask                              = 0xff << 0,
+	BASE_ADDRESS_HI_shift                             = 0,
+	SQ_VTX_CONSTANT_WORD2_0__STRIDE_mask              = 0xfff << 8,
+	SQ_VTX_CONSTANT_WORD2_0__STRIDE_shift             = 8,
+	SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_mask         = 0x3f << 20,
+	SQ_VTX_CONSTANT_WORD2_0__DATA_FORMAT_shift        = 20,
+	SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_mask      = 0x03 << 26,
+	SQ_VTX_CONSTANT_WORD2_0__NUM_FORMAT_ALL_shift     = 26,
+/* 	    SQ_NUM_FORMAT_NORM                            = 0x00, */
+/* 	    SQ_NUM_FORMAT_INT                             = 0x01, */
+/* 	    SQ_NUM_FORMAT_SCALED                          = 0x02, */
+	SQ_VTX_CONSTANT_WORD2_0__FORMAT_COMP_ALL_bit      = 1 << 28,
+	SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit         = 1 << 29,
+	SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_mask         = 0x03 << 30,
+	SQ_VTX_CONSTANT_WORD2_0__ENDIAN_SWAP_shift        = 30,
+/* 	    SQ_ENDIAN_NONE                                = 0x00, */
+/* 	    SQ_ENDIAN_8IN16                               = 0x01, */
+/* 	    SQ_ENDIAN_8IN32                               = 0x02, */
+    SQ_TEX_RESOURCE_WORD2_0                               = 0x00030008,
+    SQ_VTX_CONSTANT_WORD3_0                               = 0x0003000c,
+	CACHE_SWIZZLE_bit                                 = 1 << 0,
+	SQ_VTX_CONSTANT_WORD3_0__UNCACHED_bit             = 1 << 2,
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_mask           = 0x07 << 3,
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_X_shift          = 3,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_mask           = 0x07 << 6,
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Y_shift          = 6,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_mask           = 0x07 << 9,
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_Z_shift          = 9,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_mask           = 0x07 << 12,
+	SQ_VTX_CONSTANT_WORD3_0__DST_SEL_W_shift          = 12,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+    SQ_TEX_RESOURCE_WORD3_0                               = 0x0003000c,
+    SQ_TEX_RESOURCE_WORD4_0                               = 0x00030010,
+	FORMAT_COMP_X_mask                                = 0x03 << 0,
+	FORMAT_COMP_X_shift                               = 0,
+	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00,
+	    SQ_FORMAT_COMP_SIGNED                         = 0x01,
+	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02,
+	FORMAT_COMP_Y_mask                                = 0x03 << 2,
+	FORMAT_COMP_Y_shift                               = 2,
+/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+	FORMAT_COMP_Z_mask                                = 0x03 << 4,
+	FORMAT_COMP_Z_shift                               = 4,
+/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+	FORMAT_COMP_W_mask                                = 0x03 << 6,
+	FORMAT_COMP_W_shift                               = 6,
+/* 	    SQ_FORMAT_COMP_UNSIGNED                       = 0x00, */
+/* 	    SQ_FORMAT_COMP_SIGNED                         = 0x01, */
+/* 	    SQ_FORMAT_COMP_UNSIGNED_BIASED                = 0x02, */
+	SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_mask      = 0x03 << 8,
+	SQ_TEX_RESOURCE_WORD4_0__NUM_FORMAT_ALL_shift     = 8,
+/* 	    SQ_NUM_FORMAT_NORM                            = 0x00, */
+/* 	    SQ_NUM_FORMAT_INT                             = 0x01, */
+/* 	    SQ_NUM_FORMAT_SCALED                          = 0x02, */
+	SQ_TEX_RESOURCE_WORD4_0__SRF_MODE_ALL_bit         = 1 << 10,
+	SQ_TEX_RESOURCE_WORD4_0__FORCE_DEGAMMA_bit        = 1 << 11,
+	SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_mask         = 0x03 << 12,
+	SQ_TEX_RESOURCE_WORD4_0__ENDIAN_SWAP_shift        = 12,
+/* 	    SQ_ENDIAN_NONE                                = 0x00, */
+/* 	    SQ_ENDIAN_8IN16                               = 0x01, */
+/* 	    SQ_ENDIAN_8IN32                               = 0x02, */
+	LOG2_NUM_FRAGMENTS_mask                           = 0x03 << 14,
+	LOG2_NUM_FRAGMENTS_shift                          = 14,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_mask           = 0x07 << 16,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_X_shift          = 16,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_mask           = 0x07 << 19,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Y_shift          = 19,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_mask           = 0x07 << 22,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_Z_shift          = 22,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_mask           = 0x07 << 25,
+	SQ_TEX_RESOURCE_WORD4_0__DST_SEL_W_shift          = 25,
+/* 	    SQ_SEL_X                                      = 0x00, */
+/* 	    SQ_SEL_Y                                      = 0x01, */
+/* 	    SQ_SEL_Z                                      = 0x02, */
+/* 	    SQ_SEL_W                                      = 0x03, */
+/* 	    SQ_SEL_0                                      = 0x04, */
+/* 	    SQ_SEL_1                                      = 0x05, */
+	BASE_LEVEL_mask                                   = 0x0f << 28,
+	BASE_LEVEL_shift                                  = 28,
+    SQ_VTX_CONSTANT_WORD4_0                               = 0x00030010,
+    SQ_TEX_RESOURCE_WORD5_0                               = 0x00030014,
+	LAST_LEVEL_mask                                   = 0x0f << 0,
+	LAST_LEVEL_shift                                  = 0,
+	BASE_ARRAY_mask                                   = 0x1fff << 4,
+	BASE_ARRAY_shift                                  = 4,
+	LAST_ARRAY_mask                                   = 0x1fff << 17,
+	LAST_ARRAY_shift                                  = 17,
+    SQ_TEX_RESOURCE_WORD6_0                               = 0x00030018,
+	PERF_MODULATION_mask                              = 0x07 << 3,
+	PERF_MODULATION_shift                             = 3,
+	INTERLACED_bit                                    = 1 << 6,
+	SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_mask             = 0xfff << 8,
+	SQ_TEX_RESOURCE_WORD6_0__MIN_LOD_shift            = 8,
+	SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_mask          = 0x07 << 29,
+	SQ_TEX_RESOURCE_WORD6_0__TILE_SPLIT_shift         = 29,
+	    SQ_ADDR_SURF_TILE_SPLIT_64B                   = 0x00,
+	    SQ_ADDR_SURF_TILE_SPLIT_128B                  = 0x01,
+	    SQ_ADDR_SURF_TILE_SPLIT_256B                  = 0x02,
+	    SQ_ADDR_SURF_TILE_SPLIT_512B                  = 0x03,
+	    SQ_ADDR_SURF_TILE_SPLIT_1KB                   = 0x04,
+	    SQ_ADDR_SURF_TILE_SPLIT_2KB                   = 0x05,
+	    SQ_ADDR_SURF_TILE_SPLIT_4KB                   = 0x06,
+    SQ_VTX_CONSTANT_WORD7_0                               = 0x0003001c,
+	SQ_VTX_CONSTANT_WORD7_0__TYPE_mask                = 0x03 << 30,
+	SQ_VTX_CONSTANT_WORD7_0__TYPE_shift               = 30,
+	    SQ_TEX_VTX_INVALID_TEXTURE                    = 0x00,
+	    SQ_TEX_VTX_INVALID_BUFFER                     = 0x01,
+	    SQ_TEX_VTX_VALID_TEXTURE                      = 0x02,
+	    SQ_TEX_VTX_VALID_BUFFER                       = 0x03,
+    SQ_TEX_RESOURCE_WORD7_0                               = 0x0003001c,
+	SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_mask         = 0x3f << 0,
+	SQ_TEX_RESOURCE_WORD7_0__DATA_FORMAT_shift        = 0,
+	SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_mask   = 0x03 << 6,
+	SQ_TEX_RESOURCE_WORD7_0__MACRO_TILE_ASPECT_shift  = 6,
+	    SQ_ADDR_SURF_MACRO_ASPECT_1                   = 0x00,
+	    SQ_ADDR_SURF_MACRO_ASPECT_2                   = 0x01,
+	    SQ_ADDR_SURF_MACRO_ASPECT_4                   = 0x02,
+	    SQ_ADDR_SURF_MACRO_ASPECT_8                   = 0x03,
+	SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_mask          = 0x03 << 8,
+	SQ_TEX_RESOURCE_WORD7_0__BANK_WIDTH_shift         = 8,
+	    SQ_ADDR_SURF_BANK_WH_1                        = 0x00,
+	    SQ_ADDR_SURF_BANK_WH_2                        = 0x01,
+	    SQ_ADDR_SURF_BANK_WH_4                        = 0x02,
+	    SQ_ADDR_SURF_BANK_WH_8                        = 0x03,
+	SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_mask         = 0x03 << 10,
+	SQ_TEX_RESOURCE_WORD7_0__BANK_HEIGHT_shift        = 10,
+/* 	    SQ_ADDR_SURF_BANK_WH_1                        = 0x00, */
+/* 	    SQ_ADDR_SURF_BANK_WH_2                        = 0x01, */
+/* 	    SQ_ADDR_SURF_BANK_WH_4                        = 0x02, */
+/* 	    SQ_ADDR_SURF_BANK_WH_8                        = 0x03, */
+	DEPTH_SAMPLE_ORDER_bit                            = 1 << 15,
+	SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_mask           = 0x03 << 16,
+	SQ_TEX_RESOURCE_WORD7_0__NUM_BANKS_shift          = 16,
+	    SQ_ADDR_SURF_2_BANK                           = 0x00,
+	    SQ_ADDR_SURF_4_BANK                           = 0x01,
+	    SQ_ADDR_SURF_8_BANK                           = 0x02,
+	    SQ_ADDR_SURF_16_BANK                          = 0x03,
+	SQ_TEX_RESOURCE_WORD7_0__TYPE_mask                = 0x03 << 30,
+	SQ_TEX_RESOURCE_WORD7_0__TYPE_shift               = 30,
+/* 	    SQ_TEX_VTX_INVALID_TEXTURE                    = 0x00, */
+/* 	    SQ_TEX_VTX_INVALID_BUFFER                     = 0x01, */
+/* 	    SQ_TEX_VTX_VALID_TEXTURE                      = 0x02, */
+/* 	    SQ_TEX_VTX_VALID_BUFFER                       = 0x03, */
+    SQ_LOOP_CONST_DX10_0                                  = 0x0003a200,
+    SQ_LOOP_CONST_0                                       = 0x0003a200,
+	SQ_LOOP_CONST_0__COUNT_mask                       = 0xfff << 0,
+	SQ_LOOP_CONST_0__COUNT_shift                      = 0,
+	INIT_mask                                         = 0xfff << 12,
+	INIT_shift                                        = 12,
+	INC_mask                                          = 0xff << 24,
+	INC_shift                                         = 24,
+    SQ_JUMPTABLE_CONST_0                                  = 0x0003a200,
+	CONST_A_mask                                      = 0xff << 0,
+	CONST_A_shift                                     = 0,
+	CONST_B_mask                                      = 0xff << 8,
+	CONST_B_shift                                     = 8,
+	CONST_C_mask                                      = 0xff << 16,
+	CONST_C_shift                                     = 16,
+	CONST_D_mask                                      = 0xff << 24,
+	CONST_D_shift                                     = 24,
+    SQ_BOOL_CONST_0                                       = 0x0003a500,
+	SQ_BOOL_CONST_0_num                               = 6,
+    SQ_TEX_SAMPLER_WORD0_0                                = 0x0003c000,
+	CLAMP_X_mask                                      = 0x07 << 0,
+	CLAMP_X_shift                                     = 0,
+	    SQ_TEX_WRAP                                   = 0x00,
+	    SQ_TEX_MIRROR                                 = 0x01,
+	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02,
+	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03,
+	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04,
+	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05,
+	    SQ_TEX_CLAMP_BORDER                           = 0x06,
+	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07,
+	CLAMP_Y_mask                                      = 0x07 << 3,
+	CLAMP_Y_shift                                     = 3,
+/* 	    SQ_TEX_WRAP                                   = 0x00, */
+/* 	    SQ_TEX_MIRROR                                 = 0x01, */
+/* 	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02, */
+/* 	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03, */
+/* 	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04, */
+/* 	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05, */
+/* 	    SQ_TEX_CLAMP_BORDER                           = 0x06, */
+/* 	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07, */
+	CLAMP_Z_mask                                      = 0x07 << 6,
+	CLAMP_Z_shift                                     = 6,
+/* 	    SQ_TEX_WRAP                                   = 0x00, */
+/* 	    SQ_TEX_MIRROR                                 = 0x01, */
+/* 	    SQ_TEX_CLAMP_LAST_TEXEL                       = 0x02, */
+/* 	    SQ_TEX_MIRROR_ONCE_LAST_TEXEL                 = 0x03, */
+/* 	    SQ_TEX_CLAMP_HALF_BORDER                      = 0x04, */
+/* 	    SQ_TEX_MIRROR_ONCE_HALF_BORDER                = 0x05, */
+/* 	    SQ_TEX_CLAMP_BORDER                           = 0x06, */
+/* 	    SQ_TEX_MIRROR_ONCE_BORDER                     = 0x07, */
+	XY_MAG_FILTER_mask                                = 0x03 << 9,
+	XY_MAG_FILTER_shift                               = 9,
+	    SQ_TEX_XY_FILTER_POINT                        = 0x00,
+	    SQ_TEX_XY_FILTER_BILINEAR                     = 0x01,
+	XY_MIN_FILTER_mask                                = 0x03 << 11,
+	XY_MIN_FILTER_shift                               = 11,
+/* 	    SQ_TEX_XY_FILTER_POINT                        = 0x00, */
+/* 	    SQ_TEX_XY_FILTER_BILINEAR                     = 0x01, */
+	Z_FILTER_mask                                     = 0x03 << 13,
+	Z_FILTER_shift                                    = 13,
+	    SQ_TEX_Z_FILTER_NONE                          = 0x00,
+	    SQ_TEX_Z_FILTER_POINT                         = 0x01,
+	    SQ_TEX_Z_FILTER_LINEAR                        = 0x02,
+	MIP_FILTER_mask                                   = 0x03 << 15,
+	MIP_FILTER_shift                                  = 15,
+/* 	    SQ_TEX_Z_FILTER_NONE                          = 0x00, */
+/* 	    SQ_TEX_Z_FILTER_POINT                         = 0x01, */
+/* 	    SQ_TEX_Z_FILTER_LINEAR                        = 0x02, */
+	BORDER_COLOR_TYPE_mask                            = 0x03 << 20,
+	BORDER_COLOR_TYPE_shift                           = 20,
+	    SQ_TEX_BORDER_COLOR_TRANS_BLACK               = 0x00,
+	    SQ_TEX_BORDER_COLOR_OPAQUE_BLACK              = 0x01,
+	    SQ_TEX_BORDER_COLOR_OPAQUE_WHITE              = 0x02,
+	    SQ_TEX_BORDER_COLOR_REGISTER                  = 0x03,
+	DEPTH_COMPARE_FUNCTION_mask                       = 0x07 << 22,
+	DEPTH_COMPARE_FUNCTION_shift                      = 22,
+	    SQ_TEX_DEPTH_COMPARE_NEVER                    = 0x00,
+	    SQ_TEX_DEPTH_COMPARE_LESS                     = 0x01,
+	    SQ_TEX_DEPTH_COMPARE_EQUAL                    = 0x02,
+	    SQ_TEX_DEPTH_COMPARE_LESSEQUAL                = 0x03,
+	    SQ_TEX_DEPTH_COMPARE_GREATER                  = 0x04,
+	    SQ_TEX_DEPTH_COMPARE_NOTEQUAL                 = 0x05,
+	    SQ_TEX_DEPTH_COMPARE_GREATEREQUAL             = 0x06,
+	    SQ_TEX_DEPTH_COMPARE_ALWAYS                   = 0x07,
+	FORCE_UNNORMALIZED_bit                            = 1 << 25,
+    SQ_TEX_SAMPLER_WORD1_0                                = 0x0003c004,
+	SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_mask              = 0xfff << 0,
+	SQ_TEX_SAMPLER_WORD1_0__MIN_LOD_shift             = 0,
+	MAX_LOD_mask                                      = 0xfff << 12,
+	MAX_LOD_shift                                     = 12,
+	PERF_MIP_mask                                     = 0x0f << 24,
+	PERF_MIP_shift                                    = 24,
+	PERF_Z_mask                                       = 0x0f << 28,
+	PERF_Z_shift                                      = 28,
+    SQ_TEX_SAMPLER_WORD2_0                                = 0x0003c008,
+	SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_mask             = 0x3fff << 0,
+	SQ_TEX_SAMPLER_WORD2_0__LOD_BIAS_shift            = 0,
+	LOD_BIAS_SEC_mask                                 = 0x3f << 14,
+	LOD_BIAS_SEC_shift                                = 14,
+	MC_COORD_TRUNCATE_bit                             = 1 << 20,
+	SQ_TEX_SAMPLER_WORD2_0__FORCE_DEGAMMA_bit         = 1 << 21,
+	TRUNCATE_COORD_bit                                = 1 << 28,
+	SQ_TEX_SAMPLER_WORD2_0__DISABLE_CUBE_WRAP_bit     = 1 << 29,
+	SQ_TEX_SAMPLER_WORD2_0__TYPE_bit                  = 1 << 31,
+    SQ_VTX_BASE_VTX_LOC                                   = 0x0003cff0,
+    SQ_VTX_START_INST_LOC                                 = 0x0003cff4,
+    SQ_TEX_SAMPLER_CLEAR                                  = 0x0003ff00,
+    SQ_TEX_RESOURCE_CLEAR                                 = 0x0003ff04,
+    SQ_LOOP_BOOL_CLEAR                                    = 0x0003ff08,
+    PA_CL_GB_VERT_DISC_ADJ                                = 0x0028be8c,
+
+} ;
+
+#endif /* _CAYMAN_REG_AUTO */
+
commit e1d28e011f4a5139cbc778973c63158ed2746716
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Wed Mar 2 20:13:50 2011 -0500

    kms/cayman: stub out exa support
    
    Just fallbacks for now.
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 69d29a8..8879d01 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -59,7 +59,8 @@ EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     const_config_t ps_const_conf;
     struct r600_accel_object dst;
 
-    //return FALSE;
+    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
+	return FALSE;
 
     if (!RADEONCheckBPP(pPix->drawable.bitsPerPixel))
 	RADEON_FALLBACK(("EVERGREENCheckDatatype failed\n"));
@@ -431,7 +432,8 @@ EVERGREENPrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
     struct radeon_accel_state *accel_state = info->accel_state;
     struct r600_accel_object src_obj, dst_obj;
 
-    //return FALSE;
+    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
+	return FALSE;
 
     if (!RADEONCheckBPP(pSrc->drawable.bitsPerPixel))
 	RADEON_FALLBACK(("EVERGREENCheckDatatype src failed\n"));
@@ -1089,7 +1091,8 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
     struct r600_accel_object src_obj, mask_obj, dst_obj;
     float *cbuf;
 
-    //return FALSE;
+    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
+	return FALSE;
 
     if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8)
 	return FALSE;
@@ -1428,6 +1431,9 @@ EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
 	}
     }
 
+    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
+	goto copy;
+
     scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
     height = RADEON_ALIGN(h, drmmode_get_height_align(pScrn, 0));
     base_align = drmmode_get_base_align(pScrn, (bpp / 8), 0);
@@ -1553,6 +1559,9 @@ EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
 
     }
 
+    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
+	goto copy;
+
     if (!accel_state->allowHWDFS)
 	goto copy;
 
diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
index 6200cdc..ce5d2e1 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -114,6 +114,9 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     float *vs_alu_consts;
     const_config_t vs_const_conf;
 
+    if (info->ChipFamily == CHIP_FAMILY_CAYMAN)
+	return;
+
     cont = RTFContrast(pPriv->contrast);
     bright = RTFBrightness(pPriv->brightness);
     gamma = (float)pPriv->gamma / 1000.0;
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index aac17e2..ebbe30f 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -280,7 +280,6 @@ static Bool RADEONPreInitAccel_KMS(ScrnInfoPtr pScrn)
     }
 
     if (xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE) ||
-	(info->ChipFamily >= CHIP_FAMILY_CAYMAN) ||
 	(!RADEONIsAccelWorking(pScrn))) {
 	xf86DrvMsg(pScrn->scrnIndex, X_INFO,
 		   "GPU accel disabled or not working, using shadowfb for KMS\n");
commit 21e44a20b8b1b64079ee77f45aaa5010206ed7b6
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Thu May 12 09:23:38 2011 +0200

    UMS: Fix comparison of unsigned variable against < 0.
    
    Pointed out by clang:
    
    ../../src/radeon_crtc.c:242:18: error: comparison of unsigned expression < 0 is always false [-Werror,-Wtautological-compare]
                            error = error < 0 ? 0xffffffff : error;
                                    ~~~~~ ^ ~
    
    If a UMS regression is bisected to this commit, the assignment should probably
    just be removed, as it's a no-op in the current form.

diff --git a/src/radeon_crtc.c b/src/radeon_crtc.c
index d84112e..3c03700 100644
--- a/src/radeon_crtc.c
+++ b/src/radeon_crtc.c
@@ -239,7 +239,7 @@ RADEONComputePLL_old(RADEONPLLPtr pll,
 
 		    if (flags & RADEON_PLL_PREFER_CLOSEST_LOWER) {
 			error = freq - current_freq;
-			error = error < 0 ? 0xffffffff : error;
+			error = (int32_t)error < 0 ? 0xffffffff : error;
 		    } else
 			error = abs(current_freq - freq);
 		    vco_diff = abs(vco - best_vco);
commit 3b893d81982c9381393c92625e308541e0071b05
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Fri May 13 09:10:02 2011 +0200

    KMS: Fix output properties logic error.
    
    Pointed out by clang:
    
    ../../src/drmmode_display.c:1023:30: error: use of logical && with constant operand; switch to bitwise & or remove constant [-Werror,-Wconstant-logical-operand]
                    if (props && (props->flags && DRM_MODE_PROP_ENUM)) {
                                               ^  ~~~~~~~~~~~~~~~~~~
    
    Reviewed-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/drmmode_display.c b/src/drmmode_display.c
index 7873d57..afa4c26 100644
--- a/src/drmmode_display.c
+++ b/src/drmmode_display.c
@@ -1020,7 +1020,7 @@ drmmode_output_init(ScrnInfoPtr pScrn, drmmode_ptr drmmode, int num, int *num_dv
 
 	for (i = 0; i < koutput->count_props; i++) {
 		props = drmModeGetProperty(drmmode->fd, koutput->props[i]);
-		if (props && (props->flags && DRM_MODE_PROP_ENUM)) {
+		if (props && (props->flags & DRM_MODE_PROP_ENUM)) {
 			if (!strcmp(props->name, "DPMS")) {
 				drmmode_output->dpms_enum_id = koutput->props[i];
 				drmModeFreeProperty(props);
commit f83d58cf5b33686139067f8f898b8e566ba5c253
Author: Nicolas Kaiser <nikai at nikai.net>
Date:   Fri May 13 00:56:31 2011 +0200

    man: fix typos
    
    Signed-off-by: Nicolas Kaiser <nikai at nikai.net>

diff --git a/man/radeon.man b/man/radeon.man
index 4ec7650..b29f073 100644
--- a/man/radeon.man
+++ b/man/radeon.man
@@ -237,7 +237,7 @@ hardware acceleration.
 .TP
 .BI "Option \*qZaphodHeads\*q \*q" string \*q
 Specify the RandR output(s) to use with zaphod mode for a particular driver
-instance.  If you use this option you most use this option for all instances
+instance.  If you use this option you must use this option for all instances
 of the driver.
 .br
 For example:
@@ -355,7 +355,7 @@ The default is
 .BI "Option \*qBusType\*q \*q" string \*q
 Used to replace previous ForcePCIMode option.
 Should only be used when driver's bus detection is incorrect
-or you want to force a AGP card to PCI mode. You should NEVER force
+or you want to force an AGP card to PCI mode. You should NEVER force
 a PCI card to AGP bus.
 .br
 PCI    \-\- PCI bus
@@ -428,7 +428,7 @@ will assign the EDID from the file /tmp/edid1.bin to the output device
 VGA-0, and the EDID from the file /tmp/edid2.bin to the output device
 DVI-0 and force the DVI port to use the digital encoder.
 .br
-Note that a output name must always be specified,
+Note that an output name must always be specified,
 even if only one EDID is specified.
 .br
 .B
commit 90abffbd30f44b9cf76a6e28103ddcb5419b4522
Author: Ilija Hadzic <ihadzic at research.bell-labs.com>
Date:   Fri May 6 09:45:23 2011 -0400

    DRI2: fix high-crtc/vblank oversight/bug
    
    Improvements to high-crtc handling done in
    f0b7d7b449cc77bb2b281d81108507f8bc2e6018 introduced a bug that caused
    populate_vbl_request_type to never use the high-crtc field even
    when it should. The reason is that the offending patch put the code
    under #ifdef DRM_VBLANK_HIGH_CRTC_MASK, which is an enum constant and
    therefore not visible to the preprocessor, so the #else branch was always
    taken at compile time. This patch fixes it by basing the #ifdef on the
    (preprocessor-visible) DRM_VBLANK_HIGH_CRTC_SHIFT constant.
    
    Signed-off-by: Ilija Hadzic <ihadzic at research.bell-labs.com>

diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c
index e618cc5..bbe1a94 100644
--- a/src/radeon_dri2.c
+++ b/src/radeon_dri2.c
@@ -778,7 +778,7 @@ static drmVBlankSeqType populate_vbl_request_type(RADEONInfoPtr info, int crtc)
     if (crtc == 1)
         type |= DRM_VBLANK_SECONDARY;
     else if (crtc > 1)
-#ifdef DRM_VBLANK_HIGH_CRTC_MASK
+#ifdef DRM_VBLANK_HIGH_CRTC_SHIFT
 	type |= (crtc << DRM_VBLANK_HIGH_CRTC_SHIFT) &
 		DRM_VBLANK_HIGH_CRTC_MASK;
 #else
commit 62a4cd180fe884dca24586d453395472516e6496
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Wed May 4 01:13:55 2011 -0400

    fusion: fix tiling enable logic
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index 459ca68..aac17e2 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -670,18 +670,18 @@ Bool RADEONPreInit_KMS(ScrnInfoPtr pScrn, int flags)
 	    info->group_bytes = 256;
 	    info->have_tiling_info = FALSE;
 	    if (info->dri->pKernelDRMVersion->version_minor >= 6) {
-		if (r600_get_tile_config(pScrn))
+		if (r600_get_tile_config(pScrn)) {
 		    info->allowColorTiling = xf86ReturnOptValBool(info->Options,
 								  OPTION_COLOR_TILING, colorTilingDefault);
-		else
+		    /* need working DFS for tiling */
+		    if ((info->ChipFamily == CHIP_FAMILY_PALM) &&
+			(!info->accel_state->allowHWDFS))
+			info->allowColorTiling = FALSE;
+		} else
 		    info->allowColorTiling = FALSE;
 	    } else
 		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
 			   "R6xx+ KMS Color Tiling requires radeon drm 2.6.0 or newer\n");
-
-	    /* need working DFS for tiling */
-	    if (info->ChipFamily == CHIP_FAMILY_PALM)
-		info->allowColorTiling = info->accel_state->allowHWDFS;
 	} else
 	    info->allowColorTiling = xf86ReturnOptValBool(info->Options,
 							  OPTION_COLOR_TILING, colorTilingDefault);
commit 76638ca687b02d3b1494b9868f817fd4fd892c64
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Wed May 4 01:06:22 2011 -0400

    fusion: enable tiling if DFS works
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index b8fcb99..459ca68 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -679,9 +679,9 @@ Bool RADEONPreInit_KMS(ScrnInfoPtr pScrn, int flags)
 		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
 			   "R6xx+ KMS Color Tiling requires radeon drm 2.6.0 or newer\n");
 
-	    /* don't support tiling on APUs yet */
+	    /* need working DFS for tiling */
 	    if (info->ChipFamily == CHIP_FAMILY_PALM)
-		info->allowColorTiling = FALSE;
+		info->allowColorTiling = info->accel_state->allowHWDFS;
 	} else
 	    info->allowColorTiling = xf86ReturnOptValBool(info->Options,
 							  OPTION_COLOR_TILING, colorTilingDefault);
commit a6d2dba6573a3512d550d7e442bf42ea03012bbc
Author: Dave Airlie <airlied at redhat.com>
Date:   Wed May 4 10:44:43 2011 +1000

    radeon: add hw DFS support for fusion
    
    Fusion had a bug setting up the VM on earlier kernels so we need to work
    around that and only enable accel on a new enough kernel.
    
    Signed-off-by: Dave Airlie <airlied at redhat.com>

diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index d257939..69d29a8 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -1553,7 +1553,7 @@ EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
 
     }
 
-    if (info->ChipFamily == CHIP_FAMILY_PALM)
+    if (!accel_state->allowHWDFS)
 	goto copy;
 
     scratch_pitch = RADEON_ALIGN(w, drmmode_get_pitch_align(pScrn, (bpp / 8), 0));
diff --git a/src/radeon.h b/src/radeon.h
index f655040..a9a2b69 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -717,6 +717,7 @@ struct radeon_accel_state {
     Bool              XInited3D; /* X itself has the 3D context */
     int               num_gb_pipes;
     Bool              has_tcl;
+    Bool              allowHWDFS;
 
 #ifdef USE_EXA
     /* EXA */
diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index 0760170..b8fcb99 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -213,6 +213,28 @@ radeon_flush_callback(CallbackListPtr *list,
     }
 }
 
+static Bool RADEONIsFusionGARTWorking(ScrnInfoPtr pScrn)
+{
+    RADEONInfoPtr info = RADEONPTR(pScrn);
+    struct drm_radeon_info ginfo;
+    int r;
+    uint32_t tmp;
+
+#ifndef RADEON_INFO_FUSION_GART_WORKING
+#define RADEON_INFO_FUSION_GART_WORKING 0x0c
+#endif
+    memset(&ginfo, 0, sizeof(ginfo));
+    ginfo.request = RADEON_INFO_FUSION_GART_WORKING;
+    ginfo.value = (uintptr_t)&tmp;
+    r = drmCommandWriteRead(info->dri->drmFD, DRM_RADEON_INFO, &ginfo, sizeof(ginfo));
+    if (r) {
+	return FALSE;
+    }
+    if (tmp == 1)
+	return TRUE;
+    return FALSE;
+}
+
 static Bool RADEONIsAccelWorking(ScrnInfoPtr pScrn)
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
@@ -268,6 +290,11 @@ static Bool RADEONPreInitAccel_KMS(ScrnInfoPtr pScrn)
 	return TRUE;
     }
 
+    if (info->ChipFamily == CHIP_FAMILY_PALM) {
+	info->accel_state->allowHWDFS = RADEONIsFusionGARTWorking(pScrn);
+    } else
+	info->accel_state->allowHWDFS = TRUE;
+
     if ((info->ChipFamily == CHIP_FAMILY_RS100) ||
 	(info->ChipFamily == CHIP_FAMILY_RS200) ||
 	(info->ChipFamily == CHIP_FAMILY_RS300) ||
commit 859e052af49e68a826b77a9135c7f067dc331a06
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Tue May 3 15:15:04 2011 -0400

    radeon: add some new pci ids
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/ati_pciids_gen.h b/src/ati_pciids_gen.h
index ba46c17..9e1e086 100644
--- a/src/ati_pciids_gen.h
+++ b/src/ati_pciids_gen.h
@@ -471,6 +471,7 @@
 #define PCI_CHIP_CYPRESS_688D 0x688D
 #define PCI_CHIP_CYPRESS_6898 0x6898
 #define PCI_CHIP_CYPRESS_6899 0x6899
+#define PCI_CHIP_CYPRESS_689B 0x689B
 #define PCI_CHIP_CYPRESS_689E 0x689E
 #define PCI_CHIP_HEMLOCK_689C 0x689C
 #define PCI_CHIP_HEMLOCK_689D 0x689D
@@ -481,7 +482,9 @@
 #define PCI_CHIP_JUNIPER_68B0 0x68B0
 #define PCI_CHIP_JUNIPER_68B8 0x68B8
 #define PCI_CHIP_JUNIPER_68B9 0x68B9
+#define PCI_CHIP_JUNIPER_68BA 0x68BA
 #define PCI_CHIP_JUNIPER_68BE 0x68BE
+#define PCI_CHIP_JUNIPER_68BF 0x68BF
 #define PCI_CHIP_REDWOOD_68C0 0x68C0
 #define PCI_CHIP_REDWOOD_68C1 0x68C1
 #define PCI_CHIP_REDWOOD_68C7 0x68C7
@@ -516,6 +519,7 @@
 #define PCI_CHIP_CAYMAN_6719 0x6719
 #define PCI_CHIP_CAYMAN_671C 0x671C
 #define PCI_CHIP_CAYMAN_671D 0x671D
+#define PCI_CHIP_CAYMAN_671F 0x671F
 #define PCI_CHIP_BARTS_6720 0x6720
 #define PCI_CHIP_BARTS_6721 0x6721
 #define PCI_CHIP_BARTS_6722 0x6722
@@ -528,6 +532,7 @@
 #define PCI_CHIP_BARTS_6729 0x6729
 #define PCI_CHIP_BARTS_6738 0x6738
 #define PCI_CHIP_BARTS_6739 0x6739
+#define PCI_CHIP_BARTS_673E 0x673E
 #define PCI_CHIP_TURKS_6740 0x6740
 #define PCI_CHIP_TURKS_6741 0x6741
 #define PCI_CHIP_TURKS_6742 0x6742
diff --git a/src/pcidb/ati_pciids.csv b/src/pcidb/ati_pciids.csv
index fd1fdbc..04c204f 100644
--- a/src/pcidb/ati_pciids.csv
+++ b/src/pcidb/ati_pciids.csv
@@ -472,6 +472,7 @@
 "0x688D","CYPRESS_688D","CYPRESS",,,,,,"AMD Firestream 9350"
 "0x6898","CYPRESS_6898","CYPRESS",,,,,,"ATI Radeon HD 5800 Series"
 "0x6899","CYPRESS_6899","CYPRESS",,,,,,"ATI Radeon HD 5800 Series"
+"0x689B","CYPRESS_689B","CYPRESS",,,,,,"ATI Radeon HD 5800 Series"
 "0x689E","CYPRESS_689E","CYPRESS",,,,,,"ATI Radeon HD 5800 Series"
 "0x689C","HEMLOCK_689C","HEMLOCK",,,,,,"ATI Radeon HD 5900 Series"
 "0x689D","HEMLOCK_689D","HEMLOCK",,,,,,"ATI Radeon HD 5900 Series"
@@ -482,7 +483,9 @@
 "0x68B0","JUNIPER_68B0","JUNIPER",1,,,,,"ATI Mobility Radeon HD 5800 Series"
 "0x68B8","JUNIPER_68B8","JUNIPER",,,,,,"ATI Radeon HD 5700 Series"
 "0x68B9","JUNIPER_68B9","JUNIPER",,,,,,"ATI Radeon HD 5700 Series"
+"0x68BA","JUNIPER_68BA","JUNIPER",,,,,,"ATI Radeon HD 6700 Series"
 "0x68BE","JUNIPER_68BE","JUNIPER",,,,,,"ATI Radeon HD 5700 Series"
+"0x68BF","JUNIPER_68BF","JUNIPER",,,,,,"ATI Radeon HD 6700 Series"
 "0x68C0","REDWOOD_68C0","REDWOOD",1,,,,,"ATI Mobility Radeon HD 5000 Series"
 "0x68C1","REDWOOD_68C1","REDWOOD",1,,,,,"ATI Mobility Radeon HD 5000 Series"
 "0x68C7","REDWOOD_68C7","REDWOOD",1,,,,,"ATI Mobility Radeon HD 5570"
@@ -517,6 +520,7 @@
 "0x6719","CAYMAN_6719","CAYMAN",,,,,,"AMD Radeon HD 6900 Series"
 "0x671C","CAYMAN_671C","CAYMAN",,,,,,"CAYMAN"
 "0x671D","CAYMAN_671D","CAYMAN",,,,,,"CAYMAN"
+"0x671F","CAYMAN_671F","CAYMAN",,,,,,"CAYMAN"
 "0x6720","BARTS_6720","BARTS",1,,,,,"AMD Radeon HD 6900M Series"
 "0x6721","BARTS_6721","BARTS",1,,,,,"Mobility Radeon HD 6000 Series"
 "0x6722","BARTS_6722","BARTS",,,,,,"BARTS"
@@ -529,6 +533,7 @@
 "0x6729","BARTS_6729","BARTS",,,,,,"BARTS"
 "0x6738","BARTS_6738","BARTS",,,,,,"AMD Radeon HD 6800 Series"
 "0x6739","BARTS_6739","BARTS",,,,,,"AMD Radeon HD 6800 Series"
+"0x673E","BARTS_673E","BARTS",,,,,,"AMD Radeon HD 6700 Series"
 "0x6740","TURKS_6740","TURKS",1,,,,,"TURKS"
 "0x6741","TURKS_6741","TURKS",1,,,,,"TURKS"
 "0x6742","TURKS_6742","TURKS",1,,,,,"TURKS"
diff --git a/src/radeon_chipinfo_gen.h b/src/radeon_chipinfo_gen.h
index 630154b..23c1697 100644
--- a/src/radeon_chipinfo_gen.h
+++ b/src/radeon_chipinfo_gen.h
@@ -391,6 +391,7 @@ static RADEONCardInfo RADEONCards[] = {
  { 0x688D, CHIP_FAMILY_CYPRESS, 0, 0, 0, 0, 0 },
  { 0x6898, CHIP_FAMILY_CYPRESS, 0, 0, 0, 0, 0 },
  { 0x6899, CHIP_FAMILY_CYPRESS, 0, 0, 0, 0, 0 },
+ { 0x689B, CHIP_FAMILY_CYPRESS, 0, 0, 0, 0, 0 },
  { 0x689E, CHIP_FAMILY_CYPRESS, 0, 0, 0, 0, 0 },
  { 0x689C, CHIP_FAMILY_HEMLOCK, 0, 0, 0, 0, 0 },
  { 0x689D, CHIP_FAMILY_HEMLOCK, 0, 0, 0, 0, 0 },
@@ -401,7 +402,9 @@ static RADEONCardInfo RADEONCards[] = {
  { 0x68B0, CHIP_FAMILY_JUNIPER, 1, 0, 0, 0, 0 },
  { 0x68B8, CHIP_FAMILY_JUNIPER, 0, 0, 0, 0, 0 },
  { 0x68B9, CHIP_FAMILY_JUNIPER, 0, 0, 0, 0, 0 },
+ { 0x68BA, CHIP_FAMILY_JUNIPER, 0, 0, 0, 0, 0 },
  { 0x68BE, CHIP_FAMILY_JUNIPER, 0, 0, 0, 0, 0 },
+ { 0x68BF, CHIP_FAMILY_JUNIPER, 0, 0, 0, 0, 0 },
  { 0x68C0, CHIP_FAMILY_REDWOOD, 1, 0, 0, 0, 0 },
  { 0x68C1, CHIP_FAMILY_REDWOOD, 1, 0, 0, 0, 0 },
  { 0x68C7, CHIP_FAMILY_REDWOOD, 1, 0, 0, 0, 0 },
@@ -436,6 +439,7 @@ static RADEONCardInfo RADEONCards[] = {
  { 0x6719, CHIP_FAMILY_CAYMAN, 0, 0, 0, 0, 0 },
  { 0x671C, CHIP_FAMILY_CAYMAN, 0, 0, 0, 0, 0 },
  { 0x671D, CHIP_FAMILY_CAYMAN, 0, 0, 0, 0, 0 },
+ { 0x671F, CHIP_FAMILY_CAYMAN, 0, 0, 0, 0, 0 },
  { 0x6720, CHIP_FAMILY_BARTS, 1, 0, 0, 0, 0 },
  { 0x6721, CHIP_FAMILY_BARTS, 1, 0, 0, 0, 0 },
  { 0x6722, CHIP_FAMILY_BARTS, 0, 0, 0, 0, 0 },
@@ -448,6 +452,7 @@ static RADEONCardInfo RADEONCards[] = {
  { 0x6729, CHIP_FAMILY_BARTS, 0, 0, 0, 0, 0 },
  { 0x6738, CHIP_FAMILY_BARTS, 0, 0, 0, 0, 0 },
  { 0x6739, CHIP_FAMILY_BARTS, 0, 0, 0, 0, 0 },
+ { 0x673E, CHIP_FAMILY_BARTS, 0, 0, 0, 0, 0 },
  { 0x6740, CHIP_FAMILY_TURKS, 1, 0, 0, 0, 0 },
  { 0x6741, CHIP_FAMILY_TURKS, 1, 0, 0, 0, 0 },
  { 0x6742, CHIP_FAMILY_TURKS, 1, 0, 0, 0, 0 },
diff --git a/src/radeon_chipset_gen.h b/src/radeon_chipset_gen.h
index 9996e0a..fb86211 100644
--- a/src/radeon_chipset_gen.h
+++ b/src/radeon_chipset_gen.h
@@ -391,6 +391,7 @@ static SymTabRec RADEONChipsets[] = {
   { PCI_CHIP_CYPRESS_688D, "AMD Firestream 9350" },
   { PCI_CHIP_CYPRESS_6898, "ATI Radeon HD 5800 Series" },
   { PCI_CHIP_CYPRESS_6899, "ATI Radeon HD 5800 Series" },
+  { PCI_CHIP_CYPRESS_689B, "ATI Radeon HD 5800 Series" },
   { PCI_CHIP_CYPRESS_689E, "ATI Radeon HD 5800 Series" },
   { PCI_CHIP_HEMLOCK_689C, "ATI Radeon HD 5900 Series" },
   { PCI_CHIP_HEMLOCK_689D, "ATI Radeon HD 5900 Series" },
@@ -401,7 +402,9 @@ static SymTabRec RADEONChipsets[] = {
   { PCI_CHIP_JUNIPER_68B0, "ATI Mobility Radeon HD 5800 Series" },
   { PCI_CHIP_JUNIPER_68B8, "ATI Radeon HD 5700 Series" },
   { PCI_CHIP_JUNIPER_68B9, "ATI Radeon HD 5700 Series" },
+  { PCI_CHIP_JUNIPER_68BA, "ATI Radeon HD 6700 Series" },
   { PCI_CHIP_JUNIPER_68BE, "ATI Radeon HD 5700 Series" },
+  { PCI_CHIP_JUNIPER_68BF, "ATI Radeon HD 6700 Series" },
   { PCI_CHIP_REDWOOD_68C0, "ATI Mobility Radeon HD 5000 Series" },
   { PCI_CHIP_REDWOOD_68C1, "ATI Mobility Radeon HD 5000 Series" },
   { PCI_CHIP_REDWOOD_68C7, "ATI Mobility Radeon HD 5570" },
@@ -436,6 +439,7 @@ static SymTabRec RADEONChipsets[] = {
   { PCI_CHIP_CAYMAN_6719, "AMD Radeon HD 6900 Series" },
   { PCI_CHIP_CAYMAN_671C, "CAYMAN" },
   { PCI_CHIP_CAYMAN_671D, "CAYMAN" },
+  { PCI_CHIP_CAYMAN_671F, "CAYMAN" },
   { PCI_CHIP_BARTS_6720, "AMD Radeon HD 6900M Series" },
   { PCI_CHIP_BARTS_6721, "Mobility Radeon HD 6000 Series" },
   { PCI_CHIP_BARTS_6722, "BARTS" },
@@ -448,6 +452,7 @@ static SymTabRec RADEONChipsets[] = {
   { PCI_CHIP_BARTS_6729, "BARTS" },
   { PCI_CHIP_BARTS_6738, "AMD Radeon HD 6800 Series" },
   { PCI_CHIP_BARTS_6739, "AMD Radeon HD 6800 Series" },
+  { PCI_CHIP_BARTS_673E, "AMD Radeon HD 6700 Series" },
   { PCI_CHIP_TURKS_6740, "TURKS" },
   { PCI_CHIP_TURKS_6741, "TURKS" },
   { PCI_CHIP_TURKS_6742, "TURKS" },
diff --git a/src/radeon_pci_chipset_gen.h b/src/radeon_pci_chipset_gen.h
index 67fec23..64af176 100644
--- a/src/radeon_pci_chipset_gen.h
+++ b/src/radeon_pci_chipset_gen.h
@@ -391,6 +391,7 @@ PciChipsets RADEONPciChipsets[] = {
  { PCI_CHIP_CYPRESS_688D, PCI_CHIP_CYPRESS_688D, RES_SHARED_VGA },
  { PCI_CHIP_CYPRESS_6898, PCI_CHIP_CYPRESS_6898, RES_SHARED_VGA },
  { PCI_CHIP_CYPRESS_6899, PCI_CHIP_CYPRESS_6899, RES_SHARED_VGA },
+ { PCI_CHIP_CYPRESS_689B, PCI_CHIP_CYPRESS_689B, RES_SHARED_VGA },
  { PCI_CHIP_CYPRESS_689E, PCI_CHIP_CYPRESS_689E, RES_SHARED_VGA },
  { PCI_CHIP_HEMLOCK_689C, PCI_CHIP_HEMLOCK_689C, RES_SHARED_VGA },
  { PCI_CHIP_HEMLOCK_689D, PCI_CHIP_HEMLOCK_689D, RES_SHARED_VGA },
@@ -401,7 +402,9 @@ PciChipsets RADEONPciChipsets[] = {
  { PCI_CHIP_JUNIPER_68B0, PCI_CHIP_JUNIPER_68B0, RES_SHARED_VGA },
  { PCI_CHIP_JUNIPER_68B8, PCI_CHIP_JUNIPER_68B8, RES_SHARED_VGA },
  { PCI_CHIP_JUNIPER_68B9, PCI_CHIP_JUNIPER_68B9, RES_SHARED_VGA },
+ { PCI_CHIP_JUNIPER_68BA, PCI_CHIP_JUNIPER_68BA, RES_SHARED_VGA },
  { PCI_CHIP_JUNIPER_68BE, PCI_CHIP_JUNIPER_68BE, RES_SHARED_VGA },
+ { PCI_CHIP_JUNIPER_68BF, PCI_CHIP_JUNIPER_68BF, RES_SHARED_VGA },
  { PCI_CHIP_REDWOOD_68C0, PCI_CHIP_REDWOOD_68C0, RES_SHARED_VGA },
  { PCI_CHIP_REDWOOD_68C1, PCI_CHIP_REDWOOD_68C1, RES_SHARED_VGA },
  { PCI_CHIP_REDWOOD_68C7, PCI_CHIP_REDWOOD_68C7, RES_SHARED_VGA },
@@ -436,6 +439,7 @@ PciChipsets RADEONPciChipsets[] = {
  { PCI_CHIP_CAYMAN_6719, PCI_CHIP_CAYMAN_6719, RES_SHARED_VGA },
  { PCI_CHIP_CAYMAN_671C, PCI_CHIP_CAYMAN_671C, RES_SHARED_VGA },
  { PCI_CHIP_CAYMAN_671D, PCI_CHIP_CAYMAN_671D, RES_SHARED_VGA },
+ { PCI_CHIP_CAYMAN_671F, PCI_CHIP_CAYMAN_671F, RES_SHARED_VGA },
  { PCI_CHIP_BARTS_6720, PCI_CHIP_BARTS_6720, RES_SHARED_VGA },
  { PCI_CHIP_BARTS_6721, PCI_CHIP_BARTS_6721, RES_SHARED_VGA },
  { PCI_CHIP_BARTS_6722, PCI_CHIP_BARTS_6722, RES_SHARED_VGA },
@@ -448,6 +452,7 @@ PciChipsets RADEONPciChipsets[] = {
  { PCI_CHIP_BARTS_6729, PCI_CHIP_BARTS_6729, RES_SHARED_VGA },
  { PCI_CHIP_BARTS_6738, PCI_CHIP_BARTS_6738, RES_SHARED_VGA },
  { PCI_CHIP_BARTS_6739, PCI_CHIP_BARTS_6739, RES_SHARED_VGA },
+ { PCI_CHIP_BARTS_673E, PCI_CHIP_BARTS_673E, RES_SHARED_VGA },
  { PCI_CHIP_TURKS_6740, PCI_CHIP_TURKS_6740, RES_SHARED_VGA },
  { PCI_CHIP_TURKS_6741, PCI_CHIP_TURKS_6741, RES_SHARED_VGA },
  { PCI_CHIP_TURKS_6742, PCI_CHIP_TURKS_6742, RES_SHARED_VGA },
diff --git a/src/radeon_pci_device_match_gen.h b/src/radeon_pci_device_match_gen.h
index 60b975c..e09daae 100644
--- a/src/radeon_pci_device_match_gen.h
+++ b/src/radeon_pci_device_match_gen.h
@@ -391,6 +391,7 @@ static const struct pci_id_match radeon_device_match[] = {
  ATI_DEVICE_MATCH( PCI_CHIP_CYPRESS_688D, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_CYPRESS_6898, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_CYPRESS_6899, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_CYPRESS_689B, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_CYPRESS_689E, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_HEMLOCK_689C, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_HEMLOCK_689D, 0 ),
@@ -401,7 +402,9 @@ static const struct pci_id_match radeon_device_match[] = {
  ATI_DEVICE_MATCH( PCI_CHIP_JUNIPER_68B0, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_JUNIPER_68B8, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_JUNIPER_68B9, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_JUNIPER_68BA, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_JUNIPER_68BE, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_JUNIPER_68BF, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_REDWOOD_68C0, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_REDWOOD_68C1, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_REDWOOD_68C7, 0 ),
@@ -436,6 +439,7 @@ static const struct pci_id_match radeon_device_match[] = {
  ATI_DEVICE_MATCH( PCI_CHIP_CAYMAN_6719, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_CAYMAN_671C, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_CAYMAN_671D, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_CAYMAN_671F, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_BARTS_6720, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_BARTS_6721, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_BARTS_6722, 0 ),
@@ -448,6 +452,7 @@ static const struct pci_id_match radeon_device_match[] = {
  ATI_DEVICE_MATCH( PCI_CHIP_BARTS_6729, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_BARTS_6738, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_BARTS_6739, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_BARTS_673E, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_TURKS_6740, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_TURKS_6741, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_TURKS_6742, 0 ),
commit 8f8bbf628c6eed037f57bc8c155f0ecdacbebad1
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Sun May 1 13:19:15 2011 -0400

    man: add cayman to man page
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/man/radeon.man b/man/radeon.man
index 295b194..4ec7650 100644
--- a/man/radeon.man
+++ b/man/radeon.man
@@ -193,6 +193,9 @@ Radeon HD 6570/6670
 .TP 12
 .B CAICOS
 Radeon HD 6450
+.TP 12
+.B CAYMAN
+Radeon HD 6950/6970/6990
 .PD
 .SH CONFIGURATION DETAILS
 Please refer to __xconfigfile__(__filemansuffix__) for general configuration
commit 903e90c31cf0319be9297529aa7b8daa1756cf63
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Wed Apr 20 03:10:08 2011 -0400

    EXA/Xv: use cached bo tiling flags for accel setup on 6xx+
    
    This avoids calling into the kernel for each bo in the accel
    code.  This is a follow on to:
    cc7d1fa39da40a532fcdbe6c7924ca47a879e66a
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index d93cb42..d257939 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -68,6 +68,7 @@ EVERGREENPrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
 
     dst.offset = 0;
     dst.bo = radeon_get_pixmap_bo(pPix);
+    dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
 
     dst.pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8);
     dst.width = pPix->drawable.width;
@@ -448,6 +449,8 @@ EVERGREENPrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
     dst_obj.offset = 0;
     src_obj.bo = radeon_get_pixmap_bo(pSrc);
     dst_obj.bo = radeon_get_pixmap_bo(pDst);
+    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
+    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
     if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
 	accel_state->same_surface = TRUE;
 
@@ -1095,6 +1098,8 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
     dst_obj.offset = 0;
     src_obj.bo = radeon_get_pixmap_bo(pSrc);
     dst_obj.bo = radeon_get_pixmap_bo(pDst);
+    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
+    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
 
     src_obj.pitch = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8);
     dst_obj.pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8);
@@ -1112,6 +1117,7 @@ static Bool EVERGREENPrepareComposite(int op, PicturePtr pSrcPicture,
     if (pMask) {
 	mask_obj.offset = 0;
 	mask_obj.bo = radeon_get_pixmap_bo(pMask);
+	mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
 	mask_obj.pitch = exaGetPixmapPitch(pMask) / (pMask->drawable.bitsPerPixel / 8);
 
 	mask_obj.width = pMask->drawable.width;
@@ -1438,6 +1444,7 @@ EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
     src_obj.bpp = bpp;
     src_obj.domain = RADEON_GEM_DOMAIN_GTT;
     src_obj.bo = scratch;
+    src_obj.tiling_flags = 0;
 
     dst_obj.pitch = dst_pitch_hw;
     dst_obj.width = pDst->drawable.width;
@@ -1446,6 +1453,7 @@ EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
     dst_obj.bpp = bpp;
     dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
     dst_obj.bo = radeon_get_pixmap_bo(pDst);
+    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
 
     if (!R600SetAccelState(pScrn,
 			   &src_obj,
@@ -1575,6 +1583,7 @@ EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
     src_obj.bpp = bpp;
     src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
     src_obj.bo = radeon_get_pixmap_bo(pSrc);
+    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
 
     dst_obj.pitch = scratch_pitch;
     dst_obj.width = w;
@@ -1583,6 +1592,7 @@ EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
     dst_obj.bo = scratch;
     dst_obj.bpp = bpp;
     dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
+    dst_obj.tiling_flags = 0;
 
     if (!R600SetAccelState(pScrn,
 			   &src_obj,
diff --git a/src/evergreen_textured_videofuncs.c b/src/evergreen_textured_videofuncs.c
index 147cd4e..6200cdc 100644
--- a/src/evergreen_textured_videofuncs.c
+++ b/src/evergreen_textured_videofuncs.c
@@ -154,18 +154,11 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     CLEAR (vs_const_conf);
     CLEAR (ps_const_conf);
 
-#if defined(XF86DRM_MODE)
-    if (info->cs) {
-	dst_obj.offset = 0;
-	src_obj.offset = 0;
-	dst_obj.bo = radeon_get_pixmap_bo(pPixmap);
-    } else
-#endif
-    {
-	dst_obj.offset = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset;
-	src_obj.offset = pPriv->src_offset + info->fbLocation + pScrn->fbOffset;
-	dst_obj.bo = src_obj.bo = NULL;
-    }
+    dst_obj.offset = 0;
+    src_obj.offset = 0;
+    dst_obj.bo = radeon_get_pixmap_bo(pPixmap);
+    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pPixmap);
+
     dst_obj.pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8);
 
     src_obj.pitch = pPriv->src_pitch;
@@ -174,6 +167,7 @@ EVERGREENDisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     src_obj.bpp = 16;
     src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
     src_obj.bo = pPriv->src_bo[pPriv->currentBuffer];
+    src_obj.tiling_flags = 0;
 
     dst_obj.width = pPixmap->drawable.width;
     dst_obj.height = pPixmap->drawable.height;
diff --git a/src/r600_exa.c b/src/r600_exa.c
index c6a244c..2673599 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -53,7 +53,6 @@ R600SetAccelState(ScrnInfoPtr pScrn,
 {
     RADEONInfoPtr info = RADEONPTR(pScrn);
     struct radeon_accel_state *accel_state = info->accel_state;
-    uint32_t pitch = 0;
     uint32_t pitch_align = 0x7, base_align = 0xff;
 #if defined(XF86DRM_MODE)
     int ret;
@@ -64,11 +63,6 @@ R600SetAccelState(ScrnInfoPtr pScrn,
 	accel_state->src_size[0] = src0->pitch * src0->height * (src0->bpp/8);
 #if defined(XF86DRM_MODE)
 	if (info->cs) {
-	    ret = radeon_bo_get_tiling(accel_state->src_obj[0].bo,
-				       &accel_state->src_obj[0].tiling_flags,
-				       &pitch);
-	    if (ret)
-		RADEON_FALLBACK(("src0 radeon_bo_get_tiling failed\n"));
 	    pitch_align = drmmode_get_pitch_align(pScrn,
 						  accel_state->src_obj[0].bpp / 8,
 						  accel_state->src_obj[0].tiling_flags) - 1;
@@ -95,11 +89,6 @@ R600SetAccelState(ScrnInfoPtr pScrn,
 	accel_state->src_size[1] = src1->pitch * src1->height * (src1->bpp/8);
 #if defined(XF86DRM_MODE)
 	if (info->cs) {
-	    ret = radeon_bo_get_tiling(accel_state->src_obj[1].bo,
-				       &accel_state->src_obj[1].tiling_flags,
-				       &pitch);
-	    if (ret)
-		RADEON_FALLBACK(("src1 radeon_bo_get_tiling failed\n"));
 	    pitch_align = drmmode_get_pitch_align(pScrn,
 						  accel_state->src_obj[1].bpp / 8,
 						  accel_state->src_obj[1].tiling_flags) - 1;
@@ -125,11 +114,6 @@ R600SetAccelState(ScrnInfoPtr pScrn,
 	accel_state->dst_size = dst->pitch * dst->height * (dst->bpp/8);
 #if defined(XF86DRM_MODE)
 	if (info->cs) {
-	    ret = radeon_bo_get_tiling(accel_state->dst_obj.bo,
-				       &accel_state->dst_obj.tiling_flags,
-				       &pitch);
-	    if (ret)
-		RADEON_FALLBACK(("dst radeon_bo_get_tiling failed\n"));
 	    pitch_align = drmmode_get_pitch_align(pScrn,
 						  accel_state->dst_obj.bpp / 8,
 						  accel_state->dst_obj.tiling_flags) - 1;
@@ -210,6 +194,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg)
     if (info->cs) {
 	dst.offset = 0;
 	dst.bo = radeon_get_pixmap_bo(pPix);
+	dst.tiling_flags = radeon_get_pixmap_tiling(pPix);
     } else
 #endif
     {
@@ -589,6 +574,8 @@ R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
 	dst_obj.offset = 0;
 	src_obj.bo = radeon_get_pixmap_bo(pSrc);
 	dst_obj.bo = radeon_get_pixmap_bo(pDst);
+	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
+	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
 	if (radeon_get_pixmap_bo(pSrc) == radeon_get_pixmap_bo(pDst))
 	    accel_state->same_surface = TRUE;
     } else
@@ -1269,6 +1256,8 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 	dst_obj.offset = 0;
 	src_obj.bo = radeon_get_pixmap_bo(pSrc);
 	dst_obj.bo = radeon_get_pixmap_bo(pDst);
+	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
+	src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
     } else
 #endif
     {
@@ -1295,6 +1284,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture,
 	if (info->cs) {
 	    mask_obj.offset = 0;
 	    mask_obj.bo = radeon_get_pixmap_bo(pMask);
+	    mask_obj.tiling_flags = radeon_get_pixmap_tiling(pMask);
 	} else
 #endif
 	{
@@ -1820,6 +1810,7 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     src_obj.bpp = bpp;
     src_obj.domain = RADEON_GEM_DOMAIN_GTT;
     src_obj.bo = scratch;
+    src_obj.tiling_flags = 0;
 
     dst_obj.pitch = dst_pitch_hw;
     dst_obj.width = pDst->drawable.width;
@@ -1828,6 +1819,7 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     dst_obj.bpp = bpp;
     dst_obj.domain = RADEON_GEM_DOMAIN_VRAM;
     dst_obj.bo = radeon_get_pixmap_bo(pDst);
+    dst_obj.tiling_flags = radeon_get_pixmap_tiling(pDst);
 
     if (!R600SetAccelState(pScrn,
 			   &src_obj,
@@ -1953,6 +1945,7 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     src_obj.bpp = bpp;
     src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
     src_obj.bo = radeon_get_pixmap_bo(pSrc);
+    src_obj.tiling_flags = radeon_get_pixmap_tiling(pSrc);
 
     dst_obj.pitch = scratch_pitch;
     dst_obj.width = w;
@@ -1961,6 +1954,7 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     dst_obj.bo = scratch;
     dst_obj.bpp = bpp;
     dst_obj.domain = RADEON_GEM_DOMAIN_GTT;
+    dst_obj.tiling_flags = 0;
 
     if (!R600SetAccelState(pScrn,
 			   &src_obj,
diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c
index 4ff0833..aab43f3 100644
--- a/src/r600_textured_videofuncs.c
+++ b/src/r600_textured_videofuncs.c
@@ -169,6 +169,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
 	dst_obj.offset = 0;
 	src_obj.offset = 0;
 	dst_obj.bo = radeon_get_pixmap_bo(pPixmap);
+	dst_obj.tiling_flags = radeon_get_pixmap_tiling(pPixmap);
     } else
 #endif
     {
@@ -184,7 +185,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     src_obj.bpp = 16;
     src_obj.domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT;
     src_obj.bo = pPriv->src_bo[pPriv->currentBuffer];
-    
+    src_obj.tiling_flags = 0;
+
     dst_obj.width = pPixmap->drawable.width;
     dst_obj.height = pPixmap->drawable.height;
     dst_obj.bpp = pPixmap->drawable.bitsPerPixel;
diff --git a/src/radeon.h b/src/radeon.h
index 9283c4d..f655040 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -1384,6 +1384,7 @@ void radeon_kms_update_vram_limit(ScrnInfoPtr pScrn, int new_fb_size);
 #endif
 struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix);
 void radeon_set_pixmap_bo(PixmapPtr pPix, struct radeon_bo *bo);
+uint32_t radeon_get_pixmap_tiling(PixmapPtr pPix);
 
 #ifdef XF86DRI
 #  ifdef USE_XAA
diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index c11c938..f3daec0 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -532,6 +532,13 @@ struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix)
     return driver_priv->bo;
 }
 
+uint32_t radeon_get_pixmap_tiling(PixmapPtr pPix)
+{
+    struct radeon_exa_pixmap_priv *driver_priv;
+    driver_priv = exaGetPixmapDriverPrivate(pPix);
+    return driver_priv->tiling_flags;
+}
+
 void radeon_set_pixmap_bo(PixmapPtr pPix, struct radeon_bo *bo)
 {
     struct radeon_exa_pixmap_priv *driver_priv;
commit 982c22f16c8eeee9be81779fbfe17d8d3f9b6897
Author: Adam Jackson <ajax at redhat.com>
Date:   Thu Apr 14 16:04:50 2011 -0400

    R520: Fix textures larger than 2k
    
    Ported from the equivalent fix in Mesa.

diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 43d3555..e5c231f 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -1180,10 +1180,10 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 					int unit)
 {
     RINFO_FROM_SCREEN(pPix->drawable.pScreen);
-    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
+    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch, us_format = 0;
     int w = pPict->pDrawable->width;
     int h = pPict->pDrawable->height;
-    int i, pixel_shift;
+    int i, pixel_shift, out_size = 6;
     unsigned int repeatType = pPict->repeat ? pPict->repeatType : RepeatNone;
     struct radeon_exa_pixmap_priv *driver_priv;
     ACCEL_PREAMBLE();
@@ -1230,6 +1230,26 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
     if (IS_R500_3D && ((h - 1) & 0x800))
 	txpitch |= R500_TXHEIGHT_11;
 
+    if (info->ChipFamily == CHIP_FAMILY_R520) {
+	unsigned us_width = (w - 1) & 0x7ff;
+	unsigned us_height = (h - 1) & 0x7ff;
+	unsigned us_depth = 0;
+
+	if (w > 2048) {
+	    us_width = (0x7ff + us_width) >> 1;
+	    us_depth |= 0x0d;
+	}
+	if (h > 2048) {
+	    us_height = (0x7ff + us_height) >> 1;
+	    us_depth |= 0x0e;
+	}
+
+	us_format = (us_width << R300_TXWIDTH_SHIFT) |
+		    (us_height << R300_TXHEIGHT_SHIFT) |
+		    (us_depth << R300_TXDEPTH_SHIFT);
+	out_size++;
+    }
+
     /* Use TXPITCH instead of TXWIDTH for address computations: we could
      * omit this if there is no padding, but there is no apparent advantage
      * in doing so.
@@ -1276,7 +1296,9 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 	RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter));
     }
 
-    BEGIN_ACCEL_RELOC(repeatType == RepeatNone ? 7 : 6, 1);
+    if (repeatType == RepeatNone)
+	out_size++;
+    BEGIN_ACCEL_RELOC(out_size, 1);
     OUT_ACCEL_REG(R300_TX_FILTER0_0 + (unit * 4), txfilter);
     OUT_ACCEL_REG(R300_TX_FILTER1_0 + (unit * 4), 0);
     OUT_ACCEL_REG(R300_TX_FORMAT0_0 + (unit * 4), txformat0);
@@ -1287,6 +1309,8 @@ static Bool FUNC_NAME(R300TextureSetup)(PicturePtr pPict, PixmapPtr pPix,
 
     if (repeatType == RepeatNone)
 	OUT_ACCEL_REG(R300_TX_BORDER_COLOR_0 + (unit * 4), 0);
+    if (info->ChipFamily == CHIP_FAMILY_R520)
+	OUT_ACCEL_REG(R500_US_FORMAT0_0 + (unit * 4), us_format);
     FINISH_ACCEL();
 
     if (pPict->transform != 0) {
diff --git a/src/radeon_reg.h b/src/radeon_reg.h
index e61c29d..fbf1558 100644
--- a/src/radeon_reg.h
+++ b/src/radeon_reg.h
@@ -4688,6 +4688,7 @@
 #define R300_TX_FORMAT0_2				0x4488
 #       define R300_TXWIDTH_SHIFT                       0
 #       define R300_TXHEIGHT_SHIFT                      11
+#       define R300_TXDEPTH_SHIFT                       22
 #       define R300_NUM_LEVELS_SHIFT                    26
 #       define R300_NUM_LEVELS_MASK                     0x
 #       define R300_TXPROJECTED                         (1 << 30)
diff --git a/src/radeon_textured_videofuncs.c b/src/radeon_textured_videofuncs.c
index a22c416..84aba6f 100644
--- a/src/radeon_textured_videofuncs.c
+++ b/src/radeon_textured_videofuncs.c
@@ -2661,11 +2661,11 @@ FUNC_NAME(R500PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     PixmapPtr pPixmap = pPriv->pPixmap;
     struct radeon_exa_pixmap_priv *driver_priv;
     struct radeon_bo *src_bo = pPriv->src_bo[pPriv->currentBuffer];
-    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch;
+    uint32_t txfilter, txformat0, txformat1, txoffset, txpitch, us_format = 0;
     uint32_t dst_pitch, dst_format;
     uint32_t txenable, colorpitch, bicubic_offset;
     uint32_t output_fmt;
-    int pixel_shift;
+    int pixel_shift, out_size = 6;
     ACCEL_PREAMBLE();
 
 #ifdef XF86DRM_MODE
@@ -2791,15 +2791,36 @@ FUNC_NAME(R500PrepareTexturedVideo)(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv)
     if ((pPriv->h - 1) & 0x800)
 	txpitch |= R500_TXHEIGHT_11;
 
+    if (info->ChipFamily == CHIP_FAMILY_R520) {
+	unsigned us_width = (pPriv->w - 1) & 0x7ff;
+	unsigned us_height = (pPriv->h - 1) & 0x7ff;
+	unsigned us_depth = 0;
+
+	if (pPriv->w > 2048) {
+	    us_width = (0x7ff + us_width) >> 1;
+	    us_depth |= 0x0d;
+	}
+	if (pPriv->h > 2048) {
+	    us_height = (0x7ff + us_height) >> 1;
+	    us_depth |= 0x0e;
+	}
+	us_format = (us_width << R300_TXWIDTH_SHIFT) |
+		    (us_height << R300_TXHEIGHT_SHIFT) |
+		    (us_depth << R300_TXDEPTH_SHIFT);
+	out_size++;
+    }
+
     txoffset = info->cs ? 0 : pPriv->src_offset;
 
-    BEGIN_ACCEL_RELOC(6, 1);
+    BEGIN_ACCEL_RELOC(out_size, 1);
     OUT_ACCEL_REG(R300_TX_FILTER0_0, txfilter);
     OUT_ACCEL_REG(R300_TX_FILTER1_0, 0);
     OUT_ACCEL_REG(R300_TX_FORMAT0_0, txformat0);
     OUT_ACCEL_REG(R300_TX_FORMAT1_0, txformat1);
     OUT_ACCEL_REG(R300_TX_FORMAT2_0, txpitch);
     OUT_TEXTURE_REG(R300_TX_OFFSET_0, txoffset, src_bo);
+    if (info->ChipFamily == CHIP_FAMILY_R520)
+	OUT_ACCEL_REG(R500_US_FORMAT0_0, us_format);
     FINISH_ACCEL();
 
     txenable = R300_TEX_0_ENABLE;
commit cc7d1fa39da40a532fcdbe6c7924ca47a879e66a
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Mon Apr 4 17:37:12 2011 +0200

    EXA: Cache BO tiling flags.
    
    Calling into the kernel every time is quite expensive, and nobody else should
    ever change the tiling flags.
    
    There's still more to do along the same lines for >= R6xx.

diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index ae73e38..d93cb42 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -1402,7 +1402,7 @@ EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
     Bool r;
     int i;
     struct r600_accel_object src_obj, dst_obj;
-    uint32_t tiling_flags = 0, pitch = 0, height, base_align;
+    uint32_t height, base_align;
 
     if (bpp < 8)
 	return FALSE;
@@ -1411,14 +1411,10 @@ EVERGREENUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h,
     if (!driver_priv || !driver_priv->bo)
 	return FALSE;
 
-    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
-    if (ret)
-	ErrorF("radeon_bo_get_tiling failed\n");
-
     /* If we know the BO won't be busy, don't bother with a scratch */
     copy_dst = driver_priv->bo;
     copy_pitch = pDst->devKind;
-    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
+    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
 	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	    flush = FALSE;
 	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
@@ -1519,7 +1515,7 @@ EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
     Bool flush = FALSE;
     Bool r;
     struct r600_accel_object src_obj, dst_obj;
-    uint32_t tiling_flags = 0, pitch = 0, height, base_align;
+    uint32_t height, base_align;
 
     if (bpp < 8)
 	return FALSE;
@@ -1528,14 +1524,10 @@ EVERGREENDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w,
     if (!driver_priv || !driver_priv->bo)
 	return FALSE;
 
-    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
-    if (ret)
-	ErrorF("radeon_bo_get_tiling failed\n");
-
     /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
     copy_src = driver_priv->bo;
     copy_pitch = pSrc->devKind;
-    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
+    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
 	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
 	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 7736d24..c6a244c 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -1784,7 +1784,7 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     Bool r;
     int i;
     struct r600_accel_object src_obj, dst_obj;
-    uint32_t tiling_flags = 0, pitch = 0, height, base_align;
+    uint32_t height, base_align;
 
     if (bpp < 8)
 	return FALSE;
@@ -1793,14 +1793,10 @@ R600UploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     if (!driver_priv || !driver_priv->bo)
 	return FALSE;
 
-    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
-    if (ret)
-	ErrorF("radeon_bo_get_tiling failed\n");
-
     /* If we know the BO won't be busy, don't bother with a scratch */
     copy_dst = driver_priv->bo;
     copy_pitch = pDst->devKind;
-    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
+    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
 	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	    flush = FALSE;
 	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
@@ -1901,7 +1897,7 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     Bool flush = FALSE;
     Bool r;
     struct r600_accel_object src_obj, dst_obj;
-    uint32_t tiling_flags = 0, pitch = 0, height, base_align;
+    uint32_t height, base_align;
 
     if (bpp < 8)
 	return FALSE;
@@ -1910,14 +1906,10 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     if (!driver_priv || !driver_priv->bo)
 	return FALSE;
 
-    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
-    if (ret)
-	ErrorF("radeon_bo_get_tiling failed\n");
-
     /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
     copy_src = driver_priv->bo;
     copy_pitch = pSrc->devKind;
-    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
+    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
 	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
 	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
diff --git a/src/radeon.h b/src/radeon.h
index a6d20d7..9283c4d 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -495,7 +495,7 @@ typedef struct _atomBiosHandle *atomBiosHandlePtr;
 
 struct radeon_exa_pixmap_priv {
     struct radeon_bo *bo;
-    int flags;
+    uint32_t tiling_flags;
     Bool bo_mapped;
 };
 
diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index 1c647b9..c11c938 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -130,6 +130,13 @@ static Bool RADEONPixmapIsColortiled(PixmapPtr pPix)
 {
     RINFO_FROM_SCREEN(pPix->drawable.pScreen);
 
+#ifdef XF86DRM_MODE
+    if (info->cs) {
+	/* Taken care of by the kernel relocation handling */
+	return FALSE;
+    }
+#endif
+
     /* This doesn't account for the back buffer, which we may want to wrap in
      * a pixmap at some point for the purposes of DRI buffer moves.
      */
@@ -308,7 +315,6 @@ Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index)
 #endif
     Bool flush = FALSE;
     int ret;
-    uint32_t tiling_flags = 0, pitch = 0;
 
 #if X_BYTE_ORDER == X_BIG_ENDIAN
     /* May need to handle byte swapping in DownloadFrom/UploadToScreen */
@@ -320,12 +326,8 @@ Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index)
     if (!driver_priv)
       return FALSE;
 
-    /* check if we are tiled */
-    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
-    if (ret)
-	return FALSE;
     /* untile in DFS/UTS */
-    if (tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))
+    if (driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))
 	return FALSE;
 
     /* if we have more refs than just the BO then flush */
@@ -505,8 +507,8 @@ void *RADEONEXACreatePixmap2(ScreenPtr pScreen, int width, int height,
 	return NULL;
     }
 
-    if (tiling)
-	radeon_bo_set_tiling(new_priv->bo, tiling, *new_pitch);
+    if (tiling && !radeon_bo_set_tiling(new_priv->bo, tiling, *new_pitch))
+	new_priv->tiling_flags = tiling;
 
     return new_priv;
 }
@@ -536,11 +538,15 @@ void radeon_set_pixmap_bo(PixmapPtr pPix, struct radeon_bo *bo)
 
     driver_priv = exaGetPixmapDriverPrivate(pPix);
     if (driver_priv) {
+	uint32_t pitch;
+
 	if (driver_priv->bo)
 	    radeon_bo_unref(driver_priv->bo);
 
 	radeon_bo_ref(bo);
 	driver_priv->bo = bo;
+
+	radeon_bo_get_tiling(bo, &driver_priv->tiling_flags, &pitch);
     }
 }
 
diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c
index e8c5571..b6767f0 100644
--- a/src/radeon_exa_funcs.c
+++ b/src/radeon_exa_funcs.c
@@ -474,7 +474,6 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     Bool flush = TRUE;
     Bool r;
     int i;
-    uint32_t tiling_flags = 0, pitch = 0;
 
     if (bpp < 8)
 	return FALSE;
@@ -483,10 +482,6 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     if (!driver_priv || !driver_priv->bo)
 	return FALSE;
 
-    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
-    if (ret)
-	ErrorF("radeon_bo_get_tiling failed\n");
-
 #if X_BYTE_ORDER == X_BIG_ENDIAN
     switch (bpp) {
     case 32:
@@ -501,7 +496,7 @@ RADEONUploadToScreenCS(PixmapPtr pDst, int x, int y, int w, int h,
     /* If we know the BO won't be busy, don't bother with a scratch */
     copy_dst = driver_priv->bo;
     copy_pitch = pDst->devKind;
-    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
+    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
 	if (!radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	    flush = FALSE;
 	    if (!radeon_bo_is_busy(driver_priv->bo, &dst_domain))
@@ -580,7 +575,6 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     int ret;
     Bool flush = FALSE;
     Bool r;
-    uint32_t tiling_flags = 0, pitch = 0;
 
     if (bpp < 8)
 	return FALSE;
@@ -589,10 +583,6 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     if (!driver_priv || !driver_priv->bo)
 	return FALSE;
 
-    ret = radeon_bo_get_tiling(driver_priv->bo, &tiling_flags, &pitch);
-    if (ret)
-	ErrorF("radeon_bo_get_tiling failed\n");
-
 #if X_BYTE_ORDER == X_BIG_ENDIAN
     switch (bpp) {
     case 32:
@@ -607,7 +597,7 @@ RADEONDownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w,
     /* If we know the BO won't end up in VRAM anyway, don't bother with a scratch */
     copy_src = driver_priv->bo;
     copy_pitch = pSrc->devKind;
-    if (!(tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
+    if (!(driver_priv->tiling_flags & (RADEON_TILING_MACRO | RADEON_TILING_MICRO))) {
 	if (radeon_bo_is_referenced_by_cs(driver_priv->bo, info->cs)) {
 	    src_domain = radeon_bo_get_src_domain(driver_priv->bo);
 	    if ((src_domain & (RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM)) ==
commit f0b7d7b449cc77bb2b281d81108507f8bc2e6018
Author: Michel Dänzer <daenzer at vmware.com>
Date:   Tue Apr 5 13:36:01 2011 +0200

    DRI2: Some cleanups for the scheduling mess.
    
    * Fix build against libdrm that doesn't define *_VBLANK_HIGH_CRTC*.
    * If we have more than two CRTCs but can't use DRM_VBLANK_HIGH_CRTC_MASK, don't
      enable scheduling in the first place rather than relying on
      DRM_VBLANK_SECONDARY magically doing something sensible for higher CRTCs.
    * Only set up client state tracking when scheduling is enabled.
    * Only declare pRADEONEnt when it's needed, and break long lines.

diff --git a/src/radeon.h b/src/radeon.h
index 1a746c7..a6d20d7 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -931,9 +931,6 @@ typedef struct {
 
     RADEONFBLayout    CurrentLayout;
 
-#ifdef RADEON_DRI2
-    Bool              high_crtc_works;
-#endif
 #ifdef XF86DRI
     Bool              directRenderingEnabled;
     Bool              directRenderingInited;
diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c
index 8b31483..e618cc5 100644
--- a/src/radeon_dri2.c
+++ b/src/radeon_dri2.c
@@ -777,13 +777,16 @@ static drmVBlankSeqType populate_vbl_request_type(RADEONInfoPtr info, int crtc)
 
     if (crtc == 1)
         type |= DRM_VBLANK_SECONDARY;
-    else if (crtc > 1) {
-	if (info->high_crtc_works) {
-	    type |= (crtc << DRM_VBLANK_HIGH_CRTC_SHIFT) &
+    else if (crtc > 1)
+#ifdef DRM_VBLANK_HIGH_CRTC_MASK
+	type |= (crtc << DRM_VBLANK_HIGH_CRTC_SHIFT) &
 		DRM_VBLANK_HIGH_CRTC_MASK;
-	} else
-	    type |= DRM_VBLANK_SECONDARY;
-    }
+#else
+	ErrorF("radeon driver bug: %s called for CRTC %d > 1, but "
+	       "DRM_VBLANK_HIGH_CRTC_MASK not defined at build time\n",
+	       __func__, crtc);
+#endif
+
     return type; 
 }
 
@@ -1221,12 +1224,12 @@ Bool
 radeon_dri2_screen_init(ScreenPtr pScreen)
 {
     ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
-    RADEONEntPtr pRADEONEnt   = RADEONEntPriv(pScrn);
     RADEONInfoPtr info = RADEONPTR(pScrn);
     DRI2InfoRec dri2_info = { 0 };
 #ifdef USE_DRI2_SCHEDULING
+    RADEONEntPtr pRADEONEnt   = RADEONEntPriv(pScrn);
     const char *driverNames[1];
-    uint64_t cap_value;
+    Bool scheduling_works = TRUE;
 #endif
 
     if (!info->useEXA) {
@@ -1258,9 +1261,34 @@ radeon_dri2_screen_init(ScreenPtr pScreen)
 #endif
     dri2_info.CopyRegion = radeon_dri2_copy_region;
 
-    info->high_crtc_works = FALSE;
 #ifdef USE_DRI2_SCHEDULING
-    if (info->dri->pKernelDRMVersion->version_minor >= 4) {
+    if (info->dri->pKernelDRMVersion->version_minor < 4) {
+	xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "You need a newer kernel for "
+		   "sync extension\n");
+	scheduling_works = FALSE;
+    }
+
+    if (scheduling_works && info->drmmode.mode_res->count_crtcs > 2) {
+#ifdef DRM_CAP_VBLANK_HIGH_CRTC
+	uint64_t cap_value;
+
+	if (drmGetCap(info->dri2.drm_fd, DRM_CAP_VBLANK_HIGH_CRTC, &cap_value)) {
+	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "You need a newer kernel "
+		       "for VBLANKs on CRTC > 1\n");
+	    scheduling_works = FALSE;
+	} else if (!cap_value) {
+	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "Your kernel does not "
+		       "handle VBLANKs on CRTC > 1\n");
+	    scheduling_works = FALSE;
+	}
+#else
+	xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "You need to rebuild against a "
+		   "newer libdrm to handle VBLANKs on CRTC > 1\n");
+	scheduling_works = FALSE;
+#endif
+    }
+
+    if (scheduling_works) {
         dri2_info.version = 4;
         dri2_info.ScheduleSwap = radeon_dri2_schedule_swap;
         dri2_info.GetMSC = radeon_dri2_get_msc;
@@ -1268,40 +1296,29 @@ radeon_dri2_screen_init(ScreenPtr pScreen)
         dri2_info.numDrivers = 1;
         dri2_info.driverNames = driverNames;
         driverNames[0] = dri2_info.driverName;
-    } else {
-        xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "You need a newer kernel for sync extension\n");
-    }
 
-    if (info->drmmode.mode_res->count_crtcs > 2) {
-	if (drmGetCap(info->dri2.drm_fd, DRM_CAP_VBLANK_HIGH_CRTC, &cap_value)) {
-	    info->high_crtc_works = FALSE;
-	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "You need a newer kernel for VBLANKs on CRTC > 1\n");
-	} else {
-	    if (cap_value) {
-		info->high_crtc_works = TRUE;
-	    } else {
-		xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "Your kernel does not handle VBLANKs on CRTC > 1\n");
-		info->high_crtc_works = FALSE;
-	    }
-	}
-    }
-
-    if (pRADEONEnt->dri2_info_cnt == 0) {
+	if (pRADEONEnt->dri2_info_cnt == 0) {
 #if HAS_DIXREGISTERPRIVATEKEY
-	if (!dixRegisterPrivateKey(DRI2ClientEventsPrivateKey, PRIVATE_CLIENT, sizeof(DRI2ClientEventsRec))) {
-	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "DRI2 registering private key to client failed\n");
-	    return FALSE;
-	}
+	    if (!dixRegisterPrivateKey(DRI2ClientEventsPrivateKey,
+				       PRIVATE_CLIENT, sizeof(DRI2ClientEventsRec))) {
+		xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "DRI2 registering "
+			   "private key to client failed\n");
+		return FALSE;
+	    }
 #else
-	if (!dixRequestPrivate(DRI2ClientEventsPrivateKey, sizeof(DRI2ClientEventsRec))) {
-	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "DRI2 requesting private key to client failed\n");
-	    return FALSE;
-	}
+	    if (!dixRequestPrivate(DRI2ClientEventsPrivateKey,
+				   sizeof(DRI2ClientEventsRec))) {
+		xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "DRI2 requesting "
+			   "private key to client failed\n");
+		return FALSE;
+	    }
 #endif
 
-	AddCallback(&ClientStateCallback, radeon_dri2_client_state_changed, 0);
+	    AddCallback(&ClientStateCallback, radeon_dri2_client_state_changed, 0);
+	}
+
+	pRADEONEnt->dri2_info_cnt++;
     }
-    pRADEONEnt->dri2_info_cnt++;
 #endif
 
     info->dri2.enabled = DRI2ScreenInit(pScreen, &dri2_info);
@@ -1312,9 +1329,9 @@ void radeon_dri2_close_screen(ScreenPtr pScreen)
 {
     ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
     RADEONInfoPtr info = RADEONPTR(pScrn);
+#ifdef USE_DRI2_SCHEDULING
     RADEONEntPtr pRADEONEnt   = RADEONEntPriv(pScrn);
 
-#ifdef USE_DRI2_SCHEDULING
     if (--pRADEONEnt->dri2_info_cnt == 0)
     	DeleteCallback(&ClientStateCallback, radeon_dri2_client_state_changed, 0);
 #endif
commit 7acf9bc833de539fa2259a051c66a99445a54bc4
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Mon Apr 4 11:08:37 2011 -0400

    radeon: add some additional Ontario PCI IDs
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/ati_pciids_gen.h b/src/ati_pciids_gen.h
index 598dd72..ba46c17 100644
--- a/src/ati_pciids_gen.h
+++ b/src/ati_pciids_gen.h
@@ -461,6 +461,8 @@
 #define PCI_CHIP_PALM_9803 0x9803
 #define PCI_CHIP_PALM_9804 0x9804
 #define PCI_CHIP_PALM_9805 0x9805
+#define PCI_CHIP_PALM_9806 0x9806
+#define PCI_CHIP_PALM_9807 0x9807
 #define PCI_CHIP_CYPRESS_6880 0x6880
 #define PCI_CHIP_CYPRESS_6888 0x6888
 #define PCI_CHIP_CYPRESS_6889 0x6889
diff --git a/src/pcidb/ati_pciids.csv b/src/pcidb/ati_pciids.csv
index 07454af..fd1fdbc 100644
--- a/src/pcidb/ati_pciids.csv
+++ b/src/pcidb/ati_pciids.csv
@@ -462,6 +462,8 @@
 "0x9803","PALM_9803","PALM",,1,,,1,"AMD Radeon HD 6310 Graphics"
 "0x9804","PALM_9804","PALM",,1,,,1,"AMD Radeon HD 6250 Graphics"
 "0x9805","PALM_9805","PALM",,1,,,1,"AMD Radeon HD 6250 Graphics"
+"0x9806","PALM_9806","PALM",,1,,,1,"AMD Radeon HD 6300 Series Graphics"
+"0x9807","PALM_9807","PALM",,1,,,1,"AMD Radeon HD 6200 Series Graphics"
 "0x6880","CYPRESS_6880","CYPRESS",1,,,,,"CYPRESS"
 "0x6888","CYPRESS_6888","CYPRESS",,,,,,"ATI FirePro (FireGL) Graphics Adapter"
 "0x6889","CYPRESS_6889","CYPRESS",,,,,,"ATI FirePro (FireGL) Graphics Adapter"
diff --git a/src/radeon_chipinfo_gen.h b/src/radeon_chipinfo_gen.h
index 2caeed9..630154b 100644
--- a/src/radeon_chipinfo_gen.h
+++ b/src/radeon_chipinfo_gen.h
@@ -381,6 +381,8 @@ static RADEONCardInfo RADEONCards[] = {
  { 0x9803, CHIP_FAMILY_PALM, 0, 1, 0, 0, 1 },
  { 0x9804, CHIP_FAMILY_PALM, 0, 1, 0, 0, 1 },
  { 0x9805, CHIP_FAMILY_PALM, 0, 1, 0, 0, 1 },
+ { 0x9806, CHIP_FAMILY_PALM, 0, 1, 0, 0, 1 },
+ { 0x9807, CHIP_FAMILY_PALM, 0, 1, 0, 0, 1 },
  { 0x6880, CHIP_FAMILY_CYPRESS, 1, 0, 0, 0, 0 },
  { 0x6888, CHIP_FAMILY_CYPRESS, 0, 0, 0, 0, 0 },
  { 0x6889, CHIP_FAMILY_CYPRESS, 0, 0, 0, 0, 0 },
diff --git a/src/radeon_chipset_gen.h b/src/radeon_chipset_gen.h
index e4c38f0..9996e0a 100644
--- a/src/radeon_chipset_gen.h
+++ b/src/radeon_chipset_gen.h
@@ -381,6 +381,8 @@ static SymTabRec RADEONChipsets[] = {
   { PCI_CHIP_PALM_9803, "AMD Radeon HD 6310 Graphics" },
   { PCI_CHIP_PALM_9804, "AMD Radeon HD 6250 Graphics" },
   { PCI_CHIP_PALM_9805, "AMD Radeon HD 6250 Graphics" },
+  { PCI_CHIP_PALM_9806, "AMD Radeon HD 6300 Series Graphics" },
+  { PCI_CHIP_PALM_9807, "AMD Radeon HD 6200 Series Graphics" },
   { PCI_CHIP_CYPRESS_6880, "CYPRESS" },
   { PCI_CHIP_CYPRESS_6888, "ATI FirePro (FireGL) Graphics Adapter" },
   { PCI_CHIP_CYPRESS_6889, "ATI FirePro (FireGL) Graphics Adapter" },
diff --git a/src/radeon_pci_chipset_gen.h b/src/radeon_pci_chipset_gen.h
index 1165049..67fec23 100644
--- a/src/radeon_pci_chipset_gen.h
+++ b/src/radeon_pci_chipset_gen.h
@@ -381,6 +381,8 @@ PciChipsets RADEONPciChipsets[] = {
  { PCI_CHIP_PALM_9803, PCI_CHIP_PALM_9803, RES_SHARED_VGA },
  { PCI_CHIP_PALM_9804, PCI_CHIP_PALM_9804, RES_SHARED_VGA },
  { PCI_CHIP_PALM_9805, PCI_CHIP_PALM_9805, RES_SHARED_VGA },
+ { PCI_CHIP_PALM_9806, PCI_CHIP_PALM_9806, RES_SHARED_VGA },
+ { PCI_CHIP_PALM_9807, PCI_CHIP_PALM_9807, RES_SHARED_VGA },
  { PCI_CHIP_CYPRESS_6880, PCI_CHIP_CYPRESS_6880, RES_SHARED_VGA },
  { PCI_CHIP_CYPRESS_6888, PCI_CHIP_CYPRESS_6888, RES_SHARED_VGA },
  { PCI_CHIP_CYPRESS_6889, PCI_CHIP_CYPRESS_6889, RES_SHARED_VGA },
diff --git a/src/radeon_pci_device_match_gen.h b/src/radeon_pci_device_match_gen.h
index 4bebfeb..60b975c 100644
--- a/src/radeon_pci_device_match_gen.h
+++ b/src/radeon_pci_device_match_gen.h
@@ -381,6 +381,8 @@ static const struct pci_id_match radeon_device_match[] = {
  ATI_DEVICE_MATCH( PCI_CHIP_PALM_9803, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_PALM_9804, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_PALM_9805, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_PALM_9806, 0 ),
+ ATI_DEVICE_MATCH( PCI_CHIP_PALM_9807, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_CYPRESS_6880, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_CYPRESS_6888, 0 ),
  ATI_DEVICE_MATCH( PCI_CHIP_CYPRESS_6889, 0 ),
commit 0af6386f85a0f1ba14864a3334164733a10a6cb8
Author: Ilija Hadzic <ihadzic at research.bell-labs.com>
Date:   Thu Mar 24 13:33:27 2011 -0400

    xf86-video-ati: (revised #2) add support for vblank on crtc > 1
    
    Hi Alex,
    
    Enclosed is a revised version of two patches sent on Mar 18 and Mar 22,
    respectively. Details summarized in these two threads:
    http://lists.freedesktop.org/archives/dri-devel/2011-March/009463.html
    http://lists.freedesktop.org/archives/dri-devel/2011-March/009582.html
    
    This patch reconciles the DDX with the change in libdrm sent to this list
    earlier today. Specifically, it refers to a symbol that has been renamed
    from DRM_CAP_HIGH_CRTC to DRM_CAP_VBLANK_HIGH_CRTC. It *supersedes* the
    previous patch (i.e. apply it to the master branch as it exists at the
    time of this writing, not as an incremental patch to the one sent previously).
    
    Regards,
    
    Ilija
    
    Signed-off-by: Ilija Hadzic <ihadzic at research.bell-labs.com>
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/radeon.h b/src/radeon.h
index a6d20d7..1a746c7 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -931,6 +931,9 @@ typedef struct {
 
     RADEONFBLayout    CurrentLayout;
 
+#ifdef RADEON_DRI2
+    Bool              high_crtc_works;
+#endif
 #ifdef XF86DRI
     Bool              directRenderingEnabled;
     Bool              directRenderingInited;
diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c
index 66df03c..8b31483 100644
--- a/src/radeon_dri2.c
+++ b/src/radeon_dri2.c
@@ -771,6 +771,22 @@ cleanup:
     free(event);
 }
 
+static drmVBlankSeqType populate_vbl_request_type(RADEONInfoPtr info, int crtc)
+{
+    drmVBlankSeqType type = 0;
+
+    if (crtc == 1)
+        type |= DRM_VBLANK_SECONDARY;
+    else if (crtc > 1) {
+	if (info->high_crtc_works) {
+	    type |= (crtc << DRM_VBLANK_HIGH_CRTC_SHIFT) &
+		DRM_VBLANK_HIGH_CRTC_MASK;
+	} else
+	    type |= DRM_VBLANK_SECONDARY;
+    }
+    return type; 
+}
+
 /*
  * Get current frame count and frame count timestamp, based on drawable's
  * crtc.
@@ -791,8 +807,7 @@ static int radeon_dri2_get_msc(DrawablePtr draw, CARD64 *ust, CARD64 *msc)
         return TRUE;
     }
     vbl.request.type = DRM_VBLANK_RELATIVE;
-    if (crtc > 0)
-        vbl.request.type |= DRM_VBLANK_SECONDARY;
+    vbl.request.type |= populate_vbl_request_type(info, crtc);
     vbl.request.sequence = 0;
 
     ret = drmWaitVBlank(info->dri2.drm_fd, &vbl);
@@ -855,8 +870,7 @@ static int radeon_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw,
 
     /* Get current count */
     vbl.request.type = DRM_VBLANK_RELATIVE;
-    if (crtc > 0)
-        vbl.request.type |= DRM_VBLANK_SECONDARY;
+    vbl.request.type |= populate_vbl_request_type(info, crtc);
     vbl.request.sequence = 0;
     ret = drmWaitVBlank(info->dri2.drm_fd, &vbl);
     if (ret) {
@@ -882,8 +896,7 @@ static int radeon_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw,
         if (current_msc >= target_msc)
             target_msc = current_msc;
         vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
-        if (crtc > 0)
-            vbl.request.type |= DRM_VBLANK_SECONDARY;
+	vbl.request.type |= populate_vbl_request_type(info, crtc);
         vbl.request.sequence = target_msc;
         vbl.request.signal = (unsigned long)wait_info;
         ret = drmWaitVBlank(info->dri2.drm_fd, &vbl);
@@ -903,8 +916,7 @@ static int radeon_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw,
      * so we queue an event that will satisfy the divisor/remainder equation.
      */
     vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
-    if (crtc > 0)
-        vbl.request.type |= DRM_VBLANK_SECONDARY;
+    vbl.request.type |= populate_vbl_request_type(info, crtc);
 
     vbl.request.sequence = current_msc - (current_msc % divisor) +
         remainder;
@@ -1068,8 +1080,7 @@ static int radeon_dri2_schedule_swap(ClientPtr client, DrawablePtr draw,
 
     /* Get current count */
     vbl.request.type = DRM_VBLANK_RELATIVE;
-    if (crtc > 0)
-        vbl.request.type |= DRM_VBLANK_SECONDARY;
+    vbl.request.type |= populate_vbl_request_type(info, crtc);
     vbl.request.sequence = 0;
     ret = drmWaitVBlank(info->dri2.drm_fd, &vbl);
     if (ret) {
@@ -1111,8 +1122,7 @@ static int radeon_dri2_schedule_swap(ClientPtr client, DrawablePtr draw,
          */
         if (flip == 0)
             vbl.request.type |= DRM_VBLANK_NEXTONMISS;
-        if (crtc > 0)
-            vbl.request.type |= DRM_VBLANK_SECONDARY;
+	vbl.request.type |= populate_vbl_request_type(info, crtc);
 
         /* If target_msc already reached or passed, set it to
          * current_msc to ensure we return a reasonable value back
@@ -1145,8 +1155,7 @@ static int radeon_dri2_schedule_swap(ClientPtr client, DrawablePtr draw,
     vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT;
     if (flip == 0)
         vbl.request.type |= DRM_VBLANK_NEXTONMISS;
-    if (crtc > 0)
-        vbl.request.type |= DRM_VBLANK_SECONDARY;
+    vbl.request.type |= populate_vbl_request_type(info, crtc);
 
     vbl.request.sequence = current_msc - (current_msc % divisor) +
         remainder;
@@ -1217,6 +1226,7 @@ radeon_dri2_screen_init(ScreenPtr pScreen)
     DRI2InfoRec dri2_info = { 0 };
 #ifdef USE_DRI2_SCHEDULING
     const char *driverNames[1];
+    uint64_t cap_value;
 #endif
 
     if (!info->useEXA) {
@@ -1248,6 +1258,7 @@ radeon_dri2_screen_init(ScreenPtr pScreen)
 #endif
     dri2_info.CopyRegion = radeon_dri2_copy_region;
 
+    info->high_crtc_works = FALSE;
 #ifdef USE_DRI2_SCHEDULING
     if (info->dri->pKernelDRMVersion->version_minor >= 4) {
         dri2_info.version = 4;
@@ -1261,6 +1272,20 @@ radeon_dri2_screen_init(ScreenPtr pScreen)
         xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "You need a newer kernel for sync extension\n");
     }
 
+    if (info->drmmode.mode_res->count_crtcs > 2) {
+	if (drmGetCap(info->dri2.drm_fd, DRM_CAP_VBLANK_HIGH_CRTC, &cap_value)) {
+	    info->high_crtc_works = FALSE;
+	    xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "You need a newer kernel for VBLANKs on CRTC > 1\n");
+	} else {
+	    if (cap_value) {
+		info->high_crtc_works = TRUE;
+	    } else {
+		xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "Your kernel does not handle VBLANKs on CRTC > 1\n");
+		info->high_crtc_works = FALSE;
+	    }
+	}
+    }
+
     if (pRADEONEnt->dri2_info_cnt == 0) {
 #if HAS_DIXREGISTERPRIVATEKEY
 	if (!dixRegisterPrivateKey(DRI2ClientEventsPrivateKey, PRIVATE_CLIENT, sizeof(DRI2ClientEventsRec))) {
commit fe2e0ad3ffa58f40311319c950b842e2928a5740
Author: matthew green <mrg at eterna.com.au>
Date:   Mon Mar 21 12:17:58 2011 -0400

    bug fix for r6xx/r7xx UMS
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/r600_exa.c b/src/r600_exa.c
index 0ed0ced..7736d24 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -621,8 +621,12 @@ R600PrepareCopy(PixmapPtr pSrc,   PixmapPtr pDst,
 	return FALSE;
 
     if (accel_state->same_surface == TRUE) {
+#if defined(XF86DRM_MODE)
 	unsigned height = RADEON_ALIGN(pDst->drawable.height,
 				       drmmode_get_height_align(pScrn, accel_state->dst_obj.tiling_flags));
+#else
+	unsigned height = pDst->drawable.height;
+#endif
 	unsigned long size = height * accel_state->dst_obj.pitch * pDst->drawable.bitsPerPixel/8;
 
 #if defined(XF86DRM_MODE)
diff --git a/src/radeon_textured_video.c b/src/radeon_textured_video.c
index c886ed0..d247db6 100644
--- a/src/radeon_textured_video.c
+++ b/src/radeon_textured_video.c
@@ -248,7 +248,11 @@ RADEONPutImageTextured(ScrnInfoPtr pScrn,
     BoxRec dstBox;
     int dst_width = width, dst_height = height;
     int aligned_height;
+#ifdef XF86DRM_MODE
     int h_align = drmmode_get_height_align(pScrn, 0);
+#else
+    int h_align = 1;
+#endif
     /* make the compiler happy */
     s2offset = s3offset = srcPitch2 = 0;
 
commit 83978ad5fe37581e2b2f3fbd9c073d91b2ae1d50
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Thu Mar 17 19:07:43 2011 -0400

    bump version post release
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/configure.ac b/configure.ac
index 6f72c2d..ec54478 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-ati],
-        [6.14.1],
+        [6.14.99],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-ati])
 
commit 38d9368e59b2990bf32a028ece2132451b402350
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Thu Mar 17 18:59:55 2011 -0400

    bump for release
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/configure.ac b/configure.ac
index ec54478..6f72c2d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -23,7 +23,7 @@
 # Initialize Autoconf
 AC_PREREQ([2.60])
 AC_INIT([xf86-video-ati],
-        [6.14.99],
+        [6.14.1],
         [https://bugs.freedesktop.org/enter_bug.cgi?product=xorg],
         [xf86-video-ati])
 
commit cdfc007eccc9b130fc08012deef304488eb6e469
Author: Dave Airlie <airlied at redhat.com>
Date:   Wed Mar 16 10:40:42 2011 +1000

    radeon/exa: correct function name
    
    This corrects the function name so it matches the contents.
    
    Signed-off-by: Dave Airlie <airlied at redhat.com>

diff --git a/src/evergreen_exa.c b/src/evergreen_exa.c
index 553f05e..ae73e38 100644
--- a/src/evergreen_exa.c
+++ b/src/evergreen_exa.c
@@ -731,7 +731,7 @@ static Bool EVERGREENCheckCompositeTexture(PicturePtr pPict,
 	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
     }
 
-    if (!radeon_transform_is_affine(pPict->transform))
+    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
 	RADEON_FALLBACK(("non-affine transforms not supported\n"));
 
     return TRUE;
diff --git a/src/r600_exa.c b/src/r600_exa.c
index 57e5c0f..0ed0ced 100644
--- a/src/r600_exa.c
+++ b/src/r600_exa.c
@@ -910,7 +910,7 @@ static Bool R600CheckCompositeTexture(PicturePtr pPict,
 	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
     }
 
-    if (!radeon_transform_is_affine(pPict->transform))
+    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
 	RADEON_FALLBACK(("non-affine transforms not supported\n"));
 
     return TRUE;
diff --git a/src/radeon.h b/src/radeon.h
index 4f5095d..a6d20d7 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -1290,7 +1290,7 @@ extern void RADEONPMFini(ScrnInfoPtr pScrn);
 #ifdef USE_EXA
 /* radeon_exa.c */
 extern Bool RADEONSetupMemEXA(ScreenPtr pScreen);
-extern Bool radeon_transform_is_affine(PictTransformPtr t);
+extern Bool radeon_transform_is_affine_or_scaled(PictTransformPtr t);
 
 /* radeon_exa_funcs.c */
 extern void RADEONCopyCP(PixmapPtr pDst, int srcX, int srcY, int dstX,
diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index 4f31b82..1c647b9 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -179,7 +179,7 @@ Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, uint32_t *pitch_offset)
  *
  * transform may be null.
  */
-Bool radeon_transform_is_affine(PictTransformPtr t)
+Bool radeon_transform_is_affine_or_scaled(PictTransformPtr t)
 {
 	if (t == NULL)
 		return TRUE;
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 2e13b00..43d3555 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -379,7 +379,7 @@ static Bool R100CheckCompositeTexture(PicturePtr pPict,
 	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
     }
 
-    if (!radeon_transform_is_affine(pPict->transform))
+    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
 	RADEON_FALLBACK(("non-affine transforms not supported\n"));
 
     return TRUE;
@@ -781,7 +781,7 @@ static Bool R200CheckCompositeTexture(PicturePtr pPict,
 	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
     }
 
-    if (!radeon_transform_is_affine(pPict->transform))
+    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
 	RADEON_FALLBACK(("non-affine transforms not supported\n"));
 
     return TRUE;
@@ -1168,7 +1168,7 @@ static Bool R300CheckCompositeTexture(PicturePtr pPict,
 	    RADEON_FALLBACK(("REPEAT_NONE unsupported for transformed xRGB source\n"));
     }
 
-    if (!radeon_transform_is_affine(pPict->transform))
+    if (!radeon_transform_is_affine_or_scaled(pPict->transform))
 	RADEON_FALLBACK(("non-affine transforms not supported\n"));
 
     return TRUE;
commit b93d7658f669f6dc1cfacebcfe955a1e113a537c
Author: Dave Airlie <airlied at redhat.com>
Date:   Wed Mar 16 10:37:22 2011 +1000

    radeon/exa: fix scaling check
    
    This caused a regression in Firefox, as these numbers are xFixed
    values, hence 1 is actually 65536.
    
    Should fix: https://bugzilla.redhat.com/show_bug.cgi?id=685084
    
    Signed-off-by: Dave Airlie <airlied at redhat.com>

diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index aa3d55e..4f31b82 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -184,7 +184,7 @@ Bool radeon_transform_is_affine(PictTransformPtr t)
 	if (t == NULL)
 		return TRUE;
 	/* the shaders don't handle scaling either */
-	return t->matrix[2][0] == 0 && t->matrix[2][1] == 0 && t->matrix[2][2] == 1;
+	return t->matrix[2][0] == 0 && t->matrix[2][1] == 0 && t->matrix[2][2] == IntToxFixed(1);
 }
 
 #if X_BYTE_ORDER == X_BIG_ENDIAN
commit 8567b8cd9e136ef69906e02286b3752db2404741
Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Tue Mar 15 13:13:06 2011 -0400

    APU: no tiling yet
    
    Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

diff --git a/src/radeon_kms.c b/src/radeon_kms.c
index 125a132..0760170 100644
--- a/src/radeon_kms.c
+++ b/src/radeon_kms.c
@@ -651,6 +651,10 @@ Bool RADEONPreInit_KMS(ScrnInfoPtr pScrn, int flags)
 	    } else
 		xf86DrvMsg(pScrn->scrnIndex, X_INFO,
 			   "R6xx+ KMS Color Tiling requires radeon drm 2.6.0 or newer\n");
+
+	    /* don't support tiling on APUs yet */
+	    if (info->ChipFamily == CHIP_FAMILY_PALM)
+		info->allowColorTiling = FALSE;
 	} else
 	    info->allowColorTiling = xf86ReturnOptValBool(info->Options,
 							  OPTION_COLOR_TILING, colorTilingDefault);
commit 4d350497012fa31a417ada662006e2d64db2a4b5
Author: Dave Airlie <airlied at redhat.com>
Date:   Tue Mar 15 10:32:19 2011 +1000

    radeon: exa shaders don't handle scaling either.
    
    rendercheck tsrccoords test fails.
    
    Signed-off-by: Dave Airlie <airlied at redhat.com>

diff --git a/src/radeon_exa.c b/src/radeon_exa.c
index 3dbdcae..aa3d55e 100644
--- a/src/radeon_exa.c
+++ b/src/radeon_exa.c
@@ -183,7 +183,8 @@ Bool radeon_transform_is_affine(PictTransformPtr t)
 {
 	if (t == NULL)
 		return TRUE;
-	return t->matrix[2][0] == 0 && t->matrix[2][1] == 0;
+	/* the shaders don't handle scaling either */
+	return t->matrix[2][0] == 0 && t->matrix[2][1] == 0 && t->matrix[2][2] == 1;
 }
 
 #if X_BYTE_ORDER == X_BIG_ENDIAN


More information about the xorg-commit mailing list